e1b00de370214e5412cae448f27775d368071c1d
[musl] / src / locale / iconv.c
1 #include <iconv.h>
2 #include <errno.h>
3 #include <wchar.h>
4 #include <string.h>
5 #include <stdlib.h>
6 #include <limits.h>
7 #include <stdint.h>
8
/* Special charmap type codes. The values are written in octal so that they
 * can be compared directly with the "\0NN" escapes used for the type byte
 * of each entry in charmaps[]. */
enum {
	/* Fixed-width and UTF-16 Unicode encodings (all below 010) */
	UTF_32BE  = 000,
	UTF_16LE  = 001,
	UTF_16BE  = 002,
	UTF_32LE  = 003,
	UCS2BE    = 004,
	UCS2LE    = 005,
	WCHAR_T   = 007,

	/* Byte-oriented encodings handled by dedicated code */
	US_ASCII  = 021,
	UTF_8     = 022,
	LATIN_9   = 024,
	TIS_620   = 025,
	JIS_0201  = 026,

	/* Multibyte East Asian encodings */
	EUC       = 031,
	EUC_TW    = 032,
	SHIFT_JIS = 033,
	BIG5      = 034,
	GBK       = 035
};
28
29 /* FIXME: these are not implemented yet
30  * EUC:   A1-FE A1-FE
31  * GBK:   81-FE 40-7E,80-FE
32  * Big5:  A1-FE 40-7E,A1-FE
33  */
34
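/* Definitions of charmaps. Each charmap consists of:
 * 1. Empty-string-terminated list of null-terminated aliases, spelled in
 *    lowercase letters and digits only.
 * 2. Special type code, or bits per character for table-driven charmaps.
 * 3. Number of elided entries: how many byte values starting at 0x80 map
 *    to themselves and therefore have no table entry (128 means the
 *    charmap has no table at all, as is the case for specials).
 * 4. Character table (size determined by fields 2 and 3): one entry per
 *    remaining byte value up to 0xff, bit-packed starting from the least
 *    significant bit of each byte. An entry of 0 marks an invalid byte. */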
40
41 static const unsigned char charmaps[] =
42 "utf8\0\0\022\x80"
43 "wchart\0\0\007\x80"
44
45 "ucs2\0ucs2be\0\0\004\x80"
46 "ucs2le\0\0\005\x80"
47
48 "utf16\0utf16be\0\0\002\x80"
49 "utf16le\0\0\001\x80"
50
51 "ucs4\0ucs4be\0utf32\0utf32be\0\0\000\x80"
52 "ucs4le\0utf32le\0\0\003\x80"
53
54 "ascii\0iso646\0usascii\0\0\021\x80"
55 "latin1\0iso88591\0\0\x09\x80"
56 "latin9\0iso885915\0\0\024\x80"
57 "tis620\0iso885911\0\0\025\x80"
58 "jis0201\0\0\026\x80"
59
60 "iso88592\0\0\x0a\x21"
61 "\x04\x61\x1b\x14\x29\x3d\x69\x75\x0a\x2a"
62 "\x60\x79\x45\x56\x5e\xad\xf4\xb5\x17\x2c"
63 "\x05\x6d\x2b\x14\x2d\x3e\x6d\x75\x2c\x2e"
64 "\x61\x7d\x55\x96\x5e\xdd\xfa\xc5\x17\x55"
65 "\xc1\x08\x23\x10\x31\x39\x19\x74\x0c\x43"
66 "\xc9\x60\xb4\x8c\x46\xcd\x38\xe3\x10\x44"
67 "\x43\x1d\x35\x0d\x35\x50\x59\x73\x0d\x56"
68 "\x6e\x69\x03\x17\x37\xdd\x88\xf5\x4d\x55"
69 "\xe1\x88\x33\x10\x39\x3a\x1d\x74\x4e\x43"
70 "\xe9\x64\xb4\xce\x46\xed\xb8\xf3\x50\x44"
71 "\x44\x21\x35\x0f\x3d\x51\xd9\x73\x4f\x56"
72 "\x6f\xe9\x13\x17\x3f\xfd\x8c\x95\x2d"
73
74 "iso88593\0\0\x0a\x21"
75 "\x26\x61\x3b\x0a\x29\x00\x90\x74\x0a\x2a"
76 "\x30\x79\xe5\x11\x4d\xad\x00\xb0\x17\x2c"
77 "\x27\xc9\x32\x0b\x2d\xb5\x94\x74\x0b\x2e"
78 "\x31\x7d\xf5\x51\x4d\xbd\x00\xc0\x17\x30"
79 "\xc1\x08\x03\x00\x31\x0a\x21\x74\x0c\x32"
80 "\xc9\x28\xb3\x0c\x33\xcd\x38\xf3\x0c\x00"
81 "\xd1\x48\x33\x0d\x35\x20\x59\x73\x0d\x47"
82 "\xd9\x68\xb3\x0d\x37\x6c\x71\xf5\x0d\x38"
83 "\xe1\x88\x03\x00\x39\x0b\x25\x74\x0e\x3a"
84 "\xe9\xa8\xb3\x0e\x3b\xed\xb8\xf3\x0e\x00"
85 "\xf1\xc8\x33\x0f\x3d\x21\xd9\x73\x4f\x47"
86 "\xf9\xe8\xb3\x0f\x3f\x6d\x75\x95\x2d"
87
88 "iso88594\0\0\x0a\x21"
89 "\x04\xe1\x64\x15\x29\x28\xed\x74\x0a\x2a"
90 "\x60\x49\x24\x92\x59\xad\xf4\xf5\x0a\x2c"
91 "\x05\x6d\x7b\x15\x2d\x29\xf1\x74\x2c\x2e"
92 "\x61\x4d\x34\xd2\x59\x4a\xf9\xb5\x14\x40"
93 "\xc1\x08\x33\x0c\x31\xc5\x18\xe3\x12\x43"
94 "\xc9\x60\xb4\x8c\x45\xcd\x38\xa3\x12\x44"
95 "\x45\x31\x65\x13\x35\xd5\x58\x73\x0d\x36"
96 "\x72\x69\xb3\x0d\x37\x68\xa9\xf5\x4d\x40"
97 "\xe1\x88\x33\x0e\x39\xe5\x98\xf3\x52\x43"
98 "\xe9\x64\xb4\xce\x45\xed\xb8\xb3\x52\x44"
99 "\x46\x35\x75\x13\x3d\xf5\xd8\x73\x0f\x3e"
100 "\x73\xe9\xb3\x0f\x3f\x69\xad\x95\x2d"
101
102 "iso88595\0\0\x0e\x21"
103 "\x01\x84\x00\x31\x40\x10\x10\x05\x84\x01"
104 "\x71\x40\x20\x10\x09\x84\x02\xb1\x40\x30"
105 "\x10\xad\x80\x03\xf1\x40\x40\x10\x11\x84"
106 "\x04\x31\x41\x50\x10\x15\x84\x05\x71\x41"
107 "\x60\x10\x19\x84\x06\xb1\x41\x70\x10\x1d"
108 "\x84\x07\xf1\x41\x80\x10\x21\x84\x08\x31"
109 "\x42\x90\x10\x25\x84\x09\x71\x42\xa0\x10"
110 "\x29\x84\x0a\xb1\x42\xb0\x10\x2d\x84\x0b"
111 "\xf1\x42\xc0\x10\x31\x84\x0c\x31\x43\xd0"
112 "\x10\x35\x84\x0d\x71\x43\xe0\x10\x39\x84"
113 "\x0e\xb1\x43\xf0\x10\x3d\x84\x0f\xf1\x43"
114 "\x00\x11\x41\x84\x10\x31\x44\x10\x11\x45"
115 "\x84\x11\x71\x44\x20\x11\x49\x84\x12\xb1"
116 "\x44\x30\x11\x4d\x84\x13\xf1\x44\x58\x84"
117 "\x51\x84\x14\x31\x45\x50\x11\x55\x84\x15"
118 "\x71\x45\x60\x11\x59\x84\x16\xb1\x45\x70"
119 "\x11\xa7\x80\x17\xf1\x45\x00"
120
121 "iso88596\0\0\x0b\x21"
122 "\x00\x00\x00\x00\x48\x01\x00\x00\x00\x00"
123 "\x00\x00\x00\x00\x00\x18\xdc\x0a\x00\x00"
124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
125 "\x00\x00\x00\x00\x00\xc0\x86\x01\x00\x00"
126 "\x00\x7c\x18\x00\x21\x16\xf1\x88\x49\x5c"
127 "\x62\x13\x9f\x18\xc5\x29\x56\xf1\x8a\x59"
128 "\xdc\x62\x17\xbf\x18\xc6\x31\x96\xf1\x8c"
129 "\x69\x5c\x63\x1b\xdf\x18\xc7\x39\xd6\x31"
130 "\x00\x00\x00\x00\x00\x00\x00\xc8\x41\x16"
131 "\xf2\x90\x89\x5c\x64\x23\x1f\x19\xc9\x49"
132 "\x56\xf2\x92\x99\xdc\x64\x27\x3f\x19\xca"
133 "\x51\x96\x32\x00\x00\x00\x00\x00\x00\x00"
134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
135 "\x00"
136
137 "iso88597\0\0\x0e\x21"
138 "\x18\x60\x06\x38\x0a\xb0\x82\xaf\xa0\x29"
139 "\x70\x0a\xa0\x02\xa9\x80\xde\xb0\x0a\xb0"
140 "\x02\xad\x00\x00\x50\x01\xc2\x02\xb1\x80"
141 "\x2c\x30\x0b\x10\x0e\x85\x83\xe1\x70\x0b"
142 "\x20\x0e\x89\x83\xe2\xb0\x0b\x30\x0e\xbd"
143 "\x80\xe3\xf0\x38\x40\x0e\x91\x83\xe4\x30"
144 "\x39\x50\x0e\x95\x83\xe5\x70\x39\x60\x0e"
145 "\x99\x83\xe6\xb0\x39\x70\x0e\x9d\x83\xe7"
146 "\xf0\x39\x80\x0e\xa1\x03\x00\x30\x3a\x90"
147 "\x0e\xa5\x83\xe9\x70\x3a\xa0\x0e\xa9\x83"
148 "\xea\xb0\x3a\xb0\x0e\xad\x83\xeb\xf0\x3a"
149 "\xc0\x0e\xb1\x83\xec\x30\x3b\xd0\x0e\xb5"
150 "\x83\xed\x70\x3b\xe0\x0e\xb9\x83\xee\xb0"
151 "\x3b\xf0\x0e\xbd\x83\xef\xf0\x3b\x00\x0f"
152 "\xc1\x83\xf0\x30\x3c\x10\x0f\xc5\x83\xf1"
153 "\x70\x3c\x20\x0f\xc9\x83\xf2\xb0\x3c\x30"
154 "\x0f\xcd\x83\xf3\x00\x00\x00"
155
156 "iso88598\0\0\x0e\x21"
157 "\x00\x80\x28\x30\x0a\x90\x02\xa5\x80\x29"
158 "\x70\x0a\xa0\x02\xa9\xc0\x35\xb0\x0a\xb0"
159 "\x02\xad\x80\x2b\xf0\x0a\xc0\x02\xb1\x80"
160 "\x2c\x30\x0b\xd0\x02\xb5\x80\x2d\x70\x0b"
161 "\xe0\x02\xb9\xc0\x3d\xb0\x0b\xf0\x02\xbd"
162 "\x80\x2f\x00\x00\x00\x00\x00\x00\x00\x00"
163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
167 "\x00\x00\x00\x00\x00\x00\x00\x00\x70\x01"
168 "\x42\x17\xd1\x85\x74\x31\x5d\x50\x17\xd5"
169 "\x85\x75\x71\x5d\x60\x17\xd9\x85\x76\xb1"
170 "\x5d\x70\x17\xdd\x85\x77\xf1\x5d\x80\x17"
171 "\xe1\x85\x78\x31\x5e\x90\x17\xe5\x85\x79"
172 "\x71\x5e\xa0\x17\xe9\x85\x7a\x01\x00\x00"
173 "\x00\x0e\xe0\x03\x08\x00\x00"
174
175 "iso88599\0\0\x09\x50"
176 "\x1e\xa3\x49\x9b\x46\xad\x9a\xb5\x6b\xd8"
177 "\xb2\x69\xdb\xc6\x0d\xa6\xd7\x6f\xe0\xc2"
178 "\x89\x1b\x47\xae\x9c\xb9\x73\xe8\xd2\xa9"
179 "\x5b\xc7\xae\x9d\xbb\x77\x1f\xe3\xc9\x9b"
180 "\x47\xaf\x9e\xbd\x7b\xf8\xf2\xe9\xdb\xc7"
181 "\x2f\xe6\xd7\x7f"
182
183 "iso885910\0\0\x0e\x21"
184 "\x04\x81\x44\x20\x12\xa8\x04\x28\x81\x4d"
185 "\x70\x0a\xec\x04\x10\x01\x58\x60\x16\xf4"
186 "\x05\xad\x80\x5a\xa0\x14\xc0\x02\x05\xc1"
187 "\x44\x30\x12\xac\x04\x29\xc1\x4d\x70\x0b"
188 "\xf0\x04\x11\x41\x58\x70\x16\xf8\x05\x15"
189 "\xe0\x5a\xb0\x14\x00\x04\xc1\x80\x30\x30"
190 "\x0c\x10\x03\xc5\x80\x31\xe0\x12\x30\x04"
191 "\xc9\x00\x46\xb0\x0c\x58\x04\xcd\x80\x33"
192 "\xf0\x0c\x40\x03\x45\x01\x53\x30\x0d\x50"
193 "\x03\xd5\x80\x35\x80\x16\x60\x03\x72\x81"
194 "\x36\xb0\x0d\x70\x03\xdd\x80\x37\xf0\x0d"
195 "\x04\x04\xe1\x80\x38\x30\x0e\x90\x03\xe5"
196 "\x80\x39\xf0\x12\x34\x04\xe9\x40\x46\xb0"
197 "\x0e\x5c\x04\xed\x80\x3b\xf0\x0e\xc0\x03"
198 "\x46\x41\x53\x30\x0f\xd0\x03\xf5\x80\x3d"
199 "\x90\x16\xe0\x03\x73\x81\x3e\xb0\x0f\xf0"
200 "\x03\xfd\x80\x3f\x80\x13\x00"
201
202 "iso885913\0\0\x0e\x21"
203 "\x1d\xa0\x28\x30\x0a\x90\x02\x1e\xa0\x29"
204 "\x70\x0a\x60\x03\xa9\x80\x55\xb0\x0a\xb0"
205 "\x02\xad\x80\x2b\x60\x0c\xc0\x02\xb1\x80"
206 "\x2c\x30\x0b\x70\x80\xb5\x80\x2d\x70\x0b"
207 "\xe0\x03\xb9\xc0\x55\xb0\x0b\xf0\x02\xbd"
208 "\x80\x2f\x60\x0e\x10\x04\x2e\x01\x40\x60"
209 "\x10\x10\x03\xc5\x00\x46\x20\x11\x30\x04"
210 "\xc9\x40\x5e\x60\x11\x88\x04\x36\x81\x4a"
211 "\xb0\x13\x80\x05\x43\x41\x51\x30\x0d\x30"
212 "\x05\xd5\x80\x35\x70\x0d\xc8\x05\x41\x81"
213 "\x56\xa0\x16\x70\x03\x7b\x41\x5f\xf0\x0d"
214 "\x14\x04\x2f\x41\x40\x70\x10\x90\x03\xe5"
215 "\x40\x46\x30\x11\x34\x04\xe9\x80\x5e\x70"
216 "\x11\x8c\x04\x37\xc1\x4a\xc0\x13\x84\x05"
217 "\x44\x81\x51\x30\x0f\x34\x05\xf5\x80\x3d"
218 "\x70\x0f\xcc\x05\x42\xc1\x56\xb0\x16\xf0"
219 "\x03\x7c\x81\x5f\x90\x01\x02"
220
221 "iso885914\0\0\x0d\x21"
222 "\x02\x7e\xc0\x8f\x02\x85\xb0\x10\x14\xfc"
223 "\x29\x00\xf4\xa9\x40\xd0\x2f\x78\x79\xdf"
224 "\x0a\x5c\x01\x5e\xf0\xf0\x1f\x1e\x24\x84"
225 "\x04\x20\x1f\xe4\x6d\x81\x95\x0f\xf4\x57"
226 "\x7e\xd0\x83\xf9\x79\x4f\xe8\x0b\x7d\x98"
227 "\x07\x06\xc1\x40\x18\x0c\x03\x62\x50\x0c"
228 "\x8c\xc1\x31\x40\x06\xc9\x40\x19\x2c\x03"
229 "\x66\xd0\x0c\x9c\xc1\x33\xa0\x0b\xd1\x40"
230 "\x1a\x4c\x03\x6a\x50\x0d\xac\x81\x9a\xc7"
231 "\x06\xd9\x40\x1b\x6c\x03\x6e\xd0\x0d\xec"
232 "\xc2\x37\x00\x07\xe1\x40\x1c\x8c\x03\x72"
233 "\x50\x0e\xcc\xc1\x39\x40\x07\xe9\x40\x1d"
234 "\xac\x03\x76\xd0\x0e\xdc\xc1\x3b\xa8\x0b"
235 "\xf1\x40\x1e\xcc\x03\x7a\x50\x0f\xec\xc1"
236 "\x9a\xc7\x07\xf9\x40\x1f\xec\x03\x7e\xd0"
237 "\x0f\xee\xc2\x3f\x00"
238
239 "iso885916\0\0\x0e\x21"
240 "\x04\x41\x41\x10\x14\xb0\x82\x1e\x20\x58"
241 "\x70\x0a\x84\x05\xa9\x00\x86\xb0\x0a\xe4"
242 "\x05\xad\x80\x5e\xb0\x17\xc0\x02\xb1\x00"
243 "\x43\x20\x14\xf4\x05\x1d\xa0\x2d\x70\x0b"
244 "\xf8\x05\x0d\x41\x86\xb0\x0b\x48\x05\x53"
245 "\x01\x5e\xc0\x17\x00\x03\xc1\x80\x30\x20"
246 "\x10\x10\x03\x06\x81\x31\x70\x0c\x20\x03"
247 "\xc9\x80\x32\xb0\x0c\x30\x03\xcd\x80\x33"
248 "\xf0\x0c\x40\x04\x43\x81\x34\x30\x0d\x50"
249 "\x03\x50\x81\x35\xa0\x15\xc0\x05\xd9\x80"
250 "\x36\xb0\x0d\x70\x03\x18\x81\x86\xf0\x0d"
251 "\x80\x03\xe1\x80\x38\x30\x10\x90\x03\x07"
252 "\x81\x39\x70\x0e\xa0\x03\xe9\x80\x3a\xb0"
253 "\x0e\xb0\x03\xed\x80\x3b\xf0\x0e\x44\x04"
254 "\x44\x81\x3c\x30\x0f\xd0\x03\x51\x81\x3d"
255 "\xb0\x15\xc4\x05\xf9\x80\x3e\xb0\x0f\xf0"
256 "\x03\x19\xc1\x86\xf0\x0f\x00"
257
258 "windows1252\0\0\x0e\x00"
259 "\xac\x20\x00\xa0\x01\x4a\x06\x1e\xa0\x09"
260 "\x08\x02\x86\x80\xc6\x02\x0c\x08\x16\xe4"
261 "\x80\x52\x01\x00\xd0\x17\x00\x00\x00\x00"
262 "\x06\x98\x01\x72\x80\x1d\xa0\x08\x38\x01"
263 "\x52\x80\xdc\x82\x48\x18\x16\xe8\x80\x53"
264 "\x01\x00\xe0\x17\xe0\x05\xa0\x40\x28\x20"
265 "\x0a\x8c\x02\xa4\x40\x29\x60\x0a\x9c\x02"
266 "\xa8\x40\x2a\xa0\x0a\xac\x02\xac\x40\x2b"
267 "\xe0\x0a\xbc\x02\xb0\x40\x2c\x20\x0b\xcc"
268 "\x02\xb4\x40\x2d\x60\x0b\xdc\x02\xb8\x40"
269 "\x2e\xa0\x0b\xec\x02\xbc\x40\x2f\xe0\x0b"
270 "\xfc\x02\xc0\x40\x30\x20\x0c\x0c\x03\xc4"
271 "\x40\x31\x60\x0c\x1c\x03\xc8\x40\x32\xa0"
272 "\x0c\x2c\x03\xcc\x40\x33\xe0\x0c\x3c\x03"
273 "\xd0\x40\x34\x20\x0d\x4c\x03\xd4\x40\x35"
274 "\x60\x0d\x5c\x03\xd8\x40\x36\xa0\x0d\x6c"
275 "\x03\xdc\x40\x37\xe0\x0d\x7c\x03\xe0\x40"
276 "\x38\x20\x0e\x8c\x03\xe4\x40\x39\x60\x0e"
277 "\x9c\x03\xe8\x40\x3a\xa0\x0e\xac\x03\xec"
278 "\x40\x3b\xe0\x0e\xbc\x03\xf0\x40\x3c\x20"
279 "\x0f\xcc\x03\xf4\x40\x3d\x60\x0f\xdc\x03"
280 "\xf8\x40\x3e\xa0\x0f\xec\x03\xfc\x40\x3f"
281 "\xe0\x0f\xfc\x03"
282 ;
283
284
285
/*
 * Loosely compare a caller-supplied charset name with a canonical alias.
 *
 * a: name to test; letters are matched case-insensitively and every
 *    character that is neither an ASCII letter nor an ASCII digit is
 *    skipped wherever it appears before a character of b.
 * b: canonical alias, expected to hold lowercase letters and digits only.
 *
 * Returns 0 when the names match and nonzero otherwise.
 */
static int fuzzycmp(const unsigned char *a, const unsigned char *b)
{
	for (; *a && *b; a++, b++) {
		/* Skip punctuation. The unsigned subtractions wrap for
		 * characters below the range start, so each test is a
		 * single "outside [a-z]" / "outside [0-9]" range check.
		 * The bounds must be 26 letters and 10 digits exactly;
		 * a looser bound would treat '[', '{' and ':' as
		 * alphanumeric and never skip them. */
		while (*a && (*a|32U)-'a' >= 26 && *a-'0' >= 10U) a++;
		if ((*a|32U) != *b) return 1;
	}
	return *a != *b;
}
294
295 static size_t find_charmap(const void *name)
296 {
297         const unsigned char *s;
298         for (s=charmaps; *s; ) {
299                 if (!fuzzycmp(name, s)) {
300                         for (; *s; s+=strlen((void *)s)+1);
301                         return s+1-charmaps;
302                 }
303                 s += strlen((void *)s)+1;
304                 if (!*s) s += ((128-s[2])*s[1]+7)/8 + 3;
305         }
306         return -1;
307 }
308
/*
 * Create a conversion descriptor translating from charset `from` to
 * charset `to`.
 *
 * No memory is allocated: the descriptor is simply the two charmap
 * offsets returned by find_charmap packed into one pointer-sized value,
 * source offset in the bits above 16 and destination offset in the low
 * 16 bits.
 *
 * Returns the descriptor, or (iconv_t)-1 with errno set to EINVAL when
 * either name is unknown. The destination name is looked up first and
 * the source name only if the destination was found.
 */
iconv_t iconv_open(const char *to, const char *from)
{
	size_t dst, src;

	dst = find_charmap(to);
	if (dst != (size_t)-1) {
		src = find_charmap(from);
		if (src != (size_t)-1)
			return (void *)(src<<16 | dst);
	}

	errno = EINVAL;
	return (iconv_t)-1;
}
320
/*
 * Release a conversion descriptor. Nothing is done with cd here, so this
 * function always reports success by returning 0.
 */
int iconv_close(iconv_t cd)
{
	(void)cd;
	return 0;
}
325
/*
 * Read a 16-bit value from the two bytes at s.
 * Only the low bit of e matters: 0 selects big-endian byte order,
 * 1 selects little-endian.
 */
static unsigned get_16(const unsigned char *s, int e)
{
	const int hi_at = e & 1;
	const int lo_at = 1 - hi_at;

	return s[hi_at]<<8 | s[lo_at];
}
331
/*
 * Store the low 16 bits of c into the two bytes at s.
 * Only the low bit of e matters: 0 selects big-endian byte order,
 * 1 selects little-endian.
 */
static void put_16(unsigned char *s, unsigned c, int e)
{
	const int hi_at = e & 1;
	const int lo_at = 1 - hi_at;

	s[hi_at] = c >> 8;
	s[lo_at] = c;
}
338
/*
 * Read a 32-bit value from the four bytes at s.
 * Byte i of the value (most significant first) is taken from s[e^i], so
 * e == 0 reads big-endian and e == 3 reads little-endian.
 */
static unsigned get_32(const unsigned char *s, int e)
{
	unsigned v = 0;
	int i;

	for (i = 0; i < 4; i++)
		v = v<<8 | s[e^i];
	return v;
}
343
/*
 * Store the 32-bit value c into the four bytes at s.
 * Byte i of the value (most significant first) goes to s[e^i], so
 * e == 0 writes big-endian and e == 3 writes little-endian.
 */
static void put_32(unsigned char *s, unsigned c, int e)
{
	int i;

	for (i = 0; i < 4; i++)
		s[e^i] = c >> (24 - 8*i);
}
351
352
353
/*
 * Fetch entry c from the bit-packed table m, whose entries are n bits
 * wide and stored back to back starting at the least significant bit of
 * each byte.
 *
 * Widths 11, 13 and 14 may straddle three bytes; widths 9 and 10 never
 * span more than two. Any other n is addressed as if the entries were
 * 9 bits wide, with the (1<<n)-1 mask applied to the second byte's
 * contribution only.
 */
static unsigned get_mapping(const unsigned char *m, unsigned c, unsigned n)
{
	unsigned width, first, shift;
	int value;

	if (n == 11 || n == 13 || n == 14) {
		first = c * n / 8;       /* byte holding the entry's low bits */
		shift = n % 8 * c % 8;   /* bit position within that byte */
		value = m[first] >> shift
		      | m[first+1] << (8 - shift)
		      | m[first+2] << (16 - shift);
		return ((1 << n) - 1) & value;
	}

	width = (n == 10) ? 10 : 9;
	first = c * width / 8;
	shift = width % 8 * c % 8;
	/* The first byte contributes at most 8 bits, so only the second
	 * byte's contribution is masked. */
	value = m[first] >> shift
	      | (m[first+1] << (8 - shift) & ((1 << n) - 1));
	return value;
}
370
371 /* Adapt as needed */
372 #define mbrtowc_utf8 mbrtowc
373 #define wctomb_utf8 wctomb
374
375 #include <stdio.h>
376 size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
377 {
378         size_t x=0;
379         unsigned long cd = (unsigned long)cd0;
380         unsigned to = cd & 0xffff;
381         unsigned from = cd >> 16;
382         const unsigned char *map = charmaps+from+2;
383         const unsigned char *tomap = charmaps+to+2;
384         mbstate_t st = {0};
385         wchar_t wc;
386         unsigned c, d;
387         size_t k, l;
388         int err;
389         unsigned elide = map[-1] + 128;
390         unsigned toelide = tomap[-1] + 128;
391         unsigned char type = map[-2];
392         unsigned char totype = tomap[-2];
393
394         if (!in || !*in || !*inb) return 0;
395
396         for (; *inb; *in+=l, *inb-=l) {
397                 c = *(unsigned char *)*in;
398                 l = 1;
399                 if (type < 8 || c >= 0x80) switch (type) {
400                 case UTF_8:
401                         l = mbrtowc_utf8(&wc, *in, *inb, &st);
402                         if (!l) l++;
403                         else if (l == (size_t)-1) goto ilseq;
404                         else if (l == (size_t)-2) goto starved;
405                         c = wc;
406                         break;
407                 case LATIN_9:
408                         if ((unsigned)c - 0xa4 <= 0xbe - 0xa4) {
409                                 static const unsigned char map[] = {
410                                         0, 0x60, 0, 0x61, 0, 0, 0, 0, 0, 0, 0,
411                                         0, 0, 0, 0, 0x7d, 0, 0, 0, 0x7e, 0, 0, 0,
412                                         0x52, 0x53, 0x78
413                                 };
414                                 if (c == 0xa4) c = 0x20ac;
415                                 else if (map[c-0xa5]) c = 0x100 | map[c-0xa5];
416                         }
417                         break;
418                 case TIS_620:
419                         if (c >= 0xa1) c += 0x0e01-0xa1;
420                         break;
421                 case JIS_0201:
422                         if (c >= 0xa1)
423                                 if (c <= 0xdf) c += 0xff61-0xa1;
424                                 else goto ilseq;
425                         break;
426                 case 9: case 10: case 11: case 13: case 14:
427                         if (c < elide) break;
428                         c = get_mapping(map, c-elide, type);
429                         if (!c) {
430                 case US_ASCII:
431                                 goto ilseq;
432                         }
433                         break;
434                 case WCHAR_T:
435                         l = sizeof(wchar_t);
436                         if (*inb < l) goto starved;
437                         c = *(wchar_t *)*in;
438                         if (0) {
439                 case UTF_32BE:
440                 case UTF_32LE:
441                         l = 4;
442                         if (*inb < 4) goto starved;
443                         c = get_32((void *)*in, type);
444                         }
445                         if (c-0xd800u < 0x800u || c >= 0x110000u) goto ilseq;
446                         break;
447                 case UCS2BE:
448                 case UCS2LE:
449                 case UTF_16BE:
450                 case UTF_16LE:
451                         l = 2;
452                         if (*inb < 2) goto starved;
453                         c = get_16((void *)*in, type);
454                         if ((unsigned)(c-0xdc00) < 0x400) goto ilseq;
455                         if ((unsigned)(c-0xd800) < 0x400) {
456                                 if (type-UCS2BE < 2U) goto ilseq;
457                                 l = 4;
458                                 if (*inb < 4) goto starved;
459                                 d = get_16((void *)(*in + 2), from);
460                                 if ((unsigned)(c-0xdc00) >= 0x400) goto ilseq;
461                                 c = ((c-0xd800)<<10) | (d-0xdc00);
462                         }
463                         break;
464                 }
465
466                 switch (totype) {
467                 case WCHAR_T:
468                         if (*outb < sizeof(wchar_t)) goto toobig;
469                         *(wchar_t *)*out = c;
470                         *out += sizeof(wchar_t);
471                         *outb -= sizeof(wchar_t);
472                         break;
473                 case UTF_8:
474                         if (*outb < 4) {
475                                 char tmp[4];
476                                 k = wctomb_utf8(tmp, c);
477                                 if (*outb < k) goto toobig;
478                                 memcpy(*out, tmp, k);
479                         } else k = wctomb_utf8(*out, c);
480                         *out += k;
481                         *outb -= k;
482                         break;
483                 case TIS_620:
484                         if (c-0xe01u <= 0xff-0xa1)
485                                 c -= 0xe01-0xa1;
486                         else if (c >= 0xa1)
487                                 goto ascii;
488                         goto revout;
489                 case JIS_0201:
490                         if (c-0xff61u <= 0xdf-0xa1)
491                                 c -= 0xff61-0xa1;
492                         else if (c >= 0xa1)
493                                 goto ascii;
494                         goto revout;
495                 case LATIN_9:
496                         if (c == 0x20ac) {
497                                 c=0xa4;
498                         } else if (c-0x150u<=0x12 && (1<<c-0x150 & 0x3000c)) {
499                                 static const unsigned char map[] =
500                                         { 0xa6,0xa8,0xbc,0xbd };
501                                 c = map[c&3];
502                         } else if (c-0x178u<=0x7 && (1<<c-0x178 & 0x61)) {
503                                 static const unsigned char map[] =
504                                         { 0xbe,0,0,0,0,0xb4,0xb8 };
505                                 c = map[c&7];
506                         } else if (c>0x100 ||
507                                 c-0xa5u<=0xbeu-0xa5
508                                 && (1<<c-0xa5 & 0x388800a))
509                 case US_ASCII: ascii:
510                         if (c > 0x7f) x++, c='*';
511                 case 9: case 10: case 11: case 13: case 14:
512                         if (*outb < 1) goto toobig;
513                         if (c < toelide) {
514                         revout:
515                                 *(*out)++ = c;
516                                 *outb -= 1;
517                                 break;
518                         }
519                         for (d=0; d<256-toelide; d++) {
520                                 if (c == get_mapping(tomap, d, totype)) {
521                                         c = d + toelide;
522                                         goto revout;
523                                 }
524                         }
525                         x++;
526                         c = '*';
527                         goto revout;
528                 case UCS2BE:
529                 case UCS2LE:
530                 case UTF_16BE:
531                 case UTF_16LE:
532                         if (c < 0x10000) {
533                                 if (*outb < 2) goto toobig;
534                                 put_16((void *)*out, c, totype);
535                                 *out += 2;
536                                 *outb -= 2;
537                                 break;
538                         }
539                         if (type-UCS2BE < 2U) goto ilseq;
540                         if (*outb < 4) goto toobig;
541                         put_16((void *)*out, (c>>10)|0xd800, totype);
542                         put_16((void *)(*out + 2), (c&0x3ff)|0xdc00, totype);
543                         *out += 4;
544                         *outb -= 4;
545                         break;
546                 case UTF_32BE:
547                 case UTF_32LE:
548                         if (*outb < 4) goto toobig;
549                         put_32((void *)*out, c, totype);
550                         *out += 4;
551                         *outb -= 4;
552                         break;
553                 }
554         }
555         return x;
556 ilseq:
557         err = EILSEQ;
558         x = -1;
559         goto end;
560 toobig:
561         err = E2BIG;
562         goto end;
563 starved:
564         err = EINVAL;
565 end:
566         errno = err;
567         return x;
568 }