X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Flocale%2Ficonv.c;h=3047c27b2e8b36561dd8feba244f274492be1693;hb=6aeb9c6703670649ee09b3c8575fb428168bb75c;hp=01f17521a17e2255a0ddab6500c190be51171791;hpb=105eff9dec51bc4898a74af2854ab71f927a5c3b;p=musl diff --git a/src/locale/iconv.c b/src/locale/iconv.c index 01f17521..3047c27b 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -16,6 +16,9 @@ #define WCHAR_T 0306 #define US_ASCII 0307 #define UTF_8 0310 +#define UTF_16 0312 +#define UTF_32 0313 +#define UCS2 0314 #define EUC_JP 0320 #define SHIFT_JIS 0321 #define ISO2022_JP 0322 @@ -35,13 +38,16 @@ static const unsigned char charmaps[] = "utf8\0char\0\0\310" "wchart\0\0\306" -"ucs2\0ucs2be\0\0\304" +"ucs2be\0\0\304" "ucs2le\0\0\305" -"utf16\0utf16be\0\0\302" +"utf16be\0\0\302" "utf16le\0\0\301" -"ucs4\0ucs4be\0utf32\0utf32be\0\0\300" +"ucs4be\0utf32be\0\0\300" "ucs4le\0utf32le\0\0\303" "ascii\0usascii\0iso646\0iso646us\0\0\307" +"utf16\0\0\312" +"ucs4\0utf32\0\0\313" +"ucs2\0\0\314" "eucjp\0\0\320" "shiftjis\0sjis\0\0\321" "iso2022jp\0\0\322" @@ -80,6 +86,10 @@ static const unsigned short ksc[93][94] = { #include "ksc.h" }; +static const unsigned short rev_jis[] = { +#include "revjis.h" +}; + static int fuzzycmp(const unsigned char *a, const unsigned char *b) { for (; *a && *b; a++, b++) { @@ -134,13 +144,16 @@ iconv_t iconv_open(const char *to, const char *from) if ((t = find_charmap(to))==-1 || (f = find_charmap(from))==-1 - || (charmaps[t] >= 0320)) { + || (charmaps[t] >= 0330)) { errno = EINVAL; return (iconv_t)-1; } iconv_t cd = combine_to_from(t, f); switch (charmaps[f]) { + case UTF_16: + case UTF_32: + case UCS2: case ISO2022_JP: scd = malloc(sizeof *scd); if (!scd) return (iconv_t)-1; @@ -192,6 +205,25 @@ static unsigned legacy_map(const unsigned char *map, unsigned c) return x < 256 ? x : legacy_chars[x-256]; } +static unsigned uni_to_jis(unsigned c) +{ + unsigned nel = sizeof rev_jis / sizeof *rev_jis; + unsigned d, j, i, b = 0; + for (;;) { + i = nel/2; + j = rev_jis[b+i]; + d = jis0208[j/256][j%256]; + if (d==c) return j + 0x2121; + else if (nel == 1) return 0; + else if (c < d) + nel /= 2; + else { + b += i; + nel -= nel/2; + } + } +} + size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb) { size_t x=0; @@ -262,6 +294,31 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = ((c-0xd7c0)<<10) + (d-0xdc00); } break; + case UCS2: + case UTF_16: + l = 0; + if (!scd->state) { + if (*inb < 2) goto starved; + c = get_16((void *)*in, 0); + scd->state = type==UCS2 + ? c==0xfffe ? UCS2LE : UCS2BE + : c==0xfffe ? UTF_16LE : UTF_16BE; + if (c == 0xfffe || c == 0xfeff) + l = 2; + } + type = scd->state; + continue; + case UTF_32: + l = 0; + if (!scd->state) { + if (*inb < 4) goto starved; + c = get_32((void *)*in, 0); + scd->state = c==0xfffe0000 ? UTF_32LE : UTF_32BE; + if (c == 0xfffe0000 || c == 0xfeff) + l = 4; + } + type = scd->state; + continue; case SHIFT_JIS: if (c < 128) break; if (c-0xa1 <= 0xdf-0xa1) { @@ -401,16 +458,24 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri * range in the hkscs table then hard-coded * here. Ugly, yes. */ if (c/256 == 0xdc) { - if (totype-0300U > 8) k = 2; - else k = "\10\4\4\10\4\4\10\2\4"[totype-0300]; - if (k > *outb) goto toobig; - x += iconv(combine_to_from(to, 0), + union { + char c[8]; + wchar_t wc[2]; + } tmp; + char *ptmp = tmp.c; + size_t tmpx = iconv(combine_to_from(to, find_charmap("utf8")), &(char *){"\303\212\314\204" "\303\212\314\214" "\303\252\314\204" "\303\252\314\214" +c%256}, &(size_t){4}, - out, outb); + &ptmp, &(size_t){sizeof tmp}); + size_t tmplen = ptmp - tmp.c; + if (tmplen > *outb) goto toobig; + if (tmpx) x++; + memcpy(*out, &tmp, tmplen); + *out += tmplen; + *outb -= tmplen; continue; } if (!c) goto ilseq; @@ -482,6 +547,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri if (*outb < 1) goto toobig; if (c<256 && c==legacy_map(tomap, c)) { revout: + if (*outb < 1) goto toobig; *(*out)++ = c; *outb -= 1; break; @@ -493,11 +559,87 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri } } goto subst; + case SHIFT_JIS: + if (c < 128) goto revout; + if (c == 0xa5) { + x++; + c = '\\'; + goto revout; + } + if (c == 0x203e) { + x++; + c = '~'; + goto revout; + } + if (c-0xff61 <= 0xdf-0xa1) { + c += 0xa1 - 0xff61; + goto revout; + } + c = uni_to_jis(c); + if (!c) goto subst; + if (*outb < 2) goto toobig; + d = c%256; + c = c/256; + *(*out)++ = (c+1)/2 + (c<95 ? 112 : 176); + *(*out)++ = c%2 ? d + 31 + d/96 : d + 126; + *outb -= 2; + break; + case EUC_JP: + if (c < 128) goto revout; + if (c-0xff61 <= 0xdf-0xa1) { + c += 0x0e00 + 0x21 - 0xff61; + } else { + c = uni_to_jis(c); + } + if (!c) goto subst; + if (*outb < 2) goto toobig; + *(*out)++ = c/256 + 0x80; + *(*out)++ = c%256 + 0x80; + *outb -= 2; + break; + case ISO2022_JP: + if (c < 128) goto revout; + if (c-0xff61 <= 0xdf-0xa1 || c==0xa5 || c==0x203e) { + if (*outb < 7) goto toobig; + *(*out)++ = '\033'; + *(*out)++ = '('; + if (c==0xa5) { + *(*out)++ = 'J'; + *(*out)++ = '\\'; + } else if (c==0x203e) { + *(*out)++ = 'J'; + *(*out)++ = '~'; + } else { + *(*out)++ = 'I'; + *(*out)++ = c-0xff61+0x21; + } + *(*out)++ = '\033'; + *(*out)++ = '('; + *(*out)++ = 'B'; + *outb -= 7; + break; + } + c = uni_to_jis(c); + if (!c) goto subst; + if (*outb < 8) goto toobig; + *(*out)++ = '\033'; + *(*out)++ = '$'; + *(*out)++ = 'B'; + *(*out)++ = c/256; + *(*out)++ = c%256; + *(*out)++ = '\033'; + *(*out)++ = '('; + *(*out)++ = 'B'; + *outb -= 8; + break; + case UCS2: + totype = UCS2BE; case UCS2BE: case UCS2LE: + case UTF_16: case UTF_16BE: case UTF_16LE: - if (c < 0x10000 || type-UCS2BE < 2U) { + if (c < 0x10000 || totype-UCS2BE < 2U) { if (c >= 0x10000) c = 0xFFFD; if (*outb < 2) goto toobig; put_16((void *)*out, c, totype); @@ -512,6 +654,8 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri *out += 4; *outb -= 4; break; + case UTF_32: + totype = UTF_32BE; case UTF_32BE: case UTF_32LE: if (*outb < 4) goto toobig;