use restrict everywhere it's required by c99 and/or posix 2008
[musl] / src / locale / iconv.c
index 4498e40..7b341fe 100644 (file)
 #define UTF_32LE    0303
 #define UCS2BE      0304
 #define UCS2LE      0305
-#define US_ASCII    0306
-#define WCHAR_T     0307
+#define WCHAR_T     0306
+#define US_ASCII    0307
 #define UTF_8       0310
+#define EUC_JP      0320
+#define SHIFT_JIS   0321
+#define GB18030     0330
+#define GBK         0331
+#define GB2312      0332
 
 /* FIXME: these are not implemented yet
  * EUC:   A1-FE A1-FE
 
 static const unsigned char charmaps[] =
 "utf8\0\0\310"
-"wchart\0\0\307"
+"wchart\0\0\306"
 "ucs2\0ucs2be\0\0\304"
 "ucs2le\0\0\305"
 "utf16\0utf16be\0\0\302"
 "utf16le\0\0\301"
 "ucs4\0ucs4be\0utf32\0utf32be\0\0\300"
 "ucs4le\0utf32le\0\0\303"
-"ascii\0usascii\0iso646\0iso646us\0\0\306"
+"ascii\0usascii\0iso646\0iso646us\0\0\307"
+"eucjp\0\0\320"
+"shiftjis\0sjis\0\0\321"
+"gb18030\0\0\330"
+"gbk\0\0\331"
+"gb2312\0\0\332"
 #include "codepages.h"
 ;
 
@@ -44,6 +54,14 @@ static const unsigned short legacy_chars[] = {
 #include "legacychars.h"
 };
 
+static const unsigned short jis0208[84][94] = {
+#include "jis0208.h"
+};
+
+static const unsigned short gb18030[126][190] = {
+#include "gb18030.h"
+};
+
 static int fuzzycmp(const unsigned char *a, const unsigned char *b)
 {
        for (; *a && *b; a++, b++) {
@@ -74,7 +92,9 @@ iconv_t iconv_open(const char *to, const char *from)
 {
        size_t f, t;
 
-       if ((t = find_charmap(to))==-1 || (f = find_charmap(from))==-1) {
+       if ((t = find_charmap(to))==-1
+        || (f = find_charmap(from))==-1
+        || (charmaps[t] >= 0320)) {
                errno = EINVAL;
                return (iconv_t)-1;
        }
@@ -119,8 +139,7 @@ static void put_32(unsigned char *s, unsigned c, int e)
 #define mbrtowc_utf8 mbrtowc
 #define wctomb_utf8 wctomb
 
-#include <stdio.h>
-size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
+size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
 {
        size_t x=0;
        unsigned long cd = (unsigned long)cd0;
@@ -142,7 +161,7 @@ size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
                c = *(unsigned char *)*in;
                l = 1;
 
-               if (c >= 128) switch (type) {
+               if (c >= 128 || type-UTF_32BE < 7U) switch (type) {
                case UTF_8:
                        l = mbrtowc_utf8(&wc, *in, *inb, &st);
                        if (!l) l++;
@@ -177,10 +196,87 @@ size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
                                if (type-UCS2BE < 2U) goto ilseq;
                                l = 4;
                                if (*inb < 4) goto starved;
-                               d = get_16((void *)(*in + 2), from);
-                               if ((unsigned)(c-0xdc00) >= 0x400) goto ilseq;
-                               c = ((c-0xd800)<<10) | (d-0xdc00);
+                               d = get_16((void *)(*in + 2), type);
+                               if ((unsigned)(d-0xdc00) >= 0x400) goto ilseq;
+                               c = ((c-0xd7c0)<<10) + (d-0xdc00);
+                       }
+                       break;
+               case SHIFT_JIS:
+                       if (c-0xa1 <= 0xdf-0xa1) {
+                               c += 0xff61-0xa1;
+                               break;
+                       }
+                       l = 2;
+                       if (*inb < 2) goto starved;
+                       d = *((unsigned char *)*in + 1);
+                       if (c-129 <= 159-129) c -= 129;
+                       else if (c-224 <= 239-224) c -= 193;
+                       else goto ilseq;
+                       c *= 2;
+                       if (d-64 <= 158-64) {
+                               if (d==127) goto ilseq;
+                               if (d>127) d--;
+                               d -= 64;
+                       } else if (d-159 <= 252-159) {
+                               c++;
+                               d -= 159;
+                       }
+                       c = jis0208[c][d];
+                       if (!c) goto ilseq;
+                       break;
+               case EUC_JP:
+                       l = 2;
+                       if (*inb < 2) goto starved;
+                       d = *((unsigned char *)*in + 1);
+                       if (c==0x8e) {
+                               c = d;
+                               if (c-0xa1 > 0xdf-0xa1) goto ilseq;
+                               c += 0xff61 - 0xa1;
+                               break;
+                       }
+                       c -= 0xa1;
+                       d -= 0xa1;
+                       if (c >= 84 || d >= 94) goto ilseq;
+                       c = jis0208[c][d];
+                       if (!c) goto ilseq;
+                       break;
+               case GB2312:
+                       if (c < 0xa1) goto ilseq;
+               case GBK:
+               case GB18030:
+                       c -= 0x81;
+                       if (c >= 126) goto ilseq;
+                       l = 2;
+                       if (*inb < 2) goto starved;
+                       d = *((unsigned char *)*in + 1);
+                       if (d < 0xa1 && type == GB2312) goto ilseq;
+                       if (d-0x40>=191 || d==127) {
+                               if (d-'0'>9 || type != GB18030)
+                                       goto ilseq;
+                               l = 4;
+                               if (*inb < 4) goto starved;
+                               c = (10*c + d-'0') * 1260;
+                               d = *((unsigned char *)*in + 2);
+                               if (d-0x81>126) goto ilseq;
+                               c += 10*(d-0x81);
+                               d = *((unsigned char *)*in + 3);
+                               if (d-'0'>9) goto ilseq;
+                               c += d-'0';
+                               c += 128;
+                               for (d=0; d<=c; ) {
+                                       k = 0;
+                                       for (int i=0; i<126; i++)
+                                               for (int j=0; j<190; j++)
+                                                       if (gb18030[i][j]-d <= c-d)
+                                                               k++;
+                                       d = c+1;
+                                       c += k;
+                               }
+                               break;
                        }
+                       d -= 0x40;
+                       if (d>63) d--;
+                       c = gb18030[c][d];
                        break;
                default:
                        if (c < 128+type) break;
@@ -231,15 +327,16 @@ size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
                case UCS2LE:
                case UTF_16BE:
                case UTF_16LE:
-                       if (c < 0x10000) {
+                       if (c < 0x10000 || type-UCS2BE < 2U) {
+                               if (c >= 0x10000) c = 0xFFFD;
                                if (*outb < 2) goto toobig;
                                put_16((void *)*out, c, totype);
                                *out += 2;
                                *outb -= 2;
                                break;
                        }
-                       if (type-UCS2BE < 2U) goto ilseq;
                        if (*outb < 4) goto toobig;
+                       c -= 0x10000;
                        put_16((void *)*out, (c>>10)|0xd800, totype);
                        put_16((void *)(*out + 2), (c&0x3ff)|0xdc00, totype);
                        *out += 4;