From 73ec1d0495bb676012fd69491900ca8beb989ef7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 17 Apr 2012 14:19:46 -0400 Subject: [PATCH] introduce new wide scanf code and remove the last remnants of old scanf at this point, strto* and all scanf family functions are using the new unified integer and floating point parser/converter code. the wide scanf is largely a wrapper for ordinary byte-based scanf; since numbers can only contain ascii characters, only strings need to be handled specially. --- src/stdio/__scanf.c | 483 ------------------------------------------- src/stdio/__scanf.h | 16 -- src/stdio/vfwscanf.c | 297 ++++++++++++++++++++++++-- src/stdio/vswscanf.c | 40 ++-- 4 files changed, 312 insertions(+), 524 deletions(-) delete mode 100644 src/stdio/__scanf.c delete mode 100644 src/stdio/__scanf.h diff --git a/src/stdio/__scanf.c b/src/stdio/__scanf.c deleted file mode 100644 index 7c82cca4..00000000 --- a/src/stdio/__scanf.c +++ /dev/null @@ -1,483 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "__scanf.h" - -static int read(rctx_t *r) -{ - if (--r->w < 0) return r->w = -1; - if (r->u) r->u = 0; - else r->read(r); - return r->c; -} - -static void unread(rctx_t *r) -{ - if (r->c < 0 || r->w < 0) return; - r->w++; - r->u = 1; -} - -#define SIZE_hh -2 -#define SIZE_h -1 -#define SIZE_def 0 -#define SIZE_l 1 -#define SIZE_ll 2 -#define SIZE_L 3 - -static void store_int(void *dest, int size, int neg, unsigned long long i) -{ - if (!dest) return; - if (neg) i = -i; - switch (size) { - case SIZE_hh: - *(char *)dest = i; - break; - case SIZE_h: - *(short *)dest = i; - break; - case SIZE_def: - *(int *)dest = i; - break; - case SIZE_l: - *(long *)dest = i; - break; - case SIZE_ll: - *(long long *)dest = i; - break; - } -} - -static void *arg_n(va_list ap, unsigned int n) -{ - void *p; - unsigned int i; - va_list ap2; - va_copy(ap2, ap); - for (i=n; i>1; i--) va_arg(ap2, void *); 
- p = va_arg(ap2, void *); - va_end(ap2); - return p; -} - -int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap) -{ - int mode=0; - int width; - int size; - const wchar_t *p, *z; - int c, l, t, m; - long long dummy; - char *s; - wchar_t *wcs; - mbstate_t st; - int wide = r->wide; - void *dest=NULL; - int invert; - unsigned long long i=0; - int neg=0; - int matches=0; - long double f; - int (*is_space)(int) = r->is_space; - - for (p=fmt; *p; ) { - if (is_space(*p)) { - do p++; while (is_space(*p)); - do r->w=1; while (is_space(read(r))); - unread(r); - continue; - } else if (*p != '%' || p[1] == '%') { - if (*p == '%') p++; - r->w = 1; - if ((c = read(r)) < 0) - goto input_fail; - if (*p++ != c) - goto match_fail; - continue; - } - p++; - if (mode != 1) { - for (z=p; isdigit(*z); z++); - if (*z != '$' && *z != '*') { - if (mode == 0) mode = 1; - else goto fmt_fail; - } else if (*z != '*') { - int pos = 0; - mode = 2; - for (; pw = width; - - if (t != 'n') { - if (read(r) < 0) goto input_fail; - unread(r); - } - - switch (t) { - case 'n': - store_int(dest, size, 0, r->l - r->u); - /* do not increment match count, etc! */ - continue; - case 'C': - wcs = dest ? dest : (void *)&dummy; - st = (mbstate_t){ 0 }; - while ((c=read(r)) >= 0) { - if (wide) { - if (dest) *wcs++ = c; - } else { - char ch = c; - switch (mbrtowc(wcs, &ch, 1, &st)) { - case -1: - goto enc_fail; - case -2: - break; - default: - if (dest) wcs++; - } - } - } - if (r->w > 0) goto match_fail; - break; - case 'c': - s = dest ? dest : (void *)&dummy; - while ((c=read(r)) >= 0) { - if (wide) { - if ((l=wctomb(s, c)) < 0) - goto enc_fail; - if (dest) s += l; - } else { - if (dest) *s++ = c; - } - } - if (r->w > 0) goto match_fail; - break; - case '[': - wcs = dest ? dest : (void *)&dummy; - s = dest ? 
dest : (void *)&dummy; - if (!wide && size == SIZE_l) st = (mbstate_t){ 0 }; - - if (*p == '^') p++, invert = 1; - else invert = 0; - - if (wide) { - for (m=0; (c=read(r)) >= 0; m=1) { - for (z=p; *z && *z != c && (*z != ']' || z==p); z++); - if (!*z) goto fmt_fail; - if (*z == c && (*z != ']' || z==p)) { - if (invert) break; - } else { - if (!invert) break; - } - if (size == SIZE_l) { - if (dest) *wcs++ = c; - } else { - if ((l=wctomb(s, c)) < 0) - goto enc_fail; - if (dest) s += l; - } - } - for (p++; *p && *p != ']'; p++); - p++; - } else { - unsigned char scanset[257]; - memset(scanset, invert, sizeof scanset); - scanset[0] = 0; - for (z=p; *z && (*z != ']' || z==p); z++) - scanset[1+*z] = 1-invert; - if (!*z) goto fmt_fail; - p=z+1; - c=0; - for (m=0; scanset[(c=read(r))+1]; m=1) { - if (size == SIZE_l) { - char ch = c; - switch (mbrtowc(wcs, &ch, 1, &st)) { - case -1: - goto enc_fail; - case -2: - break; - default: - if (dest) wcs++; - } - } else { - if (dest) *s++ = c; - } - } - } - if (!m) goto match_fail; - if (dest) { - if (size == SIZE_l) *wcs++ = 0; - else *s++ = 0; - } - break; - default: - /* read unlimited number of spaces, then reset width */ - do r->w = 1; while (is_space(c = read(r))); - if (c < 0) goto input_fail; - unread(r); - r->w = width; - } - - switch (t) { - case 'p': - case 'X': - t = 'x'; - case 'd': - case 'i': - case 'o': - case 'u': - case 'x': - i = m = neg = 0; - if ((c=read(r)) == '-') neg=1; - else if (c != '+') unread(r); - switch (t) { - case 'i': - case 'x': - if ((c=read(r)) != '0') { - if (t == 'i') t = 'd'; - unread(r); - break; - } - m = 1; - if (((c=read(r))|0x20) != 'x') { - if (t == 'i') t = 'o'; - unread(r); - break; - } - t = 'x'; - m = 0; - } - } - - switch (t) { - case 'd': - case 'u': - for (; isdigit(c=read(r)); m=1) - i = 10*i + c-'0'; - goto int_finish; - case 'o': - for (; (unsigned)(c=read(r))-'0' < 8; m=1) - i = (i<<3) + c-'0'; - goto int_finish; - case 'x': - for (; ; m=1) { - if (isdigit(c=read(r))) { - i = 
(i<<4) + c-'0'; - } else if ((unsigned)(c|0x20)-'a' < 6) { - i = (i<<4) + (c|0x20)-'a'+10; - } else break; - } - int_finish: - if (!m) goto match_fail; - store_int(dest, size, neg, i); - break; - case 'a': - case 'e': - case 'f': - case 'g': - f = 0.0; - neg = m = 0; - if ((c=read(r)) == '-') neg=1; - else if (c != '+') unread(r); - /* FIXME: check for INF/NAN strings here */ - if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x')) - goto hexfloat; - else unread(r); - for (; isdigit(c=read(r)); m=1) - f = 10.0 * f + (c-'0'); - if (c=='.') { - double mag = 10.0; - for (; isdigit(c=read(r)); mag*=10.0) - f += (c-'0')/mag; - } - if ((c|0x20)=='e') { - int ex=0, en=0; - m = 0; - if ((c=read(r))=='-') en=1; - else if (c!='+') unread(r); - for (; isdigit(c=read(r)); m=1) - if (ex < LDBL_MAX_10_EXP) - ex = 10 * ex + (c-'0'); - if (ex > LDBL_MAX_10_EXP) - f = en ? 0 : INFINITY; - else { - if (en) while (ex--) f/=10.0; - else while (ex--) f*=10.0; - } - } - goto writefloat; -hexfloat: - m = 0; - for (; isxdigit(c=read(r)); m=1) - if (isdigit(c)) f = 16.0*f + (c-'0'); - else f = 16.0*f + ((c|32)-'a'+10); - if (c=='.') { - double mag = 1/16.0; - for (; isxdigit(c=read(r)); mag*=1/16.0) - if (isdigit(c)) f += (c-'0')*mag; - else f += ((c|32)-'a'+10)*mag; - } - if ((c|0x20)=='p') { - int ex=0, en=0; - m = 0; - if ((c=read(r))=='-') en=1; - else if (c!='+') unread(r); - for (; isdigit(c=read(r)); m=1) - if (ex < LDBL_MAX_EXP) - ex = 10 * ex + (c-'0'); - if (ex > LDBL_MAX_EXP) - f = en ? 0 : INFINITY; - else { - if (en) while (ex--) f*=0.5; - else while (ex--) f*=2.0; - } - } -writefloat: - if (!m) goto match_fail; - if (neg) f *= -1.0; - if (dest) switch (size) { - case SIZE_def: - *(float *)dest = f; - break; - case SIZE_l: - *(double *)dest = f; - break; - case SIZE_L: - *(long double *)dest = f; - break; - } - break; - case 'S': - wcs = dest ? 
dest : (void *)&dummy; - st = (mbstate_t){ 0 }; - while((c=read(r)) >= 0) { - if (wide) { - if (is_space(c)) break; - if (dest) *wcs++ = c; - } else { - char ch = c; - if (is_space(c)) break; - switch (mbrtowc(wcs, &ch, 1, &st)) { - case -1: - goto enc_fail; - case -2: - break; - default: - if (dest) wcs++; - } - } - } - if (dest) *wcs++ = 0; - break; - case 's': - s = dest ? dest : (void *)&dummy; - while((c=read(r)) >= 0) { - if (wide) { - if (is_space(c)) break; - if ((l=wctomb(s, c)) < 0) - goto enc_fail; - if (dest) s += l; - } else { - if (is_space(c)) break; - if (dest) *s++ = c; - } - } - if (dest) *s++ = 0; - break; - } - - /* unread will do nothing if field width was exhausted */ - unread(r); - if (dest) matches++; - } - return matches; -enc_fail: - errno = EILSEQ; -fmt_fail: -input_fail: - if (!matches) matches--; -match_fail: - unread(r); - return matches; -} diff --git a/src/stdio/__scanf.h b/src/stdio/__scanf.h deleted file mode 100644 index e549b979..00000000 --- a/src/stdio/__scanf.h +++ /dev/null @@ -1,16 +0,0 @@ -#include - -typedef struct rctx -{ - void (*read)(struct rctx *); - void *opaque; - int wide; - int (*is_space)(); - int l; - int e; - int c; - int u; - int w; -} rctx_t; - -int __scanf(rctx_t *, const wchar_t *, va_list); diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c index 491c1403..4426a129 100644 --- a/src/stdio/vfwscanf.c +++ b/src/stdio/vfwscanf.c @@ -1,28 +1,318 @@ #include -#include -#include +#include +#include +#include #include #include +#include +#include +#include +#include +#include #include "stdio_impl.h" -#include "__scanf.h" +#include "shgetc.h" +#include "intscan.h" +#include "floatscan.h" + +#define SIZE_hh -2 +#define SIZE_h -1 +#define SIZE_def 0 +#define SIZE_l 1 +#define SIZE_L 2 +#define SIZE_ll 3 + +/* Store i through dest as the integer type selected by size; does nothing if dest is null or size is SIZE_L. */ +static void store_int(void *dest, int size, unsigned long long i) +{ + if (!dest) return; + switch (size) { + case SIZE_hh: + *(char *)dest = i; + break; + case SIZE_h: + *(short *)dest = i; + break; + case
SIZE_def: + *(int *)dest = i; + break; + case SIZE_l: + *(long *)dest = i; + break; + case SIZE_ll: + *(long long *)dest = i; + break; + } +} + +/* Return the n-th (1-based) variadic pointer argument; works on a copy, so ap itself is not advanced. */ +static void *arg_n(va_list ap, unsigned int n) +{ + void *p; + unsigned int i; + va_list ap2; + va_copy(ap2, ap); + for (i=n; i>1; i--) va_arg(ap2, void *); + p = va_arg(ap2, void *); + va_end(ap2); + return p; +} -static void f_read(rctx_t *r) +/* Return 1 if c is a member of the scanset that starts at set (the text just past + * '[' and any '^'), else 0. A leading '-' or ']' is literal; x-y is an inclusive range. */ +static int in_set(const wchar_t *set, int c) { - FILE *f = r->opaque; - if ((r->c = fgetwc(f)) >= 0) r->l++; + int j; + const wchar_t *p = set; + if (*p == '-') { + if (c=='-') return 1; + p++; + } else if (*p == ']') { + if (c==']') return 1; + p++; + } + for (; *p && *p != ']'; p++) { + if (*p=='-' && p[1] && p[1] != ']') + for (j=p++[-1]; j<*p; j++) + if (c==j) return 1; + if (c==*p) return 1; + } + return 0; } +#if 1 +/* Fast paths: for ASCII, read or step back one byte directly in the stdio read buffer. */ +#undef getwc +#define getwc(f) \ + ((f)->rpos < (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f)) + +#undef ungetwc +/* The comparison is unsigned (128U) so that a negative c (EOF) never takes the fast path and moves rpos. */ +#define ungetwc(c,f) \ + ((f)->rend && (c)<128U ? *--(f)->rpos : ungetwc((c),(f))) +#endif + +/* Read wide-character input from f as directed by fmt, storing results through the + * pointers in ap. Returns the number of assigned conversions (matches), or -1 when an + * input or format failure occurs before the first assignment. */ int vfwscanf(FILE *f, const wchar_t *fmt, va_list ap) { - rctx_t r = { f_read, (void *)f, 1, iswspace }; - int result; + int width; + int size; + int alloc; + const wchar_t *p; + int c, t; + char *s; + wchar_t *wcs; + void *dest=NULL; + int invert; + int matches=0; + off_t pos = 0, cnt; + static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" }; + char tmp[3*sizeof(int)+10]; - result = __scanf(&r, fmt, ap); + FLOCK(f); - if (r.u && r.c >= 0) { - ungetwc(r.c, f); - } + for (p=fmt; *p; p++) { + + if (iswspace(*p)) { + while (iswspace(p[1])) p++; + while (iswspace((c=getwc(f)))) pos++; + ungetwc(c, f); + continue; + } + if (*p != '%' || p[1] == '%') { + p += *p=='%'; + c = getwc(f); + if (c!=*p) { + ungetwc(c, f); + if (c<0) goto input_fail; + goto match_fail; + } + pos++; + continue; + } + + p++; + if (*p=='*') { + dest = 0; p++; + } else if (iswdigit(*p) && p[1]=='$') { + dest = arg_n(ap, *p-'0'); p+=2; + } else { + dest = va_arg(ap, void
*); + } + + for (width=0; iswdigit(*p); p++) { + width = 10*width + *p - '0'; + } - return result; + if (*p=='m') { + alloc = 1; + p++; + } else { + alloc = 0; + } + + size = SIZE_def; + switch (*p++) { + case 'h': + if (*p == 'h') p++, size = SIZE_hh; + else size = SIZE_h; + break; + case 'l': + if (*p == 'l') p++, size = SIZE_ll; + else size = SIZE_l; + break; + case 'j': + size = SIZE_ll; + break; + case 'z': + case 't': + size = SIZE_l; + break; + case 'L': + size = SIZE_L; + break; + case 'd': case 'i': case 'o': case 'u': case 'x': + case 'a': case 'e': case 'f': case 'g': + case 'A': case 'E': case 'F': case 'G': case 'X': + case 's': case 'c': case '[': + case 'S': case 'C': + case 'p': case 'n': + p--; + break; + default: + goto fmt_fail; + } + + t = *p; + + /* Transform ls,lc -> S,C */ + if (size==SIZE_l && (t&15)==3) t&=~32; + + if (t != 'n' && t != '[' && (t|32) != 'c') { + while (iswspace((c=getwc(f)))) pos++; + if (c < 0) goto input_fail; + ungetwc(c, f); + } + + switch (t) { + case 'n': + store_int(dest, size, pos); + /* do not increment match count, etc! */ + continue; + + case 'c': + if (width < 1) width = 1; + s = dest; + for (; width && (c=getwc(f)) >= 0; width--) { + int l = wctomb(s?s:tmp, c); + if (l<0) goto input_fail; + if (s) s+=l; + pos++; + } + if (width) goto match_fail; + break; + + case 'C': + if (width < 1) width = 1; + wcs = dest; + for (; width && (c=getwc(f)) >= 0; width--) + pos++, wcs && (*wcs++ = c); + if (width) goto match_fail; + break; + + case 's': + s = dest; + if (width < 1) width = -1; /* no width given: unbounded (-1 never counts down to 0) */ + while (width && !iswspace(c=getwc(f)) && c!=EOF) { + int l = wctomb(s?s:tmp, c); + if (l<0) goto input_fail; + if (s) s+=l; + pos++; + width -= (width>0); + } + if (width) ungetwc(c, f); /* push back the delimiter unless the width ran out */ + if (s) *s = 0; + break; + + case 'S': + wcs = dest; + if (width < 1) width = -1; /* no width given: unbounded */ + while (width && !iswspace(c=getwc(f)) && c!=EOF) { + if (wcs) *wcs++ = c; /* wcs is null when assignment is suppressed */ + pos++; + width -= (width>0); + } + if (width) ungetwc(c, f); /* push back the delimiter unless the width ran out */ + if (wcs) *wcs = 0; + break; + + case '[': + s = (size == SIZE_def) ? dest : 0; + wcs = (size == SIZE_l) ?
dest : 0; + + if (*++p == '^') p++, invert = 1; + else invert = 0; + + int gotmatch = 0; + + if (width < 1) width = -1; /* no width given: unbounded */ + while (width) { + if ((c=getwc(f))<0) break; + if (in_set(p, c) == invert) + break; + if (wcs) { + *wcs++ = c; + } else if (size != SIZE_l) { + int l = wctomb(s?s:tmp, c); + if (l<0) goto input_fail; + if (s) s+=l; + } + pos++; + width -= (width>0); + gotmatch=1; + } + if (width) ungetwc(c, f); /* c was not consumed unless the width ran out */ + + if (!gotmatch) goto match_fail; + + if (*p==']') p++; + while (*p!=']') { + if (!*p) goto fmt_fail; + p++; + } + + if (wcs) *wcs++ = 0; + if (s) *s++ = 0; + break; + + case 'd': case 'i': case 'o': case 'u': case 'x': + case 'a': case 'e': case 'f': case 'g': + case 'A': case 'E': case 'F': case 'G': case 'X': + case 'p': + if (width < 1) width = 0; + snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln", + 1+!dest, "%*", width, size_pfx[size+2], t); + cnt = 0; + if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1) + goto input_fail; + else if (!cnt) + goto match_fail; + pos += cnt; + break; + default: + goto fmt_fail; + } + + if (dest) matches++; + } + if (0) { +fmt_fail: +input_fail: + if (!matches) matches--; + } +match_fail: + FUNLOCK(f); + return matches; } diff --git a/src/stdio/vswscanf.c b/src/stdio/vswscanf.c index 2c4ffbe0..4396d7df 100644 --- a/src/stdio/vswscanf.c +++ b/src/stdio/vswscanf.c @@ -1,19 +1,39 @@ -#include -#include -#include -#include +#include "stdio_impl.h" -#include "__scanf.h" - -static void s_read(rctx_t *r) +/* FILE read hook for vswscanf: convert the next run of the wide string held in + * f->cookie to multibyte form in f->buf and return its first byte through buf. + * Returns 0 when nothing is left, the conversion fails, or len is 0; otherwise 1. */ +static size_t wstring_read(FILE *f, unsigned char *buf, size_t len) { - wchar_t *s = r->opaque; - if (!s[r->l]) r->c = -1; - else r->c = s[r->l++]; + const wchar_t *src = f->cookie; + size_t k; + + if (!src) return 0; + + k = wcsrtombs((void *)f->buf, &src, f->buf_size, 0); + if (k==(size_t)-1) { + f->rpos = f->rend = 0; + return 0; + } + + f->rpos = f->buf; + f->rend = f->buf + k; + f->cookie = (void *)src; + + if (!len || !k) return 0; /* !k: string exhausted; do not hand back the terminating NUL as data */ + + *buf = *f->rpos++; + return 1; } +/* Scan the wide string s by wrapping it in a temporary FILE whose read hook is wstring_read. */ int vswscanf(const wchar_t *s, const wchar_t *fmt, va_list ap) { - rctx_t r = { s_read, (void *)s, 1, iswspace }; - return
__scanf(&r, fmt, ap); + unsigned char buf[256]; + FILE f = { + .buf = buf, .buf_size = sizeof buf, + .cookie = (void *)s, + .read = wstring_read, .lock = -1 + }; + return vfwscanf(&f, fmt, ap); } -- 2.20.1