initial check-in, version 0.5.0
[musl] / src / stdio / __scanf.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdarg.h>
4 #include <ctype.h>
5 #include <wchar.h>
6 #include <wctype.h>
7 #include <limits.h>
8 #include <string.h>
9 #include <errno.h>
10 #include <math.h>
11 #include <float.h>
12
13 #include "__scanf.h"
14
15 static int read(rctx_t *r)
16 {
17         if (--r->w < 0) return r->w = -1;
18         if (r->u) r->u = 0;
19         else r->read(r);
20         return r->c;
21 }
22
23 static void unread(rctx_t *r)
24 {
25         //if (r->u || r->w < 0) return;
26         if (r->w < 0) return;
27         r->w++;
28         r->u = 1;
29 }
30
/* Length-modifier codes recorded while parsing a conversion specification. */
#define SIZE_hh -2
#define SIZE_h  -1
#define SIZE_def 0
#define SIZE_l   1
#define SIZE_ll  2
#define SIZE_L   3

/*
 * Store an integer conversion result through dest.
 *
 * dest  target object, or NULL when the result is to be discarded
 * size  one of the SIZE_* codes selecting the width of the target object
 * neg   non-zero when the value has to be negated before storing
 * i     magnitude that was parsed
 *
 * Any size code without an integer target (e.g. SIZE_L) stores nothing.
 */
static void store_int(void *dest, int size, int neg, unsigned long long i)
{
        unsigned long long value;

        if (dest == NULL)
                return;

        /* Negation is done in unsigned arithmetic, so it simply wraps. */
        value = neg ? -i : i;

        if (size == SIZE_hh) {
                *(char *)dest = value;
        } else if (size == SIZE_h) {
                *(short *)dest = value;
        } else if (size == SIZE_def) {
                *(int *)dest = value;
        } else if (size == SIZE_l) {
                *(long *)dest = value;
        } else if (size == SIZE_ll) {
                *(long long *)dest = value;
        }
}
60
/*
 * Return the n-th (1-based) pointer argument from ap without disturbing
 * the caller's va_list: the walk is done on a private copy.  Every
 * argument is fetched as a void pointer.
 */
static void *arg_n(va_list ap, unsigned int n)
{
        va_list cursor;
        void *result;

        va_copy(cursor, ap);
        /* Skip the n-1 arguments that precede the requested one. */
        while (n > 1) {
                va_arg(cursor, void *);
                n--;
        }
        result = va_arg(cursor, void *);
        va_end(cursor);

        return result;
}
72
73 int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap)
74 {
75         int mode=0;
76         int width;
77         int size;
78         const wchar_t *p, *z;
79         int c, l, t, m;
80         long long dummy;
81         char *s;
82         wchar_t *wcs;
83         mbstate_t st;
84         int wide = r->wide;
85         void *dest=NULL;
86         int invert;
87         unsigned long long i=0;
88         int neg=0;
89         int matches=0;
90         long double f;
91         int (*is_space)(int) = r->is_space;
92
93         for (p=fmt; *p; ) {
94                 if (is_space(*p)) {
95                         do p++; while (is_space(*p));
96                         do r->w=1; while (is_space(read(r)));
97                         unread(r);
98                         continue;
99                 } else if (*p != '%' || p[1] == '%') {
100                         if (*p == '%') p++;
101                         r->w = 1;
102                         if (*p++ != read(r))
103                                 goto match_fail;
104                         continue;
105                 }
106                 p++;
107                 if (mode != 1) {
108                         for (z=p; isdigit(*z); z++);
109                         if (*z != '$' && *z != '*') {
110                                 if (mode == 0) mode = 1;
111                                 else goto fmt_fail;
112                         } else if (*z != '*') {
113                                 int pos = 0;
114                                 mode = 2;
115                                 for (; p<z; p++) {
116                                         pos = 10*pos + *p - '0';
117                                 }
118                                 p++;
119                                 if (!pos) goto fmt_fail;
120                                 dest = arg_n(ap, pos);
121                         }
122                 }
123                 if (*p == '*') {
124                         dest = NULL;
125                         p++;
126                 } else if (mode == 1) {
127                         dest = va_arg(ap, void *);
128                 }
129                 
130                 if (!*p) goto fmt_fail;
131
132                 width = 0;
133                 for (; isdigit(*p); p++) {
134                         width = 10*width + *p - '0';
135                 }
136
137                 size = 0;
138                 switch (*p++) {
139                 case 0:
140                         goto fmt_fail;
141                 case 'h':
142                         if (*p == 'h') p++, size = SIZE_hh;
143                         else size = SIZE_h;
144                         break;
145                 case 'l':
146                         if (*p == 'l') p++, size = SIZE_ll;
147                         else size = SIZE_l;
148                         break;
149                 case 'j':
150                         size = SIZE_ll;
151                         break;
152                 case 'z':
153                 case 't':
154                         size = SIZE_l;
155                         break;
156                 case 'L':
157                         size = SIZE_L;
158                         break;
159                 case 'd': case 'i': case 'o': case 'u': case 'x':
160                 case 'a': case 'e': case 'f': case 'g':
161                 case 'A': case 'E': case 'F': case 'G': case 'X':
162                 case 's': case 'c': case '[':
163                 case 'S': case 'C':
164                 case 'p': case 'n':
165                         p--;
166                         break;
167                 default:
168                         goto fmt_fail;
169                 }
170
171                 t = *p++;
172
173                 switch (t) {
174                 case 'C':
175                 case 'c':
176                         if (width < 1) width = 1;
177                 case 's':
178                         if (size == SIZE_l) t &= ~0x20;
179                 case 'd': case 'i': case 'o': case 'u': case 'x':
180                 case 'a': case 'e': case 'f': case 'g':
181                 case 'A': case 'E': case 'F': case 'G': case 'X':
182                 case '[': case 'S':
183                 case 'p': case 'n':
184                         if (width < 1) width = INT_MAX;
185                         break;
186                 default:
187                         goto fmt_fail;
188                 }
189
190                 r->w = width;
191
192                 if (t != 'n') {
193                         if (read(r) < 0) goto input_fail;
194                         unread(r);
195                 }
196
197                 switch (t) {
198                 case 'n':
199                         store_int(dest, size, 0, r->l - r->u);
200                         /* do not increment match count, etc! */
201                         continue;
202                 case 'C':
203                         wcs = dest ? dest : (void *)&dummy;
204                         st = (mbstate_t){ 0 };
205                         while ((c=read(r)) >= 0) {
206                                 if (wide) {
207                                         if (dest) *wcs++ = c;
208                                 } else {
209                                         char ch = c;
210                                         switch (mbrtowc(wcs, &ch, 1, &st)) {
211                                         case -1:
212                                                 goto enc_fail;
213                                         case -2:
214                                                 break;
215                                         default:
216                                                 if (dest) wcs++;
217                                         }
218                                 }
219                         }
220                         if (r->w > 0) goto match_fail;
221                         break;
222                 case 'c':
223                         s = dest ? dest : (void *)&dummy;
224                         while ((c=read(r)) >= 0) {
225                                 if (wide) {
226                                         if ((l=wctomb(s, c)) < 0)
227                                                 goto enc_fail;
228                                         if (dest) s += l;
229                                 } else {
230                                         if (dest) *s++ = c;
231                                 }
232                         }
233                         if (r->w > 0) goto match_fail;
234                         break;
235                 case '[':
236                         wcs = dest ? dest : (void *)&dummy;
237                         s = dest ? dest : (void *)&dummy;
238                         if (!wide && size == SIZE_l) st = (mbstate_t){ 0 };
239
240                         if (*p == '^') p++, invert = 1;
241                         else invert = 0;
242
243                         if (wide) {
244                                 for (m=0; (c=read(r)) >= 0; m=1) {
245                                         for (z=p; *z && *z != c && (*z != ']' || z==p); z++);
246                                         if (!*z) goto fmt_fail;
247                                         if (*z == c && (*z != ']' || z==p)) {
248                                                 if (invert) break;
249                                         } else {
250                                                 if (!invert) break;
251                                         }
252                                         if (size == SIZE_l) {
253                                                 if (dest) *wcs++ = c;
254                                         } else {
255                                                 if ((l=wctomb(s, c)) < 0)
256                                                         goto enc_fail;
257                                                 if (dest) s += l;
258                                         }
259                                 }
260                                 for (p++; *p && *p != ']'; p++);
261                                 p++;
262                         } else {
263                                 unsigned char scanset[257];
264                                 memset(scanset, invert, sizeof scanset);
265                                 scanset[0] = 0;
266                                 for (z=p; *z && (*z != ']' || z==p); z++)
267                                         scanset[1+*z] = 1-invert;
268                                 if (!*z) goto fmt_fail;
269                                 p=z+1;
270                                 c=0;
271                                 for (m=0; scanset[(c=read(r))+1]; m=1) {
272                                         if (size == SIZE_l) {
273                                                 char ch = c;
274                                                 switch (mbrtowc(wcs, &ch, 1, &st)) {
275                                                 case -1:
276                                                         goto enc_fail;
277                                                 case -2:
278                                                         break;
279                                                 default:
280                                                         if (dest) wcs++;
281                                                 }
282                                         } else {
283                                                 if (dest) *s++ = c;
284                                         }
285                                 }
286                         }
287                         if (!m) goto match_fail;
288                         if (dest) {
289                                 if (size == SIZE_l) *wcs++ = 0;
290                                 else *s++ = 0;
291                         }
292                         break;
293                 default:
294                         /* read unlimited number of spaces, then reset width */
295                         do r->w = 1; while (is_space(c = read(r)));
296                         if (c < 0) goto input_fail;
297                         unread(r);
298                         r->w = width;
299                 }
300
301                 switch (t) {
302                 case 'p':
303                 case 'X':
304                         t = 'x';
305                 case 'd':
306                 case 'i':
307                 case 'o':
308                 case 'u':
309                 case 'x':
310                         i = m = neg = 0;
311                         if ((c=read(r)) == '-') neg=1;
312                         else if (c != '+') unread(r);
313                         switch (t) {
314                         case 'i':
315                         case 'x':
316                                 if ((c=read(r)) != '0') {
317                                         if (t == 'i') t = 'd';
318                                         unread(r);
319                                         break;
320                                 }
321                                 if (((c=read(r))|0x20) != 'x') {
322                                         if (t == 'i') {
323                                                 t = 'o';
324                                                 /* lone 0 is valid octal */
325                                                 if ((unsigned)(c-'0') >= 8) {
326                                                         m = 1;
327                                                         goto int_finish;
328                                                 }
329                                         }
330                                         unread(r);
331                                         break;
332                                 }
333                                 t = 'x';
334                         }
335                 }
336                 
337                 switch (t) {
338                 case 'd':
339                 case 'u':
340                         for (m=0; isdigit(c=read(r)); m=1)
341                                 i = 10*i + c-'0';
342                         goto int_finish;
343                 case 'o':
344                         for (m=0; (unsigned)(c=read(r))-'0' < 8; m=1)
345                                 i = (i<<3) + c-'0';
346                         goto int_finish;
347                 case 'x':
348                         for (m=0; ; m=1) {
349                                 if (isdigit(c=read(r))) {
350                                         i = (i<<4) + c-'0';
351                                 } else if ((unsigned)(c|0x20)-'a' < 6) {
352                                         i = (i<<4) + (c|0x20)-'a'+10;
353                                 } else break;
354                         }
355                 int_finish:
356                         if (!m) goto match_fail;
357                         store_int(dest, size, neg, i);
358                         break;
359                 case 'a':
360                 case 'e':
361                 case 'f':
362                 case 'g':
363                         f = 0.0;
364                         neg = m = 0;
365                         if ((c=read(r)) == '-') neg=1;
366                         else if (c != '+') unread(r);
367                         /* FIXME: check for INF/NAN strings here */
368                         if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x'))
369                                 goto hexfloat;
370                         else unread(r);
371                         for (; isdigit(c=read(r)); m=1)
372                                 f = 10.0 * f + (c-'0');
373                         if (c=='.') {
374                                 double mag = 10.0;
375                                 for (; isdigit(c=read(r)); mag*=10.0)
376                                         f += (c-'0')/mag;
377                         }
378                         if ((c|0x20)=='e') {
379                                 int ex=0, en=0;
380                                 m = 0;
381                                 if ((c=read(r))=='-') en=1;
382                                 else if (c!='+') unread(r);
383                                 for (; isdigit(c=read(r)); m=1)
384                                         if (ex < LDBL_MAX_10_EXP)
385                                                 ex = 10 * ex + (c-'0');
386                                 if (ex > LDBL_MAX_10_EXP)
387                                         f = en ? 0 : INFINITY;
388                                 else {
389                                         if (en) while (ex--) f/=10.0;
390                                         else while (ex--) f*=10.0;
391                                 }
392                         }
393                         goto writefloat;
394 hexfloat:
395                         m = 0;
396                         for (; isxdigit(c=read(r)); m=1)
397                                 if (isdigit(c)) f = 16.0*f + (c-'0');
398                                 else f = 16.0*f + ((c|32)-'a'+10);
399                         if (c=='.') {
400                                 double mag = 1/16.0;
401                                 for (; isxdigit(c=read(r)); mag*=1/16.0)
402                                         if (isdigit(c)) f += (c-'0')*mag;
403                                         else f += ((c|32)-'a'+10)*mag;
404                         }
405                         if ((c|0x20)=='p') {
406                                 int ex=0, en=0;
407                                 m = 0;
408                                 if ((c=read(r))=='-') en=1;
409                                 else if (c!='+') unread(r);
410                                 for (; isdigit(c=read(r)); m=1)
411                                         if (ex < LDBL_MAX_EXP)
412                                                 ex = 10 * ex + (c-'0');
413                                 if (ex > LDBL_MAX_EXP)
414                                         f = en ? 0 : INFINITY;
415                                 else {
416                                         if (en) while (ex--) f*=0.5;
417                                         else while (ex--) f*=2.0;
418                                 }
419                         }
420 writefloat:
421                         if (!m) goto match_fail;
422                         if (neg) f *= -1.0;
423                         if (dest) switch (size) {
424                         case SIZE_def:
425                                 *(float *)dest = f;
426                                 break;
427                         case SIZE_l:
428                                 *(double *)dest = f;
429                                 break;
430                         case SIZE_L:
431                                 *(long double *)dest = f;
432                                 break;
433                         }
434                         break;
435                 case 'S':
436                         wcs = dest ? dest : (void *)&dummy;
437                         st = (mbstate_t){ 0 };
438                         while((c=read(r)) >= 0) {
439                                 if (wide) {
440                                         if (is_space(c)) break;
441                                         if (dest) *wcs++ = c;
442                                 } else {
443                                         char ch = c;
444                                         if (is_space(c)) break;
445                                         switch (mbrtowc(wcs, &ch, 1, &st)) {
446                                         case -1:
447                                                 goto enc_fail;
448                                         case -2:
449                                                 break;
450                                         default:
451                                                 if (dest) wcs++;
452                                         }
453                                 }
454                         }
455                         if (dest) *wcs++ = 0;
456                         break;
457                 case 's':
458                         s = dest ? dest : (void *)&dummy;
459                         while((c=read(r)) >= 0) {
460                                 if (wide) {
461                                         if (is_space(c)) break;
462                                         if ((l=wctomb(s, c)) < 0)
463                                                 goto enc_fail;
464                                         if (dest) s += l;
465                                 } else {
466                                         if (is_space(c)) break;
467                                         if (dest) *s++ = c;
468                                 }
469                         }
470                         if (dest) *s++ = 0;
471                         break;
472                 }
473
474                 /* unread will do nothing if field width was exhausted */
475                 unread(r);
476                 if (dest) matches++;
477         }
478         return matches;
479 enc_fail:
480         errno = EILSEQ;
481 fmt_fail:
482 input_fail:
483         if (!matches) matches--;
484 match_fail:
485         unread(r);
486         return matches;
487 }