- 2009 patch
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <ctype.h>
21 #include <wctype.h>
22
23 #include "adt/util.h"
24 #include "format_check.h"
25 #include "symbol_t.h"
26 #include "ast_t.h"
27 #include "entity_t.h"
28 #include "diagnostic.h"
29 #include "types.h"
30 #include "type_t.h"
31 #include "warning.h"
32 #include "lang_features.h"
33
/**
 * Flags that may appear in a conversion specification.
 * Each flag occupies its own bit so that several of them can be combined
 * in one format_flags_t value.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00U, /**< no flag */
	FMT_FLAG_HASH  = 0x01U, /**< '#'  */
	FMT_FLAG_ZERO  = 0x02U, /**< '0'  */
	FMT_FLAG_MINUS = 0x04U, /**< '-'  */
	FMT_FLAG_SPACE = 0x08U, /**< ' '  */
	FMT_FLAG_PLUS  = 0x10U, /**< '+'  */
	FMT_FLAG_TICK  = 0x20U  /**< '\'' */
} format_flag_t;
43
/** Bit set holding any combination of FMT_FLAG_* values. */
typedef unsigned int format_flags_t;
45
/**
 * Length modifiers of a conversion specification.
 * The numeric values are used as indices into the name table of
 * get_length_modifier_name(), so the order must not change.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE, /**< no length modifier */
	FMT_MOD_L,    /**< "L"  */
	FMT_MOD_hh,   /**< "hh" */
	FMT_MOD_h,    /**< "h"  */
	FMT_MOD_l,    /**< "l"  */
	FMT_MOD_ll,   /**< "ll" */
	FMT_MOD_j,    /**< "j"  */
	FMT_MOD_t,    /**< "t"  */
	FMT_MOD_z,    /**< "z"  */
	FMT_MOD_q,    /**< "q"  */

	/* the remaining modifiers are only recognised in microsoft mode */
	FMT_MOD_w,    /**< "w"   */
	FMT_MOD_I,    /**< "I"   */
	FMT_MOD_I32,  /**< "I32" */
	FMT_MOD_I64   /**< "I64" */
} format_length_modifier_t;
63
64 typedef struct format_spec_t {
65         const char    *name;     /**< name of the function */
66         format_kind_t  fmt_kind; /**< kind */
67         unsigned       fmt_idx;  /**< index of the format string */
68         unsigned       arg_idx;  /**< index of the first argument */
69 } format_spec_t;
70
71 static const char* get_length_modifier_name(const format_length_modifier_t mod)
72 {
73         static const char* const names[] = {
74                 [FMT_MOD_NONE] = "",
75                 [FMT_MOD_L]    = "L",
76                 [FMT_MOD_hh]   = "hh",
77                 [FMT_MOD_h]    = "h",
78                 [FMT_MOD_l]    = "l",
79                 [FMT_MOD_ll]   = "ll",
80                 [FMT_MOD_j]    = "j",
81                 [FMT_MOD_t]    = "t",
82                 [FMT_MOD_z]    = "z",
83                 [FMT_MOD_q]    = "q",
84                 /* only in microsoft mode */
85                 [FMT_MOD_w]    = "w",
86                 [FMT_MOD_I]    = "I",
87                 [FMT_MOD_I32]  = "I32",
88                 [FMT_MOD_I64]  = "I64"
89         };
90         assert(mod < lengthof(names));
91         return names[mod];
92 }
93
94 static void warn_invalid_length_modifier(const source_position_t *pos,
95                                          const format_length_modifier_t mod,
96                                          const wchar_rep_t conversion)
97 {
98         warningf(pos,
99                 "invalid length modifier '%s' for conversion specifier '%%%c'",
100                 get_length_modifier_name(mod), conversion
101         );
102 }
103
/**
 * Cursor over a format string that hides whether the underlying literal is
 * a narrow or a wide string: the accessors are selected at run time through
 * the function pointers.
 */
typedef struct vchar_t {
	/** the string (a string_t or a wide_string_t, matching the accessors) */
	const void *string;
	/** current position */
	size_t      position;
	/** size of the string (atend() treats position + 1 == size as the end,
	 *  so this presumably counts the terminating 0 -- confirm) */
	size_t      size;

	/** return the first character of the string and set the position to 0 */
	unsigned (*first)(struct vchar_t *self);
	/** advance the position by one and return the character found there */
	unsigned (*next)(struct vchar_t *self);
	/** return non-zero if the given character is a digit */
	int (*is_digit)(unsigned vchar);
} vchar_t;
117
118 static unsigned string_first(vchar_t *self)
119 {
120         self->position = 0;
121         const string_t *string = self->string;
122         return string->begin[0];
123 }
124
125 static unsigned string_next(vchar_t *self)
126 {
127         ++self->position;
128         const string_t *string = self->string;
129         return string->begin[self->position];
130 }
131
/**
 * Test whether a character of a narrow format string is a decimal digit.
 *
 * isdigit() is only defined for EOF and values representable as unsigned
 * char, but this callback may be handed any unsigned value.  A plain range
 * check is well defined for every input; the digits '0'..'9' are guaranteed
 * to be contiguous in C.
 *
 * @param vchar  the character to test
 * @return non-zero if vchar is one of '0'..'9', 0 otherwise
 */
static int string_isdigit(unsigned vchar)
{
	return '0' <= vchar && vchar <= '9';
}
136
137 static unsigned wstring_first(vchar_t *self)
138 {
139         self->position = 0;
140         const wide_string_t *wstring = self->string;
141         return wstring->begin[0];
142 }
143
144 static unsigned wstring_next(vchar_t *self)
145 {
146         ++self->position;
147         const wide_string_t *wstring = self->string;
148         return wstring->begin[self->position];
149 }
150
/**
 * Test whether a character of a wide format string is a decimal digit.
 *
 * @param vchar  the wide character to test
 * @return non-zero if iswdigit() classifies vchar as a digit, 0 otherwise
 */
static int wstring_isdigit(unsigned vchar)
{
	const wint_t wide_char = (wint_t)vchar;
	return iswdigit(wide_char);
}
155
156 static bool atend(vchar_t *self)
157 {
158         return self->position + 1 == self->size;
159 }
160
161 /**
162  * Check printf-style format.
163  */
164 static void check_printf_format(const call_argument_t *arg, const format_spec_t *spec)
165 {
166         /* find format arg */
167         unsigned idx = 0;
168         for (; idx < spec->fmt_idx; ++idx) {
169                 if (arg == NULL)
170                         return;
171                 arg = arg->next;
172         }
173
174         const expression_t *fmt_expr = arg->expression;
175         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
176                 fmt_expr = fmt_expr->unary.value;
177         }
178
179         vchar_t vchar;
180         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
181                 vchar.string   = &fmt_expr->wide_string.value;
182                 vchar.size     = fmt_expr->wide_string.value.size;
183                 vchar.first    = wstring_first;
184                 vchar.next     = wstring_next;
185                 vchar.is_digit = wstring_isdigit;
186         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
187                 vchar.string   = &fmt_expr->string.value;
188                 vchar.size     = fmt_expr->string.value.size;
189                 vchar.first    = string_first;
190                 vchar.next     = string_next;
191                 vchar.is_digit = string_isdigit;
192         } else {
193                 return;
194         }
195         /* find the real args */
196         for(; idx < spec->arg_idx && arg != NULL; ++idx)
197                 arg = arg->next;
198
199         const source_position_t *pos = &fmt_expr->base.source_position;
200         unsigned fmt     = vchar.first(&vchar);
201         unsigned num_fmt = 0;
202         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
203                 if (fmt != '%')
204                         continue;
205                 fmt = vchar.next(&vchar);
206
207                 if (fmt == '%')
208                         continue;
209
210                 ++num_fmt;
211
212                 format_flags_t fmt_flags = FMT_FLAG_NONE;
213                 if (fmt == '0') {
214                         fmt = vchar.next(&vchar);
215                         fmt_flags |= FMT_FLAG_ZERO;
216                 }
217
218                 /* argument selector or minimum field width */
219                 if (vchar.is_digit(fmt)) {
220                         do {
221                                 fmt = vchar.next(&vchar);
222                         } while (vchar.is_digit(fmt));
223
224                         /* digit string was ... */
225                         if (fmt == '$') {
226                                 /* ... argument selector */
227                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
228                                 /* TODO implement */
229                                 return;
230                         }
231                         /* ... minimum field width */
232                 } else {
233                         /* flags */
234                         for (;;) {
235                                 format_flags_t flag;
236                                 switch (fmt) {
237                                         case '#':  flag = FMT_FLAG_HASH;  break;
238                                         case '0':  flag = FMT_FLAG_ZERO;  break;
239                                         case '-':  flag = FMT_FLAG_MINUS; break;
240                                         case '\'': flag = FMT_FLAG_TICK;  break;
241
242                                         case ' ':
243                                                 if (fmt_flags & FMT_FLAG_PLUS) {
244                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
245                                                 }
246                                                 flag = FMT_FLAG_SPACE;
247                                                 break;
248
249                                         case '+':
250                                                 if (fmt_flags & FMT_FLAG_SPACE) {
251                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
252                                                 }
253                                                 flag = FMT_FLAG_PLUS;
254                                                 break;
255
256                                         default: goto break_fmt_flags;
257                                 }
258                                 if (fmt_flags & flag) {
259                                         warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
260                                 }
261                                 fmt_flags |= flag;
262                                 fmt = vchar.next(&vchar);
263                         }
264 break_fmt_flags:
265
266                         /* minimum field width */
267                         if (fmt == '*') {
268                                 fmt = vchar.next(&vchar);
269                                 if (arg == NULL) {
270                                         warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
271                                         return;
272                                 }
273                                 const type_t *const arg_type = arg->expression->base.type;
274                                 if (arg_type != type_int) {
275                                         warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
276                                 }
277                                 arg = arg->next;
278                         } else {
279                                 while (vchar.is_digit(fmt)) {
280                                         fmt = vchar.next(&vchar);
281                                 }
282                         }
283                 }
284
285                 /* precision */
286                 if (fmt == '.') {
287                         fmt = vchar.next(&vchar);
288                         if (fmt == '*') {
289                                 fmt = vchar.next(&vchar);
290                                 if (arg == NULL) {
291                                         warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
292                                         return;
293                                 }
294                                 const type_t *const arg_type = arg->expression->base.type;
295                                 if (arg_type != type_int) {
296                                         warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
297                                 }
298                                 arg = arg->next;
299                         } else {
300                                 /* digit string may be omitted */
301                                 while (vchar.is_digit(fmt)) {
302                                         fmt = vchar.next(&vchar);
303                                 }
304                         }
305                 }
306
307                 /* length modifier */
308                 format_length_modifier_t fmt_mod;
309                 switch (fmt) {
310                         case 'h':
311                                 fmt = vchar.next(&vchar);
312                                 if (fmt == 'h') {
313                                         fmt = vchar.next(&vchar);
314                                         fmt_mod = FMT_MOD_hh;
315                                 } else {
316                                         fmt_mod = FMT_MOD_h;
317                                 }
318                                 break;
319
320                         case 'l':
321                                 fmt = vchar.next(&vchar);
322                                 if (fmt == 'l') {
323                                         fmt = vchar.next(&vchar);
324                                         fmt_mod = FMT_MOD_ll;
325                                 } else {
326                                         fmt_mod = FMT_MOD_l;
327                                 }
328                                 break;
329
330                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
331                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
332                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
333                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
334                         case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
335                         /* microsoft mode */
336                         case 'w':
337                                 if (c_mode & _MS) {
338                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
339                                 } else {
340                                         fmt_mod = FMT_MOD_NONE;
341                                 }
342                                 break;
343                         case 'I':
344                                 if (c_mode & _MS) {
345                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
346                                         if (fmt == '3') {
347                                                 fmt = vchar.next(&vchar);
348                                                 if (fmt == '2') {
349                                                         fmt = vchar.next(&vchar);
350                                                         fmt_mod = FMT_MOD_I32;
351                                                 } else {
352                                                         /* rewind */
353                                                         --vchar.position;
354                                                 }
355                                         } else if (fmt == '6') {
356                                                 fmt = vchar.next(&vchar);
357                                                 if (fmt == '4') {
358                                                         fmt = vchar.next(&vchar);
359                                                         fmt_mod = FMT_MOD_I64;
360                                                 } else {
361                                                         /* rewind */
362                                                         --vchar.position;
363                                                 }
364                                         }
365                                 } else {
366                                         fmt_mod = FMT_MOD_NONE;
367                                 }
368                                 break;
369                         default:
370                                 fmt_mod = FMT_MOD_NONE;
371                                 break;
372                 }
373
374                 if (fmt == '\0') {
375                         warningf(pos, "dangling %% in format string");
376                         break;
377                 }
378
379                 type_t            *expected_type;
380                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
381                 format_flags_t     allowed_flags;
382                 switch (fmt) {
383                         case 'd':
384                         case 'i':
385                                 switch (fmt_mod) {
386                                         case FMT_MOD_NONE: expected_type = type_int;       break;
387                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
388                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
389                                         case FMT_MOD_l:    expected_type = type_long;      break;
390                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
391                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
392                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
393                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
394                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
395                                         case FMT_MOD_I32:  expected_type = type_int32;     break;
396                                         case FMT_MOD_I64:  expected_type = type_int64;     break;
397
398                                         default:
399                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
400                                                 goto next_arg;
401                                 }
402                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
403                                 break;
404
405                         case 'o':
406                         case 'X':
407                         case 'x':
408                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
409                                 goto eval_fmt_mod_unsigned;
410
411                         case 'u':
412                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
413 eval_fmt_mod_unsigned:
414                                 switch (fmt_mod) {
415                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
416                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
417                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
418                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
419                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
420                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
421                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
422                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
423                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
424                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
425                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
426
427                                         default:
428                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
429                                                 goto next_arg;
430                                 }
431                                 break;
432
433                         case 'A':
434                         case 'a':
435                         case 'E':
436                         case 'e':
437                         case 'F':
438                         case 'f':
439                         case 'G':
440                         case 'g':
441                                 switch (fmt_mod) {
442                                         case FMT_MOD_l:    /* l modifier is ignored */
443                                         case FMT_MOD_NONE: expected_type = type_double;      break;
444                                         case FMT_MOD_L:    expected_type = type_long_double; break;
445
446                                         default:
447                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
448                                                 goto next_arg;
449                                 }
450                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
451                                 break;
452
453                         case 'C':
454                                 if (fmt_mod != FMT_MOD_NONE) {
455                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
456                                         goto next_arg;
457                                 }
458                                 expected_type = type_wchar_t;
459                                 allowed_flags = FMT_FLAG_NONE;
460                                 break;
461
462                         case 'c':
463                                 expected_type = type_int;
464                                 switch (fmt_mod) {
465                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
466                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
467                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
468
469                                         default:
470                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
471                                                 goto next_arg;
472                                 }
473                                 allowed_flags = FMT_FLAG_NONE;
474                                 break;
475
476                         case 'S':
477                                 if (fmt_mod != FMT_MOD_NONE) {
478                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
479                                         goto next_arg;
480                                 }
481                                 expected_type = type_wchar_t_ptr;
482                                 expected_qual = TYPE_QUALIFIER_CONST;
483                                 allowed_flags = FMT_FLAG_MINUS;
484                                 break;
485
486                         case 's':
487                                 switch (fmt_mod) {
488                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
489                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
490                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
491
492                                         default:
493                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
494                                                 goto next_arg;
495                                 }
496                                 expected_qual = TYPE_QUALIFIER_CONST;
497                                 allowed_flags = FMT_FLAG_MINUS;
498                                 break;
499
500                         case 'p':
501                                 if (fmt_mod != FMT_MOD_NONE) {
502                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
503                                         goto next_arg;
504                                 }
505                                 expected_type = type_void_ptr;
506                                 allowed_flags = FMT_FLAG_NONE;
507                                 break;
508
509                         case 'n':
510                                 switch (fmt_mod) {
511                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
512                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
513                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
514                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
515                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
516                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
517                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
518                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
519
520                                         default:
521                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
522                                                 goto next_arg;
523                                 }
524                                 allowed_flags = FMT_FLAG_NONE;
525                                 break;
526
527                         default:
528                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
529                                 if (arg == NULL) {
530                                         warningf(pos, "too few arguments for format string");
531                                         return;
532                                 }
533                                 goto next_arg;
534                 }
535
536                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
537                 if (wrong_flags != 0) {
538                         char  wrong[8];
539                         char *p = wrong;
540                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
541                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
542                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
543                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
544                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
545                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
546                         *p = '\0';
547
548                         warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
549                 }
550
551                 if (arg == NULL) {
552                         warningf(pos, "too few arguments for format string");
553                         return;
554                 }
555
556                 {       /* create a scope here to prevent warning about the jump to next_arg */
557                         type_t *const arg_type           = arg->expression->base.type;
558                         type_t *const arg_skip           = skip_typeref(arg_type);
559                         type_t *const expected_type_skip = skip_typeref(expected_type);
560
561                         if (fmt == 'p') {
562                                 /* allow any pointer type for %p, not just void */
563                                 if (is_type_pointer(arg_skip))
564                                         goto next_arg;
565                         }
566
567                         if (is_type_pointer(expected_type_skip)) {
568                                 if (is_type_pointer(arg_skip)) {
569                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
570                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
571                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
572                                                 get_unqualified_type(arg_to) == exp_to) {
573                                                 goto next_arg;
574                                         }
575                                 }
576                         } else {
577                                 if (get_unqualified_type(arg_skip) == expected_type_skip) {
578                                         goto next_arg;
579                                 }
580                         }
581                         if (is_type_valid(arg_skip)) {
582                                 warningf(pos,
583                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
584                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
585                         }
586                 }
587 next_arg:
588                 arg = arg->next;
589         }
590         if (!atend(&vchar)) {
591                 warningf(pos, "format string contains '\\0'");
592         }
593         if (arg != NULL) {
594                 unsigned num_args = num_fmt;
595                 while (arg != NULL) {
596                         ++num_args;
597                         arg = arg->next;
598                 }
599                 warningf(pos, "%u argument%s but only %u format specifier%s",
600                         num_args, num_args != 1 ? "s" : "",
601                         num_fmt, num_fmt != 1 ? "s" : "");
602         }
603 }
604
605 /**
606  * Check scanf-style format.
607  */
608 static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
609 {
610         /* find format arg */
611         unsigned idx = 0;
612         for (; idx < spec->fmt_idx; ++idx) {
613                 if (arg == NULL)
614                         return;
615                 arg = arg->next;
616         }
617
618         const expression_t *fmt_expr = arg->expression;
619         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
620                 fmt_expr = fmt_expr->unary.value;
621         }
622
623         vchar_t vchar;
624         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
625                 vchar.string   = &fmt_expr->wide_string.value;
626                 vchar.size     = fmt_expr->wide_string.value.size;
627                 vchar.first    = wstring_first;
628                 vchar.next     = wstring_next;
629                 vchar.is_digit = wstring_isdigit;
630         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
631                 vchar.string   = &fmt_expr->string.value;
632                 vchar.size     = fmt_expr->string.value.size;
633                 vchar.first    = string_first;
634                 vchar.next     = string_next;
635                 vchar.is_digit = string_isdigit;
636         } else {
637                 return;
638         }
639         /* find the real args */
640         for (; idx < spec->arg_idx && arg != NULL; ++idx)
641                 arg = arg->next;
642
643         const source_position_t *pos = &fmt_expr->base.source_position;
644         unsigned fmt     = vchar.first(&vchar);
645         unsigned num_fmt = 0;
646         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
647                 if (fmt != '%')
648                         continue;
649                 fmt = vchar.next(&vchar);
650
651                 if (fmt == '%')
652                         continue;
653
654                 ++num_fmt;
655
656                 /* length modifier */
657                 format_length_modifier_t fmt_mod;
658                 switch (fmt) {
659                         case 'h':
660                                 fmt = vchar.next(&vchar);
661                                 if (fmt == 'h') {
662                                         fmt = vchar.next(&vchar);
663                                         fmt_mod = FMT_MOD_hh;
664                                 } else {
665                                         fmt_mod = FMT_MOD_h;
666                                 }
667                                 break;
668
669                         case 'l':
670                                 fmt = vchar.next(&vchar);
671                                 if (fmt == 'l') {
672                                         fmt = vchar.next(&vchar);
673                                         fmt_mod = FMT_MOD_ll;
674                                 } else {
675                                         fmt_mod = FMT_MOD_l;
676                                 }
677                                 break;
678
679                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
680                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
681                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
682                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
683                         /* microsoft mode */
684                         case 'w':
685                                 if (c_mode & _MS) {
686                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
687                                 } else {
688                                         fmt_mod = FMT_MOD_NONE;
689                                 }
690                                 break;
691                         case 'I':
692                                 if (c_mode & _MS) {
693                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
694                                         if (fmt == '3') {
695                                                 fmt = vchar.next(&vchar);
696                                                 if (fmt == '2') {
697                                                         fmt = vchar.next(&vchar);
698                                                         fmt_mod = FMT_MOD_I32;
699                                                 } else {
700                                                         /* rewind */
701                                                         --vchar.position;
702                                                 }
703                                         } else if (fmt == '6') {
704                                                 fmt = vchar.next(&vchar);
705                                                 if (fmt == '4') {
706                                                         fmt = vchar.next(&vchar);
707                                                         fmt_mod = FMT_MOD_I64;
708                                                 } else {
709                                                         /* rewind */
710                                                         --vchar.position;
711                                                 }
712                                         }
713                                 } else {
714                                         fmt_mod = FMT_MOD_NONE;
715                                 }
716                                 break;
717                         default:
718                                 fmt_mod = FMT_MOD_NONE;
719                                 break;
720                 }
721
722                 if (fmt == '\0') {
723                         warningf(pos, "dangling %% in format string");
724                         break;
725                 }
726
727                 type_t            *expected_type;
728                 switch (fmt) {
729                         case 'd':
730                         case 'i':
731                                 switch (fmt_mod) {
732                                         case FMT_MOD_NONE: expected_type = type_int;         break;
733                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
734                                         case FMT_MOD_h:    expected_type = type_short;       break;
735                                         case FMT_MOD_l:    expected_type = type_long;        break;
736                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
737                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
738                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
739                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
740                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
741                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
742                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
743
744                                         default:
745                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
746                                                 goto next_arg;
747                                 }
748                                 break;
749
750                         case 'o':
751                         case 'X':
752                         case 'x':
753                                 goto eval_fmt_mod_unsigned;
754
755                         case 'u':
756 eval_fmt_mod_unsigned:
757                                 switch (fmt_mod) {
758                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
759                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
760                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
761                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
762                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
763                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
764                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
765                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
766                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
767                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
768                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
769
770                                         default:
771                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
772                                                 goto next_arg;
773                                 }
774                                 break;
775
776                         case 'A':
777                         case 'a':
778                         case 'E':
779                         case 'e':
780                         case 'F':
781                         case 'f':
782                         case 'G':
783                         case 'g':
784                                 switch (fmt_mod) {
785                                         case FMT_MOD_l:    /* l modifier is ignored */
786                                         case FMT_MOD_NONE: expected_type = type_double;      break;
787                                         case FMT_MOD_L:    expected_type = type_long_double; break;
788
789                                         default:
790                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
791                                                 goto next_arg;
792                                 }
793                                 break;
794
795                         case 'C':
796                                 if (fmt_mod != FMT_MOD_NONE) {
797                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
798                                         goto next_arg;
799                                 }
800                                 expected_type = type_wchar_t;
801                                 break;
802
803                         case 'c':
804                                 expected_type = type_int;
805                                 switch (fmt_mod) {
806                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
807                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
808                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
809
810                                         default:
811                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
812                                                 goto next_arg;
813                                 }
814                                 break;
815
816                         case 'S':
817                                 if (fmt_mod != FMT_MOD_NONE) {
818                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
819                                         goto next_arg;
820                                 }
821                                 expected_type = type_wchar_t;
822                                 break;
823
824                         case 's':
825                         case '[':
826                                 switch (fmt_mod) {
827                                         case FMT_MOD_NONE: expected_type = type_char;    break;
828                                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
829                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
830
831                                         default:
832                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
833                                                 goto next_arg;
834                                 }
835                                 break;
836
837                         case 'p':
838                                 if (fmt_mod != FMT_MOD_NONE) {
839                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
840                                         goto next_arg;
841                                 }
842                                 expected_type = type_void_ptr;
843                                 break;
844
845                         case 'n':
846                                 switch (fmt_mod) {
847                                         case FMT_MOD_NONE: expected_type = type_int;         break;
848                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
849                                         case FMT_MOD_h:    expected_type = type_short;       break;
850                                         case FMT_MOD_l:    expected_type = type_long;        break;
851                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
852                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
853                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
854                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
855
856                                         default:
857                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
858                                                 goto next_arg;
859                                 }
860                                 break;
861
862                         default:
863                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
864                                 if (arg == NULL) {
865                                         warningf(pos, "too few arguments for format string");
866                                         return;
867                                 }
868                                 goto next_arg;
869                 }
870
871                 if (arg == NULL) {
872                         warningf(pos, "too few arguments for format string");
873                         return;
874                 }
875
876                 {       /* create a scope here to prevent warning about the jump to next_arg */
877                         type_t *const arg_type           = arg->expression->base.type;
878                         type_t *const arg_skip           = skip_typeref(arg_type);
879                         type_t *const expected_type_skip = skip_typeref(expected_type);
880
881                         if (! is_type_pointer(arg_skip))
882                                 goto error_arg_type;
883                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
884
885                         if (fmt == 'p') {
886                                 /* allow any pointer type for %p, not just void */
887                                 if (is_type_pointer(ptr_skip))
888                                         goto next_arg;
889                         }
890
891                         /* do NOT allow const or restrict, all other should be ok */
892                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
893                                 goto error_arg_type;
894                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
895                         if (unqual_ptr == expected_type_skip) {
896                                 goto next_arg;
897                         } else if (expected_type_skip == type_char) {
898                                 /* char matches with unsigned char AND signed char */
899                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
900                                         goto next_arg;
901                         }
902 error_arg_type:
903                         if (is_type_valid(arg_skip)) {
904                                 warningf(pos,
905                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
906                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
907                         }
908                 }
909 next_arg:
910                 arg = arg->next;
911         }
912         if (!atend(&vchar)) {
913                 warningf(pos, "format string contains '\\0'");
914         }
915         if (arg != NULL) {
916                 unsigned num_args = num_fmt;
917                 while (arg != NULL) {
918                         ++num_args;
919                         arg = arg->next;
920                 }
921                 warningf(pos, "%u argument%s but only %u format specifier%s",
922                         num_args, num_args != 1 ? "s" : "",
923                         num_fmt, num_fmt != 1 ? "s" : "");
924         }
925 }
926
927 static const format_spec_t builtin_table[] = {
928         { "printf",        FORMAT_PRINTF,   0, 1 },
929         { "wprintf",       FORMAT_PRINTF,   0, 1 },
930         { "sprintf",       FORMAT_PRINTF,   1, 2 },
931         { "swprintf",      FORMAT_PRINTF,   1, 2 },
932         { "snprintf",      FORMAT_PRINTF,   2, 3 },
933         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
934         { "fprintf",       FORMAT_PRINTF,   1, 2 },
935         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
936         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
937         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
938
939         { "scanf",         FORMAT_SCANF,    0, 1 },
940         { "wscanf",        FORMAT_SCANF,    0, 1 },
941         { "sscanf",        FORMAT_SCANF,    1, 2 },
942         { "swscanf",       FORMAT_SCANF,    1, 2 },
943         { "fscanf",        FORMAT_SCANF,    1, 2 },
944         { "fwscanf",       FORMAT_SCANF,    1, 2 },
945
946         { "strftime",      FORMAT_STRFTIME, 3, 4 },
947         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
948
949         { "strfmon",       FORMAT_STRFMON,  3, 4 },
950
951         /* MS extensions */
952         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
953         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
954         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
955         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
956         { "printf_s",      FORMAT_PRINTF,   0, 1 },
957         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
958         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
959         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
960         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
961         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
962         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
963         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
964         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
965         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
966         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
967         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
968         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
969         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
970         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
971         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
972         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
973         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
974 };
975
976 void check_format(const call_expression_t *const call)
977 {
978         if (!warning.format)
979                 return;
980
981         const expression_t *const func_expr = call->function;
982         if (func_expr->kind != EXPR_REFERENCE)
983                 return;
984
985         const entity_t        *const entity = func_expr->reference.entity;
986         const call_argument_t *      arg    = call->arguments;
987
988         if (false) {
989                 /* the declaration has a GNU format attribute, check it */
990         } else {
991                 /*
992                  * For some functions we always check the format, even if it was not specified.
993                  * This allows to check format even in MS mode or without header included.
994                  */
995                 const char *const name = entity->base.symbol->string;
996                 for (size_t i = 0; i < lengthof(builtin_table); ++i) {
997                         if (strcmp(name, builtin_table[i].name) == 0) {
998                                 switch (builtin_table[i].fmt_kind) {
999                                 case FORMAT_PRINTF:
1000                                         check_printf_format(arg, &builtin_table[i]);
1001                                         break;
1002                                 case FORMAT_SCANF:
1003                                         check_scanf_format(arg, &builtin_table[i]);
1004                                         break;
1005                                 case FORMAT_STRFTIME:
1006                                 case FORMAT_STRFMON:
1007                                         /* TODO: implement other cases */
1008                                         break;
1009                                 }
1010                                 break;
1011                         }
1012                 }
1013         }
1014 }