Avoid null pointer access in the format string checker when an unknown format specifi...
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <ctype.h>
21 #include <wctype.h>
22
23 #include "format_check.h"
24 #include "symbol_t.h"
25 #include "ast_t.h"
26 #include "entity_t.h"
27 #include "diagnostic.h"
28 #include "types.h"
29 #include "type_t.h"
30 #include "warning.h"
31 #include "lang_features.h"
32
/**
 * Flags of a printf-style conversion specification. Every flag has its own
 * bit, so several of them can be combined in one format_flags_t value.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00U, /**< no flag */
        FMT_FLAG_HASH  = 0x01U, /**< '#' */
        FMT_FLAG_ZERO  = 0x02U, /**< '0' */
        FMT_FLAG_MINUS = 0x04U, /**< '-' */
        FMT_FLAG_SPACE = 0x08U, /**< ' ' */
        FMT_FLAG_PLUS  = 0x10U, /**< '+' */
        FMT_FLAG_TICK  = 0x20U  /**< '\'' */
} format_flag_t;

/** Bit set made of format_flag_t values. */
typedef unsigned format_flags_t;
44
/**
 * Length modifiers that can precede a conversion specifier.
 * The enumerators are named after their spelling in the format string.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /**< no length modifier present */
        FMT_MOD_L,
        FMT_MOD_hh,
        FMT_MOD_h,
        FMT_MOD_l,
        FMT_MOD_ll,
        FMT_MOD_j,
        FMT_MOD_t,
        FMT_MOD_z,
        FMT_MOD_q,

        /* the remaining modifiers are only recognized in microsoft mode */
        FMT_MOD_w,
        FMT_MOD_I,
        FMT_MOD_I32,
        FMT_MOD_I64
} format_length_modifier_t;
62
63 typedef struct format_spec_t {
64         const char    *name;     /**< name of the function */
65         format_kind_t  fmt_kind; /**< kind */
66         unsigned       fmt_idx;  /**< index of the format string */
67         unsigned       arg_idx;  /**< index of the first argument */
68 } format_spec_t;
69
70 static const char* get_length_modifier_name(const format_length_modifier_t mod)
71 {
72         static const char* const names[] = {
73                 [FMT_MOD_NONE] = "",
74                 [FMT_MOD_L]    = "L",
75                 [FMT_MOD_hh]   = "hh",
76                 [FMT_MOD_h]    = "h",
77                 [FMT_MOD_l]    = "l",
78                 [FMT_MOD_ll]   = "ll",
79                 [FMT_MOD_j]    = "j",
80                 [FMT_MOD_t]    = "t",
81                 [FMT_MOD_z]    = "z",
82                 [FMT_MOD_q]    = "q",
83                 /* only in microsoft mode */
84                 [FMT_MOD_w]    = "w",
85                 [FMT_MOD_I]    = "I",
86                 [FMT_MOD_I32]  = "I32",
87                 [FMT_MOD_I64]  = "I64"
88         };
89         assert(mod < sizeof(names) / sizeof(*names));
90         return names[mod];
91 }
92
93 static void warn_invalid_length_modifier(const source_position_t *pos,
94                                          const format_length_modifier_t mod,
95                                          const wchar_rep_t conversion)
96 {
97         warningf(pos,
98                 "invalid length modifier '%s' for conversion specifier '%%%c'",
99                 get_length_modifier_name(mod), conversion
100         );
101 }
102
/**
 * Cursor over a format string. The character access goes through function
 * pointers, so the same checker code can walk narrow and wide strings.
 */
typedef struct vchar_t vchar_t;
struct vchar_t {
        const void *string;   /**< the string that is walked */
        size_t     position;  /**< index of the current character */
        size_t     size;      /**< size of the string */

        /** Set the position to 0 and return the first character. */
        unsigned (*first)(vchar_t *self);
        /** Advance the position by one and return the character found there. */
        unsigned (*next)(vchar_t *self);
        /** Return non-zero if the given character is a digit. */
        int (*is_digit)(unsigned vchar);
};
116
117 static unsigned string_first(vchar_t *self) {
118         self->position = 0;
119         const string_t *string = self->string;
120         return string->begin[0];
121 }
122
123 static unsigned string_next(vchar_t *self) {
124         ++self->position;
125         const string_t *string = self->string;
126         return string->begin[self->position];
127 }
128
/**
 * vchar_t::is_digit for narrow strings.
 *
 * The test is done by range instead of isdigit(): the value handed in was
 * converted from a char to unsigned and may therefore lie outside the range
 * of unsigned char, for which isdigit() has undefined behaviour. The decimal
 * digits are guaranteed to be contiguous, so the comparison is portable.
 *
 * @param vchar  the character to test
 * @return non-zero if vchar is one of '0'..'9', 0 otherwise
 */
static int string_isdigit(unsigned vchar) {
        return vchar >= '0' && vchar <= '9';
}
132
133 static unsigned wstring_first(vchar_t *self) {
134         self->position = 0;
135         const wide_string_t *wstring = self->string;
136         return wstring->begin[0];
137 }
138
139 static unsigned wstring_next(vchar_t *self) {
140         ++self->position;
141         const wide_string_t *wstring = self->string;
142         return wstring->begin[self->position];
143 }
144
/** vchar_t::is_digit for wide strings; forwards to iswdigit(). */
static int wstring_isdigit(unsigned vchar) {
        const int is_wide_digit = iswdigit(vchar);
        return is_wide_digit;
}
148
149 static bool atend(vchar_t *self) {
150         return self->position + 1 == self->size;
151 }
152
153 /**
154  * Check printf-style format.
155  */
156 static void check_printf_format(const call_argument_t *arg, const format_spec_t *spec)
157 {
158         /* find format arg */
159         unsigned idx = 0;
160         for (; idx < spec->fmt_idx; ++idx) {
161                 if (arg == NULL)
162                         return;
163                 arg = arg->next;
164         }
165
166         const expression_t *fmt_expr = arg->expression;
167         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
168                 fmt_expr = fmt_expr->unary.value;
169         }
170
171         vchar_t vchar;
172         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
173                 vchar.string   = &fmt_expr->wide_string.value;
174                 vchar.size     = fmt_expr->wide_string.value.size;
175                 vchar.first    = wstring_first;
176                 vchar.next     = wstring_next;
177                 vchar.is_digit = wstring_isdigit;
178         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
179                 vchar.string   = &fmt_expr->string.value;
180                 vchar.size     = fmt_expr->string.value.size;
181                 vchar.first    = string_first;
182                 vchar.next     = string_next;
183                 vchar.is_digit = string_isdigit;
184         } else {
185                 return;
186         }
187         /* find the real args */
188         for(; idx < spec->arg_idx && arg != NULL; ++idx)
189                 arg = arg->next;
190
191         const source_position_t *pos = &fmt_expr->base.source_position;
192         unsigned fmt     = vchar.first(&vchar);
193         unsigned num_fmt = 0;
194         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
195                 if (fmt != '%')
196                         continue;
197                 fmt = vchar.next(&vchar);
198
199                 if (fmt == '%')
200                         continue;
201
202                 ++num_fmt;
203
204                 format_flags_t fmt_flags = FMT_FLAG_NONE;
205                 if (fmt == '0') {
206                         fmt = vchar.next(&vchar);
207                         fmt_flags |= FMT_FLAG_ZERO;
208                 }
209
210                 /* argument selector or minimum field width */
211                 if (vchar.is_digit(fmt)) {
212                         do {
213                                 fmt = vchar.next(&vchar);
214                         } while (vchar.is_digit(fmt));
215
216                         /* digit string was ... */
217                         if (fmt == '$') {
218                                 /* ... argument selector */
219                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
220                                 /* TODO implement */
221                                 return;
222                         }
223                         /* ... minimum field width */
224                 } else {
225                         /* flags */
226                         for (;;) {
227                                 format_flags_t flag;
228                                 switch (fmt) {
229                                         case '#':  flag = FMT_FLAG_HASH;  break;
230                                         case '0':  flag = FMT_FLAG_ZERO;  break;
231                                         case '-':  flag = FMT_FLAG_MINUS; break;
232                                         case '\'': flag = FMT_FLAG_TICK;  break;
233
234                                         case ' ':
235                                                 if (fmt_flags & FMT_FLAG_PLUS) {
236                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
237                                                 }
238                                                 flag = FMT_FLAG_SPACE;
239                                                 break;
240
241                                         case '+':
242                                                 if (fmt_flags & FMT_FLAG_SPACE) {
243                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
244                                                 }
245                                                 flag = FMT_FLAG_PLUS;
246                                                 break;
247
248                                         default: goto break_fmt_flags;
249                                 }
250                                 if (fmt_flags & flag) {
251                                         warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
252                                 }
253                                 fmt_flags |= flag;
254                                 fmt = vchar.next(&vchar);
255                         }
256 break_fmt_flags:
257
258                         /* minimum field width */
259                         if (fmt == '*') {
260                                 fmt = vchar.next(&vchar);
261                                 if (arg == NULL) {
262                                         warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
263                                         return;
264                                 }
265                                 const type_t *const arg_type = arg->expression->base.type;
266                                 if (arg_type != type_int) {
267                                         warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
268                                 }
269                                 arg = arg->next;
270                         } else {
271                                 while (vchar.is_digit(fmt)) {
272                                         fmt = vchar.next(&vchar);
273                                 }
274                         }
275                 }
276
277                 /* precision */
278                 if (fmt == '.') {
279                         fmt = vchar.next(&vchar);
280                         if (fmt == '*') {
281                                 fmt = vchar.next(&vchar);
282                                 if (arg == NULL) {
283                                         warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
284                                         return;
285                                 }
286                                 const type_t *const arg_type = arg->expression->base.type;
287                                 if (arg_type != type_int) {
288                                         warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
289                                 }
290                                 arg = arg->next;
291                         } else {
292                                 /* digit string may be omitted */
293                                 while (vchar.is_digit(fmt)) {
294                                         fmt = vchar.next(&vchar);
295                                 }
296                         }
297                 }
298
299                 /* length modifier */
300                 format_length_modifier_t fmt_mod;
301                 switch (fmt) {
302                         case 'h':
303                                 fmt = vchar.next(&vchar);
304                                 if (fmt == 'h') {
305                                         fmt = vchar.next(&vchar);
306                                         fmt_mod = FMT_MOD_hh;
307                                 } else {
308                                         fmt_mod = FMT_MOD_h;
309                                 }
310                                 break;
311
312                         case 'l':
313                                 fmt = vchar.next(&vchar);
314                                 if (fmt == 'l') {
315                                         fmt = vchar.next(&vchar);
316                                         fmt_mod = FMT_MOD_ll;
317                                 } else {
318                                         fmt_mod = FMT_MOD_l;
319                                 }
320                                 break;
321
322                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
323                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
324                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
325                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
326                         case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
327                         /* microsoft mode */
328                         case 'w':
329                                 if (c_mode & _MS) {
330                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
331                                 } else {
332                                         fmt_mod = FMT_MOD_NONE;
333                                 }
334                                 break;
335                         case 'I':
336                                 if (c_mode & _MS) {
337                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
338                                         if (fmt == '3') {
339                                                 fmt = vchar.next(&vchar);
340                                                 if (fmt == '2') {
341                                                         fmt = vchar.next(&vchar);
342                                                         fmt_mod = FMT_MOD_I32;
343                                                 } else {
344                                                         /* rewind */
345                                                         --vchar.position;
346                                                 }
347                                         } else if (fmt == '6') {
348                                                 fmt = vchar.next(&vchar);
349                                                 if (fmt == '4') {
350                                                         fmt = vchar.next(&vchar);
351                                                         fmt_mod = FMT_MOD_I64;
352                                                 } else {
353                                                         /* rewind */
354                                                         --vchar.position;
355                                                 }
356                                         }
357                                 } else {
358                                         fmt_mod = FMT_MOD_NONE;
359                                 }
360                                 break;
361                         default:
362                                 fmt_mod = FMT_MOD_NONE;
363                                 break;
364                 }
365
366                 if (fmt == '\0') {
367                         warningf(pos, "dangling %% in format string");
368                         break;
369                 }
370
371                 type_t            *expected_type;
372                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
373                 format_flags_t     allowed_flags;
374                 switch (fmt) {
375                         case 'd':
376                         case 'i':
377                                 switch (fmt_mod) {
378                                         case FMT_MOD_NONE: expected_type = type_int;       break;
379                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
380                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
381                                         case FMT_MOD_l:    expected_type = type_long;      break;
382                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
383                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
384                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
385                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
386                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
387                                         case FMT_MOD_I32:  expected_type = type_int32;     break;
388                                         case FMT_MOD_I64:  expected_type = type_int64;     break;
389
390                                         default:
391                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
392                                                 goto next_arg;
393                                 }
394                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
395                                 break;
396
397                         case 'o':
398                         case 'X':
399                         case 'x':
400                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
401                                 goto eval_fmt_mod_unsigned;
402
403                         case 'u':
404                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
405 eval_fmt_mod_unsigned:
406                                 switch (fmt_mod) {
407                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
408                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
409                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
410                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
411                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
412                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
413                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
414                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
415                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
416                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
417                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
418
419                                         default:
420                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
421                                                 goto next_arg;
422                                 }
423                                 break;
424
425                         case 'A':
426                         case 'a':
427                         case 'E':
428                         case 'e':
429                         case 'F':
430                         case 'f':
431                         case 'G':
432                         case 'g':
433                                 switch (fmt_mod) {
434                                         case FMT_MOD_l:    /* l modifier is ignored */
435                                         case FMT_MOD_NONE: expected_type = type_double;      break;
436                                         case FMT_MOD_L:    expected_type = type_long_double; break;
437
438                                         default:
439                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
440                                                 goto next_arg;
441                                 }
442                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
443                                 break;
444
445                         case 'C':
446                                 if (fmt_mod != FMT_MOD_NONE) {
447                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
448                                         goto next_arg;
449                                 }
450                                 expected_type = type_wchar_t;
451                                 allowed_flags = FMT_FLAG_NONE;
452                                 break;
453
454                         case 'c':
455                                 expected_type = type_int;
456                                 switch (fmt_mod) {
457                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
458                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
459                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
460
461                                         default:
462                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
463                                                 goto next_arg;
464                                 }
465                                 allowed_flags = FMT_FLAG_NONE;
466                                 break;
467
468                         case 'S':
469                                 if (fmt_mod != FMT_MOD_NONE) {
470                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
471                                         goto next_arg;
472                                 }
473                                 expected_type = type_wchar_t_ptr;
474                                 expected_qual = TYPE_QUALIFIER_CONST;
475                                 allowed_flags = FMT_FLAG_MINUS;
476                                 break;
477
478                         case 's':
479                                 switch (fmt_mod) {
480                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
481                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
482                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
483
484                                         default:
485                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
486                                                 goto next_arg;
487                                 }
488                                 expected_qual = TYPE_QUALIFIER_CONST;
489                                 allowed_flags = FMT_FLAG_MINUS;
490                                 break;
491
492                         case 'p':
493                                 if (fmt_mod != FMT_MOD_NONE) {
494                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
495                                         goto next_arg;
496                                 }
497                                 expected_type = type_void_ptr;
498                                 allowed_flags = FMT_FLAG_NONE;
499                                 break;
500
501                         case 'n':
502                                 switch (fmt_mod) {
503                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
504                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
505                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
506                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
507                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
508                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
509                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
510                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
511
512                                         default:
513                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
514                                                 goto next_arg;
515                                 }
516                                 allowed_flags = FMT_FLAG_NONE;
517                                 break;
518
519                         default:
520                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
521                                 if (arg == NULL) {
522                                         warningf(pos, "too few arguments for format string");
523                                         return;
524                                 }
525                                 goto next_arg;
526                 }
527
528                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
529                 if (wrong_flags != 0) {
530                         char  wrong[8];
531                         char *p = wrong;
532                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
533                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
534                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
535                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
536                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
537                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
538                         *p = '\0';
539
540                         warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
541                 }
542
543                 if (arg == NULL) {
544                         warningf(pos, "too few arguments for format string");
545                         return;
546                 }
547
548                 {       /* create a scope here to prevent warning about the jump to next_arg */
549                         type_t *const arg_type           = arg->expression->base.type;
550                         type_t *const arg_skip           = skip_typeref(arg_type);
551                         type_t *const expected_type_skip = skip_typeref(expected_type);
552
553                         if (fmt == 'p') {
554                                 /* allow any pointer type for %p, not just void */
555                                 if (is_type_pointer(arg_skip))
556                                         goto next_arg;
557                         }
558
559                         if (is_type_pointer(expected_type_skip)) {
560                                 if (is_type_pointer(arg_skip)) {
561                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
562                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
563                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
564                                                 get_unqualified_type(arg_to) == exp_to) {
565                                                 goto next_arg;
566                                         }
567                                 }
568                         } else {
569                                 if (get_unqualified_type(arg_skip) == expected_type_skip) {
570                                         goto next_arg;
571                                 }
572                         }
573                         if (is_type_valid(arg_skip)) {
574                                 warningf(pos,
575                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
576                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
577                         }
578                 }
579 next_arg:
580                 arg = arg->next;
581         }
582         if (!atend(&vchar)) {
583                 warningf(pos, "format string contains NUL");
584         }
585         if (arg != NULL) {
586                 unsigned num_args = num_fmt;
587                 while (arg != NULL) {
588                         ++num_args;
589                         arg = arg->next;
590                 }
591                 warningf(pos, "%u argument%s but only %u format specifier%s",
592                         num_args, num_args != 1 ? "s" : "",
593                         num_fmt, num_fmt != 1 ? "s" : "");
594         }
595 }
596
597 /**
598  * Check scanf-style format.
599  */
600 static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
601 {
602         /* find format arg */
603         unsigned idx = 0;
604         for (; idx < spec->fmt_idx; ++idx) {
605                 if (arg == NULL)
606                         return;
607                 arg = arg->next;
608         }
609
610         const expression_t *fmt_expr = arg->expression;
611         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
612                 fmt_expr = fmt_expr->unary.value;
613         }
614
615         vchar_t vchar;
616         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
617                 vchar.string   = &fmt_expr->wide_string.value;
618                 vchar.size     = fmt_expr->wide_string.value.size;
619                 vchar.first    = wstring_first;
620                 vchar.next     = wstring_next;
621                 vchar.is_digit = wstring_isdigit;
622         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
623                 vchar.string   = &fmt_expr->string.value;
624                 vchar.size     = fmt_expr->string.value.size;
625                 vchar.first    = string_first;
626                 vchar.next     = string_next;
627                 vchar.is_digit = string_isdigit;
628         } else {
629                 return;
630         }
631         /* find the real args */
632         for (; idx < spec->arg_idx && arg != NULL; ++idx)
633                 arg = arg->next;
634
635         const source_position_t *pos = &fmt_expr->base.source_position;
636         unsigned fmt     = vchar.first(&vchar);
637         unsigned num_fmt = 0;
638         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
639                 if (fmt != '%')
640                         continue;
641                 fmt = vchar.next(&vchar);
642
643                 if (fmt == '%')
644                         continue;
645
646                 ++num_fmt;
647
648                 /* length modifier */
649                 format_length_modifier_t fmt_mod;
650                 switch (fmt) {
651                         case 'h':
652                                 fmt = vchar.next(&vchar);
653                                 if (fmt == 'h') {
654                                         fmt = vchar.next(&vchar);
655                                         fmt_mod = FMT_MOD_hh;
656                                 } else {
657                                         fmt_mod = FMT_MOD_h;
658                                 }
659                                 break;
660
661                         case 'l':
662                                 fmt = vchar.next(&vchar);
663                                 if (fmt == 'l') {
664                                         fmt = vchar.next(&vchar);
665                                         fmt_mod = FMT_MOD_ll;
666                                 } else {
667                                         fmt_mod = FMT_MOD_l;
668                                 }
669                                 break;
670
671                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
672                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
673                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
674                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
675                         /* microsoft mode */
676                         case 'w':
677                                 if (c_mode & _MS) {
678                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
679                                 } else {
680                                         fmt_mod = FMT_MOD_NONE;
681                                 }
682                                 break;
683                         case 'I':
684                                 if (c_mode & _MS) {
685                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
686                                         if (fmt == '3') {
687                                                 fmt = vchar.next(&vchar);
688                                                 if (fmt == '2') {
689                                                         fmt = vchar.next(&vchar);
690                                                         fmt_mod = FMT_MOD_I32;
691                                                 } else {
692                                                         /* rewind */
693                                                         --vchar.position;
694                                                 }
695                                         } else if (fmt == '6') {
696                                                 fmt = vchar.next(&vchar);
697                                                 if (fmt == '4') {
698                                                         fmt = vchar.next(&vchar);
699                                                         fmt_mod = FMT_MOD_I64;
700                                                 } else {
701                                                         /* rewind */
702                                                         --vchar.position;
703                                                 }
704                                         }
705                                 } else {
706                                         fmt_mod = FMT_MOD_NONE;
707                                 }
708                                 break;
709                         default:
710                                 fmt_mod = FMT_MOD_NONE;
711                                 break;
712                 }
713
714                 if (fmt == '\0') {
715                         warningf(pos, "dangling %% in format string");
716                         break;
717                 }
718
719                 type_t            *expected_type;
720                 switch (fmt) {
721                         case 'd':
722                         case 'i':
723                                 switch (fmt_mod) {
724                                         case FMT_MOD_NONE: expected_type = type_int;         break;
725                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
726                                         case FMT_MOD_h:    expected_type = type_short;       break;
727                                         case FMT_MOD_l:    expected_type = type_long;        break;
728                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
729                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
730                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
731                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
732                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
733                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
734                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
735
736                                         default:
737                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
738                                                 goto next_arg;
739                                 }
740                                 break;
741
742                         case 'o':
743                         case 'X':
744                         case 'x':
745                                 goto eval_fmt_mod_unsigned;
746
747                         case 'u':
748 eval_fmt_mod_unsigned:
749                                 switch (fmt_mod) {
750                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
751                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
752                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
753                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
754                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
755                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
756                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
757                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
758                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
759                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
760                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
761
762                                         default:
763                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
764                                                 goto next_arg;
765                                 }
766                                 break;
767
768                         case 'A':
769                         case 'a':
770                         case 'E':
771                         case 'e':
772                         case 'F':
773                         case 'f':
774                         case 'G':
775                         case 'g':
776                                 switch (fmt_mod) {
777                                         case FMT_MOD_l:    /* l modifier is ignored */
778                                         case FMT_MOD_NONE: expected_type = type_double;      break;
779                                         case FMT_MOD_L:    expected_type = type_long_double; break;
780
781                                         default:
782                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
783                                                 goto next_arg;
784                                 }
785                                 break;
786
787                         case 'C':
788                                 if (fmt_mod != FMT_MOD_NONE) {
789                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
790                                         goto next_arg;
791                                 }
792                                 expected_type = type_wchar_t;
793                                 break;
794
795                         case 'c':
796                                 expected_type = type_int;
797                                 switch (fmt_mod) {
798                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
799                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
800                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
801
802                                         default:
803                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
804                                                 goto next_arg;
805                                 }
806                                 break;
807
808                         case 'S':
809                                 if (fmt_mod != FMT_MOD_NONE) {
810                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
811                                         goto next_arg;
812                                 }
813                                 expected_type = type_wchar_t;
814                                 break;
815
816                         case 's':
817                         case '[':
818                                 switch (fmt_mod) {
819                                         case FMT_MOD_NONE: expected_type = type_char;    break;
820                                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
821                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
822
823                                         default:
824                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
825                                                 goto next_arg;
826                                 }
827                                 break;
828
829                         case 'p':
830                                 if (fmt_mod != FMT_MOD_NONE) {
831                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
832                                         goto next_arg;
833                                 }
834                                 expected_type = type_void_ptr;
835                                 break;
836
837                         case 'n':
838                                 switch (fmt_mod) {
839                                         case FMT_MOD_NONE: expected_type = type_int;         break;
840                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
841                                         case FMT_MOD_h:    expected_type = type_short;       break;
842                                         case FMT_MOD_l:    expected_type = type_long;        break;
843                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
844                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
845                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
846                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
847
848                                         default:
849                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
850                                                 goto next_arg;
851                                 }
852                                 break;
853
854                         default:
855                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
856                                 if (arg == NULL) {
857                                         warningf(pos, "too few arguments for format string");
858                                         return;
859                                 }
860                                 goto next_arg;
861                 }
862
863                 if (arg == NULL) {
864                         warningf(pos, "too few arguments for format string");
865                         return;
866                 }
867
868                 {       /* create a scope here to prevent warning about the jump to next_arg */
869                         type_t *const arg_type           = arg->expression->base.type;
870                         type_t *const arg_skip           = skip_typeref(arg_type);
871                         type_t *const expected_type_skip = skip_typeref(expected_type);
872
873                         if (! is_type_pointer(arg_skip))
874                                 goto error_arg_type;
875                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
876
877                         if (fmt == 'p') {
878                                 /* allow any pointer type for %p, not just void */
879                                 if (is_type_pointer(ptr_skip))
880                                         goto next_arg;
881                         }
882
883                         /* do NOT allow const or restrict, all other should be ok */
884                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
885                                 goto error_arg_type;
886                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
887                         if (unqual_ptr == expected_type_skip) {
888                                 goto next_arg;
889                         } else if (expected_type_skip == type_char) {
890                                 /* char matches with unsigned char AND signed char */
891                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
892                                         goto next_arg;
893                         }
894 error_arg_type:
895                         if (is_type_valid(arg_skip)) {
896                                 warningf(pos,
897                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
898                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
899                         }
900                 }
901 next_arg:
902                 arg = arg->next;
903         }
904         if (!atend(&vchar)) {
905                 warningf(pos, "format string contains NUL");
906         }
907         if (arg != NULL) {
908                 unsigned num_args = num_fmt;
909                 while (arg != NULL) {
910                         ++num_args;
911                         arg = arg->next;
912                 }
913                 warningf(pos, "%u argument%s but only %u format string%s",
914                         num_args, num_args != 1 ? "s" : "",
915                         num_fmt, num_fmt != 1 ? "s" : "");
916         }
917 }
918
919 static const format_spec_t builtin_table[] = {
920         { "printf",        FORMAT_PRINTF,   0, 1 },
921         { "wprintf",       FORMAT_PRINTF,   0, 1 },
922         { "sprintf",       FORMAT_PRINTF,   1, 2 },
923         { "swprintf",      FORMAT_PRINTF,   1, 2 },
924         { "snprintf",      FORMAT_PRINTF,   2, 3 },
925         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
926         { "fprintf",       FORMAT_PRINTF,   1, 2 },
927         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
928         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
929         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
930
931         { "scanf",         FORMAT_SCANF,    0, 1 },
932         { "wscanf",        FORMAT_SCANF,    0, 1 },
933         { "sscanf",        FORMAT_SCANF,    1, 2 },
934         { "swscanf",       FORMAT_SCANF,    1, 2 },
935         { "fscanf",        FORMAT_SCANF,    1, 2 },
936         { "fwscanf",       FORMAT_SCANF,    1, 2 },
937
938         { "strftime",      FORMAT_STRFTIME, 3, 4 },
939         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
940
941         { "strfmon",       FORMAT_STRFMON,  3, 4 },
942
943         /* MS extensions */
944         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
945         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
946         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
947         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
948         { "printf_s",      FORMAT_PRINTF,   0, 1 },
949         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
950         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
951         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
952         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
953         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
954         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
955         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
956         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
957         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
958         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
959         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
960         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
961         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
962         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
963         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
964         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
965         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
966 };
967
968 void check_format(const call_expression_t *const call)
969 {
970         if (!warning.format)
971                 return;
972
973         const expression_t *const func_expr = call->function;
974         if (func_expr->kind != EXPR_REFERENCE)
975                 return;
976
977         const entity_t        *const entity = func_expr->reference.entity;
978         const call_argument_t *      arg    = call->arguments;
979
980         if (false) {
981                 /* the declaration has a GNU format attribute, check it */
982         } else {
983                 /*
984                  * For some functions we always check the format, even if it was not specified.
985                  * This allows to check format even in MS mode or without header included.
986                  */
987                 const char *const name = entity->base.symbol->string;
988                 for (size_t i = 0; i < sizeof(builtin_table) / sizeof(builtin_table[0]); ++i) {
989                         if (strcmp(name, builtin_table[i].name) == 0) {
990                                 switch (builtin_table[i].fmt_kind) {
991                                 case FORMAT_PRINTF:
992                                         check_printf_format(arg, &builtin_table[i]);
993                                         break;
994                                 case FORMAT_SCANF:
995                                         check_scanf_format(arg, &builtin_table[i]);
996                                         break;
997                                 case FORMAT_STRFTIME:
998                                 case FORMAT_STRFMON:
999                                         /* TODO: implement other cases */
1000                                         break;
1001                                 }
1002                                 break;
1003                         }
1004                 }
1005         }
1006 }