Support maximum field width in scanf() check.
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <config.h>
21
22 #include <ctype.h>
23
24 #include "adt/util.h"
25 #include "format_check.h"
26 #include "symbol_t.h"
27 #include "ast_t.h"
28 #include "entity_t.h"
29 #include "diagnostic.h"
30 #include "parser.h"
31 #include "types.h"
32 #include "type_t.h"
33 #include "warning.h"
34 #include "lang_features.h"
35
/**
 * Flag characters of a printf conversion specification.
 * Every flag occupies its own bit, so several can be combined in one mask.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0,
	FMT_FLAG_HASH  = 0x01U, /**< '#'  */
	FMT_FLAG_ZERO  = 0x02U, /**< '0'  */
	FMT_FLAG_MINUS = 0x04U, /**< '-'  */
	FMT_FLAG_SPACE = 0x08U, /**< ' '  */
	FMT_FLAG_PLUS  = 0x10U, /**< '+'  */
	FMT_FLAG_TICK  = 0x20U  /**< '\'' */
} format_flag_t;
45
/** Bit mask built from format_flag_t values. */
typedef unsigned int format_flags_t;
47
/**
 * Length modifiers which may precede a conversion specifier.
 * The values are consecutive, starting at 0, so they can index a table.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0, /**< no length modifier */
	FMT_MOD_L,        /**< "L"  */
	FMT_MOD_hh,       /**< "hh" */
	FMT_MOD_h,        /**< "h"  */
	FMT_MOD_l,        /**< "l"  */
	FMT_MOD_ll,       /**< "ll" */
	FMT_MOD_j,        /**< "j"  */
	FMT_MOD_t,        /**< "t"  */
	FMT_MOD_z,        /**< "z"  */
	FMT_MOD_q,        /**< "q"  */

	/* only in microsoft mode */
	FMT_MOD_w,        /**< "w"   */
	FMT_MOD_I,        /**< "I"   */
	FMT_MOD_I32,      /**< "I32" */
	FMT_MOD_I64       /**< "I64" */
} format_length_modifier_t;
65
66 typedef struct format_spec_t {
67         const char    *name;     /**< name of the function */
68         format_kind_t  fmt_kind; /**< kind */
69         unsigned       fmt_idx;  /**< index of the format string */
70         unsigned       arg_idx;  /**< index of the first argument */
71 } format_spec_t;
72
73 static const char* get_length_modifier_name(const format_length_modifier_t mod)
74 {
75         static const char* const names[] = {
76                 [FMT_MOD_NONE] = "",
77                 [FMT_MOD_L]    = "L",
78                 [FMT_MOD_hh]   = "hh",
79                 [FMT_MOD_h]    = "h",
80                 [FMT_MOD_l]    = "l",
81                 [FMT_MOD_ll]   = "ll",
82                 [FMT_MOD_j]    = "j",
83                 [FMT_MOD_t]    = "t",
84                 [FMT_MOD_z]    = "z",
85                 [FMT_MOD_q]    = "q",
86                 /* only in microsoft mode */
87                 [FMT_MOD_w]    = "w",
88                 [FMT_MOD_I]    = "I",
89                 [FMT_MOD_I32]  = "I32",
90                 [FMT_MOD_I64]  = "I64"
91         };
92         assert((size_t)mod < lengthof(names));
93         return names[mod];
94 }
95
96 static void warn_invalid_length_modifier(const source_position_t *pos,
97                                          const format_length_modifier_t mod,
98                                          const utf32 conversion)
99 {
100         char const *const lmod = get_length_modifier_name(mod);
101         warningf(WARN_FORMAT, pos, "invalid length modifier '%s' for conversion specifier '%%%c'", lmod, conversion);
102 }
103
104 /**
105  * Check printf-style format. Returns number of expected arguments.
106  */
107 static int internal_check_printf_format(const expression_t *fmt_expr,
108                                         const call_argument_t *arg,
109                                         const format_spec_t *spec)
110 {
111         while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
112                 fmt_expr = fmt_expr->unary.value;
113         }
114
115         /*
116          * gettext results in expressions like (X ? "format_string" : Y)
117          * we assume the left part is the format string
118          */
119         if (fmt_expr->kind == EXPR_CONDITIONAL) {
120                 conditional_expression_t const *const c = &fmt_expr->conditional;
121                 expression_t             const *      t = c->true_expression;
122                 if (t == NULL)
123                         t = c->condition;
124                 int const nt = internal_check_printf_format(t,                   arg, spec);
125                 int const nf = internal_check_printf_format(c->false_expression, arg, spec);
126                 return nt > nf ? nt : nf;
127         }
128
129         if (fmt_expr->kind != EXPR_STRING_LITERAL
130                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
131                 return -1;
132
133         const char *string = fmt_expr->literal.value.begin;
134         size_t      size   = fmt_expr->literal.value.size;
135         const char *c      = string;
136
137         const source_position_t *pos = &fmt_expr->base.source_position;
138         unsigned num_fmt  = 0;
139         unsigned num_args = 0;
140         char     fmt;
141         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
142                 if (fmt != '%')
143                         continue;
144                 fmt = *(++c);
145
146                 if (fmt == '\0') {
147                         warningf(WARN_FORMAT, pos, "dangling %% in format string");
148                         break;
149                 }
150                 if (fmt == '%')
151                         continue;
152
153                 ++num_fmt;
154                 ++num_args;
155
156                 format_flags_t fmt_flags = FMT_FLAG_NONE;
157                 if (fmt == '0') {
158                         fmt = *(++c);
159                         fmt_flags |= FMT_FLAG_ZERO;
160                 }
161
162                 /* argument selector or minimum field width */
163                 if (isdigit(fmt)) {
164                         do {
165                                 fmt = *(++c);
166                         } while (isdigit(fmt));
167
168                         /* digit string was ... */
169                         if (fmt == '$') {
170                                 /* ... argument selector */
171                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
172                                 /* TODO implement */
173                                 return -1;
174                         }
175                         /* ... minimum field width */
176                 } else {
177                         /* flags */
178                         for (;;) {
179                                 format_flags_t flag;
180                                 switch (fmt) {
181                                         case '#':  flag = FMT_FLAG_HASH;  break;
182                                         case '0':  flag = FMT_FLAG_ZERO;  break;
183                                         case '-':  flag = FMT_FLAG_MINUS; break;
184                                         case '\'': flag = FMT_FLAG_TICK;  break;
185
186                                         case ' ':
187                                                 if (fmt_flags & FMT_FLAG_PLUS) {
188                                                         warningf(WARN_FORMAT, pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
189                                                 }
190                                                 flag = FMT_FLAG_SPACE;
191                                                 break;
192
193                                         case '+':
194                                                 if (fmt_flags & FMT_FLAG_SPACE) {
195                                                         warningf(WARN_FORMAT, pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
196                                                 }
197                                                 flag = FMT_FLAG_PLUS;
198                                                 break;
199
200                                         default: goto break_fmt_flags;
201                                 }
202                                 if (fmt_flags & flag) {
203                                         warningf(WARN_FORMAT, pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
204                                 }
205                                 fmt_flags |= flag;
206                                 fmt = *(++c);
207                         }
208 break_fmt_flags:
209
210                         /* minimum field width */
211                         if (fmt == '*') {
212                                 ++num_args;
213                                 fmt = *(++c);
214                                 if (arg == NULL) {
215                                         warningf(WARN_FORMAT, pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
216                                         return -1;
217                                 }
218                                 const type_t *const arg_type = arg->expression->base.type;
219                                 if (arg_type != type_int) {
220                                         warningf(WARN_FORMAT, pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
221                                 }
222                                 arg = arg->next;
223                         } else {
224                                 while (isdigit(fmt)) {
225                                         fmt = *(++c);
226                                 }
227                         }
228                 }
229
230                 /* precision */
231                 if (fmt == '.') {
232                         if (fmt_flags & FMT_FLAG_ZERO) {
233                                 warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
234                         }
235
236                         ++num_args;
237                         fmt = *(++c);
238                         if (fmt == '*') {
239                                 fmt = *(++c);
240                                 if (arg == NULL) {
241                                         warningf(WARN_FORMAT, pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
242                                         return -1;
243                                 }
244                                 const type_t *const arg_type = arg->expression->base.type;
245                                 if (arg_type != type_int) {
246                                         warningf(WARN_FORMAT, pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
247                                 }
248                                 arg = arg->next;
249                         } else {
250                                 /* digit string may be omitted */
251                                 while (isdigit(fmt)) {
252                                         fmt = *(++c);
253                                 }
254                         }
255                 }
256
257                 /* length modifier */
258                 format_length_modifier_t fmt_mod;
259                 switch (fmt) {
260                         case 'h':
261                                 fmt = *(++c);
262                                 if (fmt == 'h') {
263                                         fmt = *(++c);
264                                         fmt_mod = FMT_MOD_hh;
265                                 } else {
266                                         fmt_mod = FMT_MOD_h;
267                                 }
268                                 break;
269
270                         case 'l':
271                                 fmt = *(++c);
272                                 if (fmt == 'l') {
273                                         fmt = *(++c);
274                                         fmt_mod = FMT_MOD_ll;
275                                 } else {
276                                         fmt_mod = FMT_MOD_l;
277                                 }
278                                 break;
279
280                         case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
281                         case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
282                         case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
283                         case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
284                         case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
285                         /* microsoft mode */
286                         case 'w':
287                                 if (c_mode & _MS) {
288                                         fmt = *(++c); fmt_mod = FMT_MOD_w;
289                                 } else {
290                                         fmt_mod = FMT_MOD_NONE;
291                                 }
292                                 break;
293                         case 'I':
294                                 if (c_mode & _MS) {
295                                         fmt = *(++c); fmt_mod = FMT_MOD_I;
296                                         if (fmt == '3') {
297                                                 fmt = *(++c);
298                                                 if (fmt == '2') {
299                                                         fmt = *(++c);
300                                                         fmt_mod = FMT_MOD_I32;
301                                                 } else {
302                                                         /* rewind */
303                                                         fmt = *(--c);
304                                                 }
305                                         } else if (fmt == '6') {
306                                                 fmt = *(++c);
307                                                 if (fmt == '4') {
308                                                         fmt = *(++c);
309                                                         fmt_mod = FMT_MOD_I64;
310                                                 } else {
311                                                         /* rewind */
312                                                         fmt = *(--c);
313                                                 }
314                                         }
315                                 } else {
316                                         fmt_mod = FMT_MOD_NONE;
317                                 }
318                                 break;
319                         default:
320                                 fmt_mod = FMT_MOD_NONE;
321                                 break;
322                 }
323
324
325                 type_t            *expected_type;
326                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
327                 format_flags_t     allowed_flags;
328                 switch (fmt) {
329                         case 'd':
330                         case 'i':
331                                 switch (fmt_mod) {
332                                         case FMT_MOD_NONE: expected_type = type_int;         break;
333                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
334                                         case FMT_MOD_h:    expected_type = type_short;       break;
335                                         case FMT_MOD_l:    expected_type = type_long;        break;
336                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
337                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
338                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
339                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
340                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
341                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
342                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
343
344                                         default:
345                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
346                                                 goto next_arg;
347                                 }
348                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
349                                 break;
350
351                         case 'o':
352                         case 'X':
353                         case 'x':
354                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
355                                 goto eval_fmt_mod_unsigned;
356
357                         case 'u':
358                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
359 eval_fmt_mod_unsigned:
360                                 switch (fmt_mod) {
361                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
362                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
363                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
364                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
365                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
366                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
367                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
368                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
369                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
370                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
371                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
372
373                                         default:
374                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
375                                                 goto next_arg;
376                                 }
377                                 break;
378
379                         case 'A':
380                         case 'a':
381                         case 'E':
382                         case 'e':
383                         case 'F':
384                         case 'f':
385                         case 'G':
386                         case 'g':
387                                 switch (fmt_mod) {
388                                         case FMT_MOD_l:    /* l modifier is ignored */
389                                         case FMT_MOD_NONE: expected_type = type_double;      break;
390                                         case FMT_MOD_L:    expected_type = type_long_double; break;
391
392                                         default:
393                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
394                                                 goto next_arg;
395                                 }
396                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
397                                 break;
398
399                         case 'C':
400                                 if (fmt_mod != FMT_MOD_NONE) {
401                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
402                                         goto next_arg;
403                                 }
404                                 expected_type = type_wchar_t;
405                                 allowed_flags = FMT_FLAG_NONE;
406                                 break;
407
408                         case 'c':
409                                 expected_type = type_int;
410                                 switch (fmt_mod) {
411                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
412                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
413                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
414
415                                         default:
416                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
417                                                 goto next_arg;
418                                 }
419                                 allowed_flags = FMT_FLAG_NONE;
420                                 break;
421
422                         case 'S':
423                                 if (fmt_mod != FMT_MOD_NONE) {
424                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
425                                         goto next_arg;
426                                 }
427                                 expected_type = type_wchar_t_ptr;
428                                 expected_qual = TYPE_QUALIFIER_CONST;
429                                 allowed_flags = FMT_FLAG_MINUS;
430                                 break;
431
432                         case 's':
433                                 switch (fmt_mod) {
434                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
435                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
436                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
437
438                                         default:
439                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
440                                                 goto next_arg;
441                                 }
442                                 expected_qual = TYPE_QUALIFIER_CONST;
443                                 allowed_flags = FMT_FLAG_MINUS;
444                                 break;
445
446                         case 'p':
447                                 if (fmt_mod != FMT_MOD_NONE) {
448                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
449                                         goto next_arg;
450                                 }
451                                 expected_type = type_void_ptr;
452                                 allowed_flags = FMT_FLAG_NONE;
453                                 break;
454
455                         case 'n':
456                                 switch (fmt_mod) {
457                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
458                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
459                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
460                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
461                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
462                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
463                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
464                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
465
466                                         default:
467                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
468                                                 goto next_arg;
469                                 }
470                                 allowed_flags = FMT_FLAG_NONE;
471                                 break;
472
473                         default:
474                                 warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
475                                 if (arg == NULL) {
476                                         goto too_few_args;
477                                 }
478                                 goto next_arg;
479                 }
480
481                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
482                 if (wrong_flags != 0) {
483                         char  wrong[8];
484                         char *p = wrong;
485                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
486                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
487                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
488                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
489                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
490                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
491                         *p = '\0';
492
493                         warningf(WARN_FORMAT, pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
494                 }
495
496                 if (arg == NULL) {
497 too_few_args:
498                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
499                         return -1;
500                 }
501
502                 { /* create a scope here to prevent warning about the jump to next_arg */
503                         type_t *const arg_type           = arg->expression->base.type;
504                         type_t *const arg_skip           = skip_typeref(arg_type);
505                         type_t *const expected_type_skip = skip_typeref(expected_type);
506
507                         if (fmt == 'p') {
508                                 /* allow any pointer type for %p, not just void */
509                                 if (is_type_pointer(arg_skip))
510                                         goto next_arg;
511                         }
512
513                         if (is_type_pointer(expected_type_skip)) {
514                                 if (is_type_pointer(arg_skip)) {
515                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
516                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
517                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
518                                                 get_unqualified_type(arg_to) == exp_to) {
519                                                 goto next_arg;
520                                         }
521                                 }
522                         } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
523                                 goto next_arg;
524                         } else if (arg->expression->kind == EXPR_UNARY_CAST_IMPLICIT) {
525                                 expression_t const *const expr        = arg->expression->unary.value;
526                                 type_t             *const unprom_type = skip_typeref(expr->base.type);
527                                 if (get_unqualified_type(unprom_type) == expected_type_skip) {
528                                         goto next_arg;
529                                 }
530                                 if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
531                                         goto next_arg;
532                                 }
533                         }
534                         if (is_type_valid(arg_skip)) {
535                                 source_position_t const *const apos = &arg->expression->base.source_position;
536                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
537                                 warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
538                         }
539                 }
540 next_arg:
541                 arg = arg->next;
542         }
543         assert(fmt == '\0');
544         if (c+1 < string + size) {
545                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
546         }
547         return num_args;
548 }
549
550 /**
551  * Check printf-style format.
552  */
553 static void check_printf_format(call_argument_t const *arg,
554                                 format_spec_t const *const spec)
555 {
556         /* find format arg */
557         size_t idx = 0;
558         for (; idx < spec->fmt_idx; ++idx) {
559                 if (arg == NULL)
560                         return;
561                 arg = arg->next;
562         }
563
564         expression_t const *const fmt_expr = arg->expression;
565
566         /* find the real args */
567         for (; idx < spec->arg_idx && arg != NULL; ++idx)
568                 arg = arg->next;
569
570         int const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
571         if (num_fmt < 0)
572                 return;
573
574         size_t num_args = 0;
575         for (; arg != NULL; arg = arg->next)
576                 ++num_args;
577         if (num_args > (size_t)num_fmt) {
578                 source_position_t const *const pos = &fmt_expr->base.source_position;
579                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
580         }
581 }
582
583 /**
584  * Check scanf-style format.
585  */
586 static void check_scanf_format(const call_argument_t *arg,
587                                const format_spec_t *spec)
588 {
589         /* find format arg */
590         unsigned idx = 0;
591         for (; idx < spec->fmt_idx; ++idx) {
592                 if (arg == NULL)
593                         return;
594                 arg = arg->next;
595         }
596
597         const expression_t *fmt_expr = arg->expression;
598         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
599                 fmt_expr = fmt_expr->unary.value;
600         }
601
602         if (fmt_expr->kind != EXPR_STRING_LITERAL
603                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
604                 return;
605
606         const char *string = fmt_expr->literal.value.begin;
607         size_t      size   = fmt_expr->literal.value.size;
608         const char *c      = string;
609
610         /* find the real args */
611         for (; idx < spec->arg_idx && arg != NULL; ++idx)
612                 arg = arg->next;
613
614         const source_position_t *pos = &fmt_expr->base.source_position;
615         unsigned num_fmt = 0;
616         char     fmt;
617         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
618                 if (fmt != '%')
619                         continue;
620                 fmt = *(++c);
621                 if (fmt == '%')
622                         continue;
623
624                 ++num_fmt;
625
626                 bool suppress_assignment = false;
627                 if (fmt == '*') {
628                         fmt = *++c;
629                         suppress_assignment = true;
630                 }
631
632                 size_t width = 0;
633                 if ('0' <= fmt && fmt <= '9') {
634                         do {
635                                 width = width * 10 + (fmt - '0');
636                                 fmt   = *++c;
637                         } while ('0' <= fmt && fmt <= '9');
638                         if (width == 0) {
639                                 warningf(WARN_FORMAT, pos, "field width is zero at format %u", num_fmt);
640                         }
641                 }
642
643                 /* look for length modifiers */
644                 format_length_modifier_t fmt_mod = FMT_MOD_NONE;
645                 switch (fmt) {
646                 case 'h':
647                         fmt = *(++c);
648                         if (fmt == 'h') {
649                                 fmt = *(++c);
650                                 fmt_mod = FMT_MOD_hh;
651                         } else {
652                                 fmt_mod = FMT_MOD_h;
653                         }
654                         break;
655
656                 case 'l':
657                         fmt = *(++c);
658                         if (fmt == 'l') {
659                                 fmt = *(++c);
660                                 fmt_mod = FMT_MOD_ll;
661                         } else {
662                                 fmt_mod = FMT_MOD_l;
663                         }
664                         break;
665
666                 case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
667                 case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
668                 case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
669                 case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
670                 /* microsoft mode */
671                 case 'w':
672                         if (c_mode & _MS) {
673                                 fmt = *(++c);
674                                 fmt_mod = FMT_MOD_w;
675                         }
676                         break;
677                 case 'I':
678                         if (c_mode & _MS) {
679                                 fmt = *(++c);
680                                 fmt_mod = FMT_MOD_I;
681                                 if (fmt == '3') {
682                                         fmt = *(++c);
683                                         if (fmt == '2') {
684                                                 fmt = *(++c);
685                                                 fmt_mod = FMT_MOD_I32;
686                                         } else {
687                                                 /* rewind */
688                                                 fmt = *(--c);
689                                         }
690                                 } else if (fmt == '6') {
691                                         fmt = *(++c);
692                                         if (fmt == '4') {
693                                                 fmt = *(++c);
694                                                 fmt_mod = FMT_MOD_I64;
695                                         } else {
696                                                 /* rewind */
697                                                 fmt = *(--c);
698                                         }
699                                 }
700                         }
701                         break;
702                 }
703
704                 if (fmt == '\0') {
705                         warningf(WARN_FORMAT, pos, "dangling %% with conversion specififer in format string");
706                         break;
707                 }
708
709                 type_t *expected_type;
710                 switch (fmt) {
711                 case 'd':
712                 case 'i':
713                         switch (fmt_mod) {
714                         case FMT_MOD_NONE: expected_type = type_int;         break;
715                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
716                         case FMT_MOD_h:    expected_type = type_short;       break;
717                         case FMT_MOD_l:    expected_type = type_long;        break;
718                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
719                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
720                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
721                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
722                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
723                         case FMT_MOD_I32:  expected_type = type_int32;       break;
724                         case FMT_MOD_I64:  expected_type = type_int64;       break;
725
726                         default:
727                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
728                                 goto next_arg;
729                         }
730                         break;
731
732                 case 'o':
733                 case 'X':
734                 case 'x':
735                 case 'u':
736                         switch (fmt_mod) {
737                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
738                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
739                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
740                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
741                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
742                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
743                         case FMT_MOD_z:    expected_type = type_size_t;             break;
744                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
745                         case FMT_MOD_I:    expected_type = type_size_t;             break;
746                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
747                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
748
749                         default:
750                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
751                                 goto next_arg;
752                         }
753                         break;
754
755                 case 'A':
756                 case 'a':
757                 case 'E':
758                 case 'e':
759                 case 'F':
760                 case 'f':
761                 case 'G':
762                 case 'g':
763                         switch (fmt_mod) {
764                         case FMT_MOD_l:    expected_type = type_double;      break;
765                         case FMT_MOD_NONE: expected_type = type_float;       break;
766                         case FMT_MOD_L:    expected_type = type_long_double; break;
767
768                         default:
769                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
770                                 goto next_arg;
771                         }
772                         break;
773
774                 case 'C':
775                         if (fmt_mod != FMT_MOD_NONE) {
776                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
777                                 goto next_arg;
778                         }
779                         expected_type = type_wchar_t;
780                         goto check_c_width;
781
782                 case 'c': {
783                         switch (fmt_mod) {
784                         case FMT_MOD_NONE: expected_type = type_char;    break;
785                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
786                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
787
788                         default:
789                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
790                                 goto next_arg;
791                         }
792
793 check_c_width:
794                         if (width == 0)
795                                 width = 1;
796                         if (!suppress_assignment && arg != NULL) {
797                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
798                                 if (is_type_array(type)       &&
799                                     type->array.size_constant &&
800                                     width > type->array.size) {
801                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters at format %u", type, width, num_fmt);
802                                 }
803                         }
804                         break;
805                 }
806
807                 case 'S':
808                         if (fmt_mod != FMT_MOD_NONE) {
809                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
810                                 goto next_arg;
811                         }
812                         expected_type = type_wchar_t;
813                         break;
814
815                 case 's':
816                 case '[': {
817                         switch (fmt_mod) {
818                                 case FMT_MOD_NONE: expected_type = type_char;    break;
819                                 case FMT_MOD_l:    expected_type = type_wchar_t; break;
820                                 case FMT_MOD_w:    expected_type = type_wchar_t; break;
821
822                                 default:
823                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
824                                         goto next_arg;
825                         }
826
827                         if (!suppress_assignment &&
828                             width != 0           &&
829                             arg   != NULL) {
830                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
831                                 if (is_type_array(type)       &&
832                                     type->array.size_constant &&
833                                     width >= type->array.size) {
834                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters and \\0 at format %u", type, width, num_fmt);
835                                 }
836                         }
837                         break;
838                 }
839
840                 case 'p':
841                         if (fmt_mod != FMT_MOD_NONE) {
842                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
843                                 goto next_arg;
844                         }
845                         expected_type = type_void_ptr;
846                         break;
847
848                 case 'n': {
849                         if (suppress_assignment) {
850                                 warningf(WARN_FORMAT, pos, "conversion '%n' cannot be suppressed with '*' at format %u", num_fmt);
851                         }
852
853                         switch (fmt_mod) {
854                         case FMT_MOD_NONE: expected_type = type_int;         break;
855                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
856                         case FMT_MOD_h:    expected_type = type_short;       break;
857                         case FMT_MOD_l:    expected_type = type_long;        break;
858                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
859                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
860                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
861                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
862
863                         default:
864                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
865                                 goto next_arg;
866                         }
867                         break;
868                 }
869
870                 default:
871                         warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
872                         if (suppress_assignment)
873                                 continue;
874                         if (arg == NULL)
875                                 goto too_few_args;
876                         goto next_arg;
877                 }
878
879                 if (suppress_assignment)
880                         continue;
881
882                 if (arg == NULL) {
883 too_few_args:
884                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
885                         return;
886                 }
887
888                 { /* create a scope here to prevent warning about the jump to next_arg */
889                         type_t *const arg_type           = arg->expression->base.type;
890                         type_t *const arg_skip           = skip_typeref(arg_type);
891                         type_t *const expected_type_skip = skip_typeref(expected_type);
892
893                         if (! is_type_pointer(arg_skip))
894                                 goto error_arg_type;
895                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
896
897                         if (fmt == 'p') {
898                                 /* allow any pointer type for %p, not just void */
899                                 if (is_type_pointer(ptr_skip))
900                                         goto next_arg;
901                         }
902
903                         /* do NOT allow const or restrict, all other should be ok */
904                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
905                                 goto error_arg_type;
906                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
907                         if (unqual_ptr == expected_type_skip) {
908                                 goto next_arg;
909                         } else if (expected_type_skip == type_char) {
910                                 /* char matches with unsigned char AND signed char */
911                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
912                                         goto next_arg;
913                         }
914 error_arg_type:
915                         if (is_type_valid(arg_skip)) {
916                                 source_position_t const *const apos = &arg->expression->base.source_position;
917                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
918                                 warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
919                         }
920                 }
921 next_arg:
922                 arg = arg->next;
923         }
924         assert(fmt == '\0');
925         if (c+1 < string + size) {
926                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
927         }
928         if (arg != NULL) {
929                 unsigned num_args = num_fmt;
930                 while (arg != NULL) {
931                         ++num_args;
932                         arg = arg->next;
933                 }
934                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : "");
935         }
936 }
937
938 static const format_spec_t builtin_table[] = {
939         { "printf",        FORMAT_PRINTF,   0, 1 },
940         { "wprintf",       FORMAT_PRINTF,   0, 1 },
941         { "sprintf",       FORMAT_PRINTF,   1, 2 },
942         { "swprintf",      FORMAT_PRINTF,   1, 2 },
943         { "snprintf",      FORMAT_PRINTF,   2, 3 },
944         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
945         { "fprintf",       FORMAT_PRINTF,   1, 2 },
946         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
947         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
948         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
949
950         { "scanf",         FORMAT_SCANF,    0, 1 },
951         { "wscanf",        FORMAT_SCANF,    0, 1 },
952         { "sscanf",        FORMAT_SCANF,    1, 2 },
953         { "swscanf",       FORMAT_SCANF,    1, 2 },
954         { "fscanf",        FORMAT_SCANF,    1, 2 },
955         { "fwscanf",       FORMAT_SCANF,    1, 2 },
956
957         { "strftime",      FORMAT_STRFTIME, 3, 4 },
958         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
959
960         { "strfmon",       FORMAT_STRFMON,  3, 4 },
961
962         /* MS extensions */
963         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
964         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
965         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
966         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
967         { "printf_s",      FORMAT_PRINTF,   0, 1 },
968         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
969         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
970         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
971         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
972         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
973         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
974         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
975         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
976         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
977         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
978         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
979         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
980         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
981         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
982         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
983         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
984         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
985 };
986
987 void check_format(const call_expression_t *const call)
988 {
989         if (!is_warn_on(WARN_FORMAT))
990                 return;
991
992         const expression_t *const func_expr = call->function;
993         if (func_expr->kind != EXPR_REFERENCE)
994                 return;
995
996         const entity_t        *const entity = func_expr->reference.entity;
997         const call_argument_t *      arg    = call->arguments;
998
999         /*
1000          * For some functions we always check the format, even if it was not
1001          * specified. This allows to check format even in MS mode or without
1002          * header included.
1003          */
1004         const char *const name = entity->base.symbol->string;
1005         for (size_t i = 0; i < lengthof(builtin_table); ++i) {
1006                 if (strcmp(name, builtin_table[i].name) == 0) {
1007                         switch (builtin_table[i].fmt_kind) {
1008                         case FORMAT_PRINTF:
1009                                 check_printf_format(arg, &builtin_table[i]);
1010                                 break;
1011                         case FORMAT_SCANF:
1012                                 check_scanf_format(arg, &builtin_table[i]);
1013                                 break;
1014                         case FORMAT_STRFTIME:
1015                         case FORMAT_STRFMON:
1016                                 /* TODO: implement other cases */
1017                                 break;
1018                         }
1019                         break;
1020                 }
1021         }
1022 }