Support '*' (suppress assignment) in scanf() check.
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <config.h>
21
22 #include <ctype.h>
23
24 #include "adt/util.h"
25 #include "format_check.h"
26 #include "symbol_t.h"
27 #include "ast_t.h"
28 #include "entity_t.h"
29 #include "diagnostic.h"
30 #include "types.h"
31 #include "type_t.h"
32 #include "warning.h"
33 #include "lang_features.h"
34
/**
 * Flag characters that may appear in a printf conversion specification.
 * Every flag occupies its own bit so that several of them can be combined
 * in a format_flags_t.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00U, /* no flag given */
        FMT_FLAG_HASH  = 0x01U, /* '#'  */
        FMT_FLAG_ZERO  = 0x02U, /* '0'  */
        FMT_FLAG_MINUS = 0x04U, /* '-'  */
        FMT_FLAG_SPACE = 0x08U, /* ' '  */
        FMT_FLAG_PLUS  = 0x10U, /* '+'  */
        FMT_FLAG_TICK  = 0x20U  /* '\'' */
} format_flag_t;

/** A set of format_flag_t bits combined with bitwise OR. */
typedef unsigned format_flags_t;
46
/**
 * Length modifiers recognised between the precision and the conversion
 * character of a format specification.  The enumerators are consecutive
 * starting at 0, because they are used as array indices when looking up the
 * modifier's spelling.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /* no modifier */
        FMT_MOD_L,        /* "L"  */
        FMT_MOD_hh,       /* "hh" */
        FMT_MOD_h,        /* "h"  */
        FMT_MOD_l,        /* "l"  */
        FMT_MOD_ll,       /* "ll" */
        FMT_MOD_j,        /* "j"  */
        FMT_MOD_t,        /* "t"  */
        FMT_MOD_z,        /* "z"  */
        FMT_MOD_q,        /* "q"  */

        /* the remaining modifiers are only accepted in microsoft mode */
        FMT_MOD_w,        /* "w"   */
        FMT_MOD_I,        /* "I"   */
        FMT_MOD_I32,      /* "I32" */
        FMT_MOD_I64       /* "I64" */
} format_length_modifier_t;
64
65 typedef struct format_spec_t {
66         const char    *name;     /**< name of the function */
67         format_kind_t  fmt_kind; /**< kind */
68         unsigned       fmt_idx;  /**< index of the format string */
69         unsigned       arg_idx;  /**< index of the first argument */
70 } format_spec_t;
71
72 static const char* get_length_modifier_name(const format_length_modifier_t mod)
73 {
74         static const char* const names[] = {
75                 [FMT_MOD_NONE] = "",
76                 [FMT_MOD_L]    = "L",
77                 [FMT_MOD_hh]   = "hh",
78                 [FMT_MOD_h]    = "h",
79                 [FMT_MOD_l]    = "l",
80                 [FMT_MOD_ll]   = "ll",
81                 [FMT_MOD_j]    = "j",
82                 [FMT_MOD_t]    = "t",
83                 [FMT_MOD_z]    = "z",
84                 [FMT_MOD_q]    = "q",
85                 /* only in microsoft mode */
86                 [FMT_MOD_w]    = "w",
87                 [FMT_MOD_I]    = "I",
88                 [FMT_MOD_I32]  = "I32",
89                 [FMT_MOD_I64]  = "I64"
90         };
91         assert((size_t)mod < lengthof(names));
92         return names[mod];
93 }
94
95 static void warn_invalid_length_modifier(const source_position_t *pos,
96                                          const format_length_modifier_t mod,
97                                          const utf32 conversion)
98 {
99         char const *const lmod = get_length_modifier_name(mod);
100         warningf(WARN_FORMAT, pos, "invalid length modifier '%s' for conversion specifier '%%%c'", lmod, conversion);
101 }
102
103 /**
104  * Check printf-style format. Returns number of expected arguments.
105  */
106 static int internal_check_printf_format(const expression_t *fmt_expr,
107                                         const call_argument_t *arg,
108                                         const format_spec_t *spec)
109 {
110         while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
111                 fmt_expr = fmt_expr->unary.value;
112         }
113
114         /*
115          * gettext results in expressions like (X ? "format_string" : Y)
116          * we assume the left part is the format string
117          */
118         if (fmt_expr->kind == EXPR_CONDITIONAL) {
119                 conditional_expression_t const *const c = &fmt_expr->conditional;
120                 expression_t             const *      t = c->true_expression;
121                 if (t == NULL)
122                         t = c->condition;
123                 int const nt = internal_check_printf_format(t,                   arg, spec);
124                 int const nf = internal_check_printf_format(c->false_expression, arg, spec);
125                 return nt > nf ? nt : nf;
126         }
127
128         if (fmt_expr->kind != EXPR_STRING_LITERAL
129                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
130                 return -1;
131
132         const char *string = fmt_expr->literal.value.begin;
133         size_t      size   = fmt_expr->literal.value.size;
134         const char *c      = string;
135
136         const source_position_t *pos = &fmt_expr->base.source_position;
137         unsigned num_fmt  = 0;
138         unsigned num_args = 0;
139         char     fmt;
140         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
141                 if (fmt != '%')
142                         continue;
143                 fmt = *(++c);
144
145                 if (fmt == '\0') {
146                         warningf(WARN_FORMAT, pos, "dangling %% in format string");
147                         break;
148                 }
149                 if (fmt == '%')
150                         continue;
151
152                 ++num_fmt;
153                 ++num_args;
154
155                 format_flags_t fmt_flags = FMT_FLAG_NONE;
156                 if (fmt == '0') {
157                         fmt = *(++c);
158                         fmt_flags |= FMT_FLAG_ZERO;
159                 }
160
161                 /* argument selector or minimum field width */
162                 if (isdigit(fmt)) {
163                         do {
164                                 fmt = *(++c);
165                         } while (isdigit(fmt));
166
167                         /* digit string was ... */
168                         if (fmt == '$') {
169                                 /* ... argument selector */
170                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
171                                 /* TODO implement */
172                                 return -1;
173                         }
174                         /* ... minimum field width */
175                 } else {
176                         /* flags */
177                         for (;;) {
178                                 format_flags_t flag;
179                                 switch (fmt) {
180                                         case '#':  flag = FMT_FLAG_HASH;  break;
181                                         case '0':  flag = FMT_FLAG_ZERO;  break;
182                                         case '-':  flag = FMT_FLAG_MINUS; break;
183                                         case '\'': flag = FMT_FLAG_TICK;  break;
184
185                                         case ' ':
186                                                 if (fmt_flags & FMT_FLAG_PLUS) {
187                                                         warningf(WARN_FORMAT, pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
188                                                 }
189                                                 flag = FMT_FLAG_SPACE;
190                                                 break;
191
192                                         case '+':
193                                                 if (fmt_flags & FMT_FLAG_SPACE) {
194                                                         warningf(WARN_FORMAT, pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
195                                                 }
196                                                 flag = FMT_FLAG_PLUS;
197                                                 break;
198
199                                         default: goto break_fmt_flags;
200                                 }
201                                 if (fmt_flags & flag) {
202                                         warningf(WARN_FORMAT, pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
203                                 }
204                                 fmt_flags |= flag;
205                                 fmt = *(++c);
206                         }
207 break_fmt_flags:
208
209                         /* minimum field width */
210                         if (fmt == '*') {
211                                 ++num_args;
212                                 fmt = *(++c);
213                                 if (arg == NULL) {
214                                         warningf(WARN_FORMAT, pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
215                                         return -1;
216                                 }
217                                 const type_t *const arg_type = arg->expression->base.type;
218                                 if (arg_type != type_int) {
219                                         warningf(WARN_FORMAT, pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
220                                 }
221                                 arg = arg->next;
222                         } else {
223                                 while (isdigit(fmt)) {
224                                         fmt = *(++c);
225                                 }
226                         }
227                 }
228
229                 /* precision */
230                 if (fmt == '.') {
231                         if (fmt_flags & FMT_FLAG_ZERO) {
232                                 warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
233                         }
234
235                         ++num_args;
236                         fmt = *(++c);
237                         if (fmt == '*') {
238                                 fmt = *(++c);
239                                 if (arg == NULL) {
240                                         warningf(WARN_FORMAT, pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
241                                         return -1;
242                                 }
243                                 const type_t *const arg_type = arg->expression->base.type;
244                                 if (arg_type != type_int) {
245                                         warningf(WARN_FORMAT, pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
246                                 }
247                                 arg = arg->next;
248                         } else {
249                                 /* digit string may be omitted */
250                                 while (isdigit(fmt)) {
251                                         fmt = *(++c);
252                                 }
253                         }
254                 }
255
256                 /* length modifier */
257                 format_length_modifier_t fmt_mod;
258                 switch (fmt) {
259                         case 'h':
260                                 fmt = *(++c);
261                                 if (fmt == 'h') {
262                                         fmt = *(++c);
263                                         fmt_mod = FMT_MOD_hh;
264                                 } else {
265                                         fmt_mod = FMT_MOD_h;
266                                 }
267                                 break;
268
269                         case 'l':
270                                 fmt = *(++c);
271                                 if (fmt == 'l') {
272                                         fmt = *(++c);
273                                         fmt_mod = FMT_MOD_ll;
274                                 } else {
275                                         fmt_mod = FMT_MOD_l;
276                                 }
277                                 break;
278
279                         case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
280                         case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
281                         case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
282                         case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
283                         case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
284                         /* microsoft mode */
285                         case 'w':
286                                 if (c_mode & _MS) {
287                                         fmt = *(++c); fmt_mod = FMT_MOD_w;
288                                 } else {
289                                         fmt_mod = FMT_MOD_NONE;
290                                 }
291                                 break;
292                         case 'I':
293                                 if (c_mode & _MS) {
294                                         fmt = *(++c); fmt_mod = FMT_MOD_I;
295                                         if (fmt == '3') {
296                                                 fmt = *(++c);
297                                                 if (fmt == '2') {
298                                                         fmt = *(++c);
299                                                         fmt_mod = FMT_MOD_I32;
300                                                 } else {
301                                                         /* rewind */
302                                                         fmt = *(--c);
303                                                 }
304                                         } else if (fmt == '6') {
305                                                 fmt = *(++c);
306                                                 if (fmt == '4') {
307                                                         fmt = *(++c);
308                                                         fmt_mod = FMT_MOD_I64;
309                                                 } else {
310                                                         /* rewind */
311                                                         fmt = *(--c);
312                                                 }
313                                         }
314                                 } else {
315                                         fmt_mod = FMT_MOD_NONE;
316                                 }
317                                 break;
318                         default:
319                                 fmt_mod = FMT_MOD_NONE;
320                                 break;
321                 }
322
323
324                 type_t            *expected_type;
325                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
326                 format_flags_t     allowed_flags;
327                 switch (fmt) {
328                         case 'd':
329                         case 'i':
330                                 switch (fmt_mod) {
331                                         case FMT_MOD_NONE: expected_type = type_int;         break;
332                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
333                                         case FMT_MOD_h:    expected_type = type_short;       break;
334                                         case FMT_MOD_l:    expected_type = type_long;        break;
335                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
336                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
337                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
338                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
339                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
340                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
341                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
342
343                                         default:
344                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
345                                                 goto next_arg;
346                                 }
347                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
348                                 break;
349
350                         case 'o':
351                         case 'X':
352                         case 'x':
353                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
354                                 goto eval_fmt_mod_unsigned;
355
356                         case 'u':
357                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
358 eval_fmt_mod_unsigned:
359                                 switch (fmt_mod) {
360                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
361                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
362                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
363                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
364                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
365                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
366                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
367                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
368                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
369                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
370                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
371
372                                         default:
373                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
374                                                 goto next_arg;
375                                 }
376                                 break;
377
378                         case 'A':
379                         case 'a':
380                         case 'E':
381                         case 'e':
382                         case 'F':
383                         case 'f':
384                         case 'G':
385                         case 'g':
386                                 switch (fmt_mod) {
387                                         case FMT_MOD_l:    /* l modifier is ignored */
388                                         case FMT_MOD_NONE: expected_type = type_double;      break;
389                                         case FMT_MOD_L:    expected_type = type_long_double; break;
390
391                                         default:
392                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
393                                                 goto next_arg;
394                                 }
395                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
396                                 break;
397
398                         case 'C':
399                                 if (fmt_mod != FMT_MOD_NONE) {
400                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
401                                         goto next_arg;
402                                 }
403                                 expected_type = type_wchar_t;
404                                 allowed_flags = FMT_FLAG_NONE;
405                                 break;
406
407                         case 'c':
408                                 expected_type = type_int;
409                                 switch (fmt_mod) {
410                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
411                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
412                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
413
414                                         default:
415                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
416                                                 goto next_arg;
417                                 }
418                                 allowed_flags = FMT_FLAG_NONE;
419                                 break;
420
421                         case 'S':
422                                 if (fmt_mod != FMT_MOD_NONE) {
423                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
424                                         goto next_arg;
425                                 }
426                                 expected_type = type_wchar_t_ptr;
427                                 expected_qual = TYPE_QUALIFIER_CONST;
428                                 allowed_flags = FMT_FLAG_MINUS;
429                                 break;
430
431                         case 's':
432                                 switch (fmt_mod) {
433                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
434                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
435                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
436
437                                         default:
438                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
439                                                 goto next_arg;
440                                 }
441                                 expected_qual = TYPE_QUALIFIER_CONST;
442                                 allowed_flags = FMT_FLAG_MINUS;
443                                 break;
444
445                         case 'p':
446                                 if (fmt_mod != FMT_MOD_NONE) {
447                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
448                                         goto next_arg;
449                                 }
450                                 expected_type = type_void_ptr;
451                                 allowed_flags = FMT_FLAG_NONE;
452                                 break;
453
454                         case 'n':
455                                 switch (fmt_mod) {
456                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
457                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
458                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
459                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
460                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
461                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
462                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
463                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
464
465                                         default:
466                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
467                                                 goto next_arg;
468                                 }
469                                 allowed_flags = FMT_FLAG_NONE;
470                                 break;
471
472                         default:
473                                 warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
474                                 if (arg == NULL) {
475                                         goto too_few_args;
476                                 }
477                                 goto next_arg;
478                 }
479
480                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
481                 if (wrong_flags != 0) {
482                         char  wrong[8];
483                         char *p = wrong;
484                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
485                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
486                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
487                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
488                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
489                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
490                         *p = '\0';
491
492                         warningf(WARN_FORMAT, pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
493                 }
494
495                 if (arg == NULL) {
496 too_few_args:
497                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
498                         return -1;
499                 }
500
501                 { /* create a scope here to prevent warning about the jump to next_arg */
502                         type_t *const arg_type           = arg->expression->base.type;
503                         type_t *const arg_skip           = skip_typeref(arg_type);
504                         type_t *const expected_type_skip = skip_typeref(expected_type);
505
506                         if (fmt == 'p') {
507                                 /* allow any pointer type for %p, not just void */
508                                 if (is_type_pointer(arg_skip))
509                                         goto next_arg;
510                         }
511
512                         if (is_type_pointer(expected_type_skip)) {
513                                 if (is_type_pointer(arg_skip)) {
514                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
515                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
516                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
517                                                 get_unqualified_type(arg_to) == exp_to) {
518                                                 goto next_arg;
519                                         }
520                                 }
521                         } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
522                                 goto next_arg;
523                         } else if (arg->expression->kind == EXPR_UNARY_CAST_IMPLICIT) {
524                                 expression_t const *const expr        = arg->expression->unary.value;
525                                 type_t             *const unprom_type = skip_typeref(expr->base.type);
526                                 if (get_unqualified_type(unprom_type) == expected_type_skip) {
527                                         goto next_arg;
528                                 }
529                                 if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
530                                         goto next_arg;
531                                 }
532                         }
533                         if (is_type_valid(arg_skip)) {
534                                 source_position_t const *const apos = &arg->expression->base.source_position;
535                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
536                                 warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
537                         }
538                 }
539 next_arg:
540                 arg = arg->next;
541         }
542         assert(fmt == '\0');
543         if (c+1 < string + size) {
544                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
545         }
546         return num_args;
547 }
548
549 /**
550  * Check printf-style format.
551  */
552 static void check_printf_format(call_argument_t const *arg,
553                                 format_spec_t const *const spec)
554 {
555         /* find format arg */
556         size_t idx = 0;
557         for (; idx < spec->fmt_idx; ++idx) {
558                 if (arg == NULL)
559                         return;
560                 arg = arg->next;
561         }
562
563         expression_t const *const fmt_expr = arg->expression;
564
565         /* find the real args */
566         for (; idx < spec->arg_idx && arg != NULL; ++idx)
567                 arg = arg->next;
568
569         int const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
570         if (num_fmt < 0)
571                 return;
572
573         size_t num_args = 0;
574         for (; arg != NULL; arg = arg->next)
575                 ++num_args;
576         if (num_args > (size_t)num_fmt) {
577                 source_position_t const *const pos = &fmt_expr->base.source_position;
578                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
579         }
580 }
581
582 /**
583  * Check scanf-style format.
584  */
585 static void check_scanf_format(const call_argument_t *arg,
586                                const format_spec_t *spec)
587 {
588         /* find format arg */
589         unsigned idx = 0;
590         for (; idx < spec->fmt_idx; ++idx) {
591                 if (arg == NULL)
592                         return;
593                 arg = arg->next;
594         }
595
596         const expression_t *fmt_expr = arg->expression;
597         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
598                 fmt_expr = fmt_expr->unary.value;
599         }
600
601         if (fmt_expr->kind != EXPR_STRING_LITERAL
602                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
603                 return;
604
605         const char *string = fmt_expr->literal.value.begin;
606         size_t      size   = fmt_expr->literal.value.size;
607         const char *c      = string;
608
609         /* find the real args */
610         for (; idx < spec->arg_idx && arg != NULL; ++idx)
611                 arg = arg->next;
612
613         const source_position_t *pos = &fmt_expr->base.source_position;
614         unsigned num_fmt = 0;
615         char     fmt;
616         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
617                 if (fmt != '%')
618                         continue;
619                 fmt = *(++c);
620                 if (fmt == '%')
621                         continue;
622
623                 ++num_fmt;
624
625                 bool suppress_assignment = false;
626                 if (fmt == '*') {
627                         fmt = *++c;
628                         suppress_assignment = true;
629                 }
630
631                 /* look for length modifiers */
632                 format_length_modifier_t fmt_mod = FMT_MOD_NONE;
633                 switch (fmt) {
634                 case 'h':
635                         fmt = *(++c);
636                         if (fmt == 'h') {
637                                 fmt = *(++c);
638                                 fmt_mod = FMT_MOD_hh;
639                         } else {
640                                 fmt_mod = FMT_MOD_h;
641                         }
642                         break;
643
644                 case 'l':
645                         fmt = *(++c);
646                         if (fmt == 'l') {
647                                 fmt = *(++c);
648                                 fmt_mod = FMT_MOD_ll;
649                         } else {
650                                 fmt_mod = FMT_MOD_l;
651                         }
652                         break;
653
654                 case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
655                 case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
656                 case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
657                 case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
658                 /* microsoft mode */
659                 case 'w':
660                         if (c_mode & _MS) {
661                                 fmt = *(++c);
662                                 fmt_mod = FMT_MOD_w;
663                         }
664                         break;
665                 case 'I':
666                         if (c_mode & _MS) {
667                                 fmt = *(++c);
668                                 fmt_mod = FMT_MOD_I;
669                                 if (fmt == '3') {
670                                         fmt = *(++c);
671                                         if (fmt == '2') {
672                                                 fmt = *(++c);
673                                                 fmt_mod = FMT_MOD_I32;
674                                         } else {
675                                                 /* rewind */
676                                                 fmt = *(--c);
677                                         }
678                                 } else if (fmt == '6') {
679                                         fmt = *(++c);
680                                         if (fmt == '4') {
681                                                 fmt = *(++c);
682                                                 fmt_mod = FMT_MOD_I64;
683                                         } else {
684                                                 /* rewind */
685                                                 fmt = *(--c);
686                                         }
687                                 }
688                         }
689                         break;
690                 }
691
692                 if (fmt == '\0') {
693                         warningf(WARN_FORMAT, pos, "dangling %% with conversion specififer in format string");
694                         break;
695                 }
696
697                 type_t *expected_type;
698                 switch (fmt) {
699                 case 'd':
700                 case 'i':
701                         switch (fmt_mod) {
702                         case FMT_MOD_NONE: expected_type = type_int;         break;
703                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
704                         case FMT_MOD_h:    expected_type = type_short;       break;
705                         case FMT_MOD_l:    expected_type = type_long;        break;
706                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
707                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
708                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
709                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
710                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
711                         case FMT_MOD_I32:  expected_type = type_int32;       break;
712                         case FMT_MOD_I64:  expected_type = type_int64;       break;
713
714                         default:
715                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
716                                 goto next_arg;
717                         }
718                         break;
719
720                 case 'o':
721                 case 'X':
722                 case 'x':
723                 case 'u':
724                         switch (fmt_mod) {
725                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
726                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
727                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
728                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
729                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
730                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
731                         case FMT_MOD_z:    expected_type = type_size_t;             break;
732                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
733                         case FMT_MOD_I:    expected_type = type_size_t;             break;
734                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
735                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
736
737                         default:
738                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
739                                 goto next_arg;
740                         }
741                         break;
742
743                 case 'A':
744                 case 'a':
745                 case 'E':
746                 case 'e':
747                 case 'F':
748                 case 'f':
749                 case 'G':
750                 case 'g':
751                         switch (fmt_mod) {
752                         case FMT_MOD_l:    expected_type = type_double;      break;
753                         case FMT_MOD_NONE: expected_type = type_float;       break;
754                         case FMT_MOD_L:    expected_type = type_long_double; break;
755
756                         default:
757                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
758                                 goto next_arg;
759                         }
760                         break;
761
762                 case 'C':
763                         if (fmt_mod != FMT_MOD_NONE) {
764                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
765                                 goto next_arg;
766                         }
767                         expected_type = type_wchar_t;
768                         break;
769
770                 case 'c':
771                         expected_type = type_int;
772                         switch (fmt_mod) {
773                         case FMT_MOD_NONE: expected_type = type_char;    break;
774                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
775                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
776
777                         default:
778                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
779                                 goto next_arg;
780                         }
781                         break;
782
783                 case 'S':
784                         if (fmt_mod != FMT_MOD_NONE) {
785                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
786                                 goto next_arg;
787                         }
788                         expected_type = type_wchar_t;
789                         break;
790
791                 case 's':
792                 case '[':
793                         switch (fmt_mod) {
794                                 case FMT_MOD_NONE: expected_type = type_char;    break;
795                                 case FMT_MOD_l:    expected_type = type_wchar_t; break;
796                                 case FMT_MOD_w:    expected_type = type_wchar_t; break;
797
798                                 default:
799                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
800                                         goto next_arg;
801                         }
802                         break;
803
804                 case 'p':
805                         if (fmt_mod != FMT_MOD_NONE) {
806                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
807                                 goto next_arg;
808                         }
809                         expected_type = type_void_ptr;
810                         break;
811
812                 case 'n': {
813                         if (suppress_assignment) {
814                                 warningf(WARN_FORMAT, pos, "conversion '%n' cannot be suppressed with '*' at format %u", num_fmt);
815                         }
816
817                         switch (fmt_mod) {
818                         case FMT_MOD_NONE: expected_type = type_int;         break;
819                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
820                         case FMT_MOD_h:    expected_type = type_short;       break;
821                         case FMT_MOD_l:    expected_type = type_long;        break;
822                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
823                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
824                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
825                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
826
827                         default:
828                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
829                                 goto next_arg;
830                         }
831                         break;
832                 }
833
834                 default:
835                         warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
836                         if (suppress_assignment)
837                                 continue;
838                         if (arg == NULL)
839                                 goto too_few_args;
840                         goto next_arg;
841                 }
842
843                 if (suppress_assignment)
844                         continue;
845
846                 if (arg == NULL) {
847 too_few_args:
848                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
849                         return;
850                 }
851
852                 { /* create a scope here to prevent warning about the jump to next_arg */
853                         type_t *const arg_type           = arg->expression->base.type;
854                         type_t *const arg_skip           = skip_typeref(arg_type);
855                         type_t *const expected_type_skip = skip_typeref(expected_type);
856
857                         if (! is_type_pointer(arg_skip))
858                                 goto error_arg_type;
859                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
860
861                         if (fmt == 'p') {
862                                 /* allow any pointer type for %p, not just void */
863                                 if (is_type_pointer(ptr_skip))
864                                         goto next_arg;
865                         }
866
867                         /* do NOT allow const or restrict, all other should be ok */
868                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
869                                 goto error_arg_type;
870                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
871                         if (unqual_ptr == expected_type_skip) {
872                                 goto next_arg;
873                         } else if (expected_type_skip == type_char) {
874                                 /* char matches with unsigned char AND signed char */
875                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
876                                         goto next_arg;
877                         }
878 error_arg_type:
879                         if (is_type_valid(arg_skip)) {
880                                 source_position_t const *const apos = &arg->expression->base.source_position;
881                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
882                                 warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
883                         }
884                 }
885 next_arg:
886                 arg = arg->next;
887         }
888         assert(fmt == '\0');
889         if (c+1 < string + size) {
890                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
891         }
892         if (arg != NULL) {
893                 unsigned num_args = num_fmt;
894                 while (arg != NULL) {
895                         ++num_args;
896                         arg = arg->next;
897                 }
898                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : "");
899         }
900 }
901
902 static const format_spec_t builtin_table[] = {
903         { "printf",        FORMAT_PRINTF,   0, 1 },
904         { "wprintf",       FORMAT_PRINTF,   0, 1 },
905         { "sprintf",       FORMAT_PRINTF,   1, 2 },
906         { "swprintf",      FORMAT_PRINTF,   1, 2 },
907         { "snprintf",      FORMAT_PRINTF,   2, 3 },
908         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
909         { "fprintf",       FORMAT_PRINTF,   1, 2 },
910         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
911         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
912         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
913
914         { "scanf",         FORMAT_SCANF,    0, 1 },
915         { "wscanf",        FORMAT_SCANF,    0, 1 },
916         { "sscanf",        FORMAT_SCANF,    1, 2 },
917         { "swscanf",       FORMAT_SCANF,    1, 2 },
918         { "fscanf",        FORMAT_SCANF,    1, 2 },
919         { "fwscanf",       FORMAT_SCANF,    1, 2 },
920
921         { "strftime",      FORMAT_STRFTIME, 3, 4 },
922         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
923
924         { "strfmon",       FORMAT_STRFMON,  3, 4 },
925
926         /* MS extensions */
927         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
928         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
929         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
930         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
931         { "printf_s",      FORMAT_PRINTF,   0, 1 },
932         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
933         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
934         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
935         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
936         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
937         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
938         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
939         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
940         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
941         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
942         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
943         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
944         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
945         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
946         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
947         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
948         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
949 };
950
951 void check_format(const call_expression_t *const call)
952 {
953         if (!is_warn_on(WARN_FORMAT))
954                 return;
955
956         const expression_t *const func_expr = call->function;
957         if (func_expr->kind != EXPR_REFERENCE)
958                 return;
959
960         const entity_t        *const entity = func_expr->reference.entity;
961         const call_argument_t *      arg    = call->arguments;
962
963         /*
964          * For some functions we always check the format, even if it was not
965          * specified. This allows to check format even in MS mode or without
966          * header included.
967          */
968         const char *const name = entity->base.symbol->string;
969         for (size_t i = 0; i < lengthof(builtin_table); ++i) {
970                 if (strcmp(name, builtin_table[i].name) == 0) {
971                         switch (builtin_table[i].fmt_kind) {
972                         case FORMAT_PRINTF:
973                                 check_printf_format(arg, &builtin_table[i]);
974                                 break;
975                         case FORMAT_SCANF:
976                                 check_scanf_format(arg, &builtin_table[i]);
977                                 break;
978                         case FORMAT_STRFTIME:
979                         case FORMAT_STRFMON:
980                                 /* TODO: implement other cases */
981                                 break;
982                         }
983                         break;
984                 }
985         }
986 }