simple support for __attribute__((alias("symbol")))
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
4  */
5 #include <config.h>
6
7 #include <ctype.h>
8
9 #include "adt/strutil.h"
10 #include "adt/util.h"
11 #include "format_check.h"
12 #include "symbol_t.h"
13 #include "ast_t.h"
14 #include "entity_t.h"
15 #include "diagnostic.h"
16 #include "parser.h"
17 #include "types.h"
18 #include "type_t.h"
19 #include "warning.h"
20 #include "lang_features.h"
21
/**
 * Flags of a printf conversion specification. Every flag owns one bit so
 * that several of them can be collected in a format_flags_t.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00, /* no flag given */
        FMT_FLAG_HASH  = 0x01, /* '#'  */
        FMT_FLAG_ZERO  = 0x02, /* '0'  */
        FMT_FLAG_MINUS = 0x04, /* '-'  */
        FMT_FLAG_SPACE = 0x08, /* ' '  */
        FMT_FLAG_PLUS  = 0x10, /* '+'  */
        FMT_FLAG_TICK  = 0x20  /* '\'' */
} format_flag_t;

/** Bit set built from format_flag_t values. */
typedef unsigned format_flags_t;
33
/**
 * Length modifiers recognised between the flags/width/precision part and the
 * conversion character of a format specification. The enumerators are used
 * as indices into the name table of get_length_modifier_name().
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0,  /* no length modifier */
        FMT_MOD_L    = 1,  /* "L"  */
        FMT_MOD_hh   = 2,  /* "hh" */
        FMT_MOD_h    = 3,  /* "h"  */
        FMT_MOD_l    = 4,  /* "l"  */
        FMT_MOD_ll   = 5,  /* "ll" */
        FMT_MOD_j    = 6,  /* "j"  */
        FMT_MOD_t    = 7,  /* "t"  */
        FMT_MOD_z    = 8,  /* "z"  */
        FMT_MOD_q    = 9,  /* "q"  */
        /* the remaining modifiers are only accepted in microsoft mode */
        FMT_MOD_w    = 10, /* "w"   */
        FMT_MOD_I    = 11, /* "I"   */
        FMT_MOD_I32  = 12, /* "I32" */
        FMT_MOD_I64  = 13  /* "I64" */
} format_length_modifier_t;
51
52 typedef struct format_spec_t {
53         const char    *name;     /**< name of the function */
54         format_kind_t  fmt_kind; /**< kind */
55         unsigned       fmt_idx;  /**< index of the format string */
56         unsigned       arg_idx;  /**< index of the first argument */
57 } format_spec_t;
58
59 static const char* get_length_modifier_name(const format_length_modifier_t mod)
60 {
61         static const char* const names[] = {
62                 [FMT_MOD_NONE] = "",
63                 [FMT_MOD_L]    = "L",
64                 [FMT_MOD_hh]   = "hh",
65                 [FMT_MOD_h]    = "h",
66                 [FMT_MOD_l]    = "l",
67                 [FMT_MOD_ll]   = "ll",
68                 [FMT_MOD_j]    = "j",
69                 [FMT_MOD_t]    = "t",
70                 [FMT_MOD_z]    = "z",
71                 [FMT_MOD_q]    = "q",
72                 /* only in microsoft mode */
73                 [FMT_MOD_w]    = "w",
74                 [FMT_MOD_I]    = "I",
75                 [FMT_MOD_I32]  = "I32",
76                 [FMT_MOD_I64]  = "I64"
77         };
78         assert((size_t)mod < lengthof(names));
79         return names[mod];
80 }
81
82 static void warn_invalid_length_modifier(const position_t *pos,
83                                          const format_length_modifier_t mod,
84                                          const char conversion)
85 {
86         char const *const lmod = get_length_modifier_name(mod);
87         warningf(WARN_FORMAT, pos, "invalid length modifier '%s' for conversion specifier '%%%c'", lmod, conversion);
88 }
89
90 /**
91  * Check printf-style format. Returns number of expected arguments.
92  */
93 static int internal_check_printf_format(const expression_t *fmt_expr,
94                                         const call_argument_t *arg,
95                                         const format_spec_t *spec)
96 {
97         while (fmt_expr->kind == EXPR_UNARY_CAST) {
98                 fmt_expr = fmt_expr->unary.value;
99         }
100
101         /*
102          * gettext results in expressions like (X ? "format_string" : Y)
103          * we assume the left part is the format string
104          */
105         if (fmt_expr->kind == EXPR_CONDITIONAL) {
106                 conditional_expression_t const *const c = &fmt_expr->conditional;
107                 expression_t             const *      t = c->true_expression;
108                 if (t == NULL)
109                         t = c->condition;
110                 int const nt = internal_check_printf_format(t,                   arg, spec);
111                 int const nf = internal_check_printf_format(c->false_expression, arg, spec);
112                 return MAX(nt, nf);
113         }
114
115         if (fmt_expr->kind != EXPR_STRING_LITERAL)
116                 return -1;
117
118         const char *string = fmt_expr->string_literal.value.begin;
119         size_t      size   = fmt_expr->string_literal.value.size;
120         const char *c      = string;
121
122         const position_t *pos = &fmt_expr->base.pos;
123         unsigned num_fmt  = 0;
124         unsigned num_args = 0;
125         char     fmt;
126         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
127                 if (fmt != '%')
128                         continue;
129                 fmt = *(++c);
130
131                 if (fmt == '%')
132                         continue;
133
134                 ++num_fmt;
135                 ++num_args;
136
137                 format_flags_t fmt_flags = FMT_FLAG_NONE;
138                 if (fmt == '0') {
139                         fmt = *(++c);
140                         fmt_flags |= FMT_FLAG_ZERO;
141                 }
142
143                 /* argument selector or minimum field width */
144                 if (isdigit(fmt)) {
145                         do {
146                                 fmt = *(++c);
147                         } while (isdigit(fmt));
148
149                         /* digit string was ... */
150                         if (fmt == '$') {
151                                 /* ... argument selector */
152                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
153                                 /* TODO implement */
154                                 return -1;
155                         }
156                         /* ... minimum field width */
157                 } else {
158                         /* flags */
159                         for (;;) {
160                                 format_flags_t flag;
161                                 switch (fmt) {
162                                         case '#':  flag = FMT_FLAG_HASH;  break;
163                                         case '0':  flag = FMT_FLAG_ZERO;  break;
164                                         case '-':  flag = FMT_FLAG_MINUS; break;
165                                         case '\'': flag = FMT_FLAG_TICK;  break;
166
167                                         case ' ':
168                                                 if (fmt_flags & FMT_FLAG_PLUS) {
169                                                         warningf(WARN_FORMAT, pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
170                                                 }
171                                                 flag = FMT_FLAG_SPACE;
172                                                 break;
173
174                                         case '+':
175                                                 if (fmt_flags & FMT_FLAG_SPACE) {
176                                                         warningf(WARN_FORMAT, pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
177                                                 }
178                                                 flag = FMT_FLAG_PLUS;
179                                                 break;
180
181                                         default: goto break_fmt_flags;
182                                 }
183                                 if (fmt_flags & flag) {
184                                         warningf(WARN_FORMAT, pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
185                                 }
186                                 fmt_flags |= flag;
187                                 fmt = *(++c);
188                         }
189 break_fmt_flags:
190
191                         /* minimum field width */
192                         if (fmt == '*') {
193                                 ++num_args;
194                                 fmt = *(++c);
195                                 if (arg == NULL) {
196                                         warningf(WARN_FORMAT, pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
197                                         return -1;
198                                 }
199                                 const type_t *const arg_type = arg->expression->base.type;
200                                 if (arg_type != type_int) {
201                                         warningf(WARN_FORMAT, pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
202                                 }
203                                 arg = arg->next;
204                         } else {
205                                 while (isdigit(fmt)) {
206                                         fmt = *(++c);
207                                 }
208                         }
209                 }
210
211                 /* precision */
212                 if (fmt == '.') {
213                         if (fmt_flags & FMT_FLAG_ZERO) {
214                                 warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
215                         }
216
217                         ++num_args;
218                         fmt = *(++c);
219                         if (fmt == '*') {
220                                 fmt = *(++c);
221                                 if (arg == NULL) {
222                                         warningf(WARN_FORMAT, pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
223                                         return -1;
224                                 }
225                                 const type_t *const arg_type = arg->expression->base.type;
226                                 if (arg_type != type_int) {
227                                         warningf(WARN_FORMAT, pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
228                                 }
229                                 arg = arg->next;
230                         } else {
231                                 /* digit string may be omitted */
232                                 while (isdigit(fmt)) {
233                                         fmt = *(++c);
234                                 }
235                         }
236                 }
237
238                 /* length modifier */
239                 format_length_modifier_t fmt_mod;
240                 switch (fmt) {
241                         case 'h':
242                                 fmt = *(++c);
243                                 if (fmt == 'h') {
244                                         fmt = *(++c);
245                                         fmt_mod = FMT_MOD_hh;
246                                 } else {
247                                         fmt_mod = FMT_MOD_h;
248                                 }
249                                 break;
250
251                         case 'l':
252                                 fmt = *(++c);
253                                 if (fmt == 'l') {
254                                         fmt = *(++c);
255                                         fmt_mod = FMT_MOD_ll;
256                                 } else {
257                                         fmt_mod = FMT_MOD_l;
258                                 }
259                                 break;
260
261                         case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
262                         case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
263                         case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
264                         case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
265                         case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
266                         /* microsoft mode */
267                         case 'w':
268                                 if (c_mode & _MS) {
269                                         fmt = *(++c); fmt_mod = FMT_MOD_w;
270                                 } else {
271                                         fmt_mod = FMT_MOD_NONE;
272                                 }
273                                 break;
274                         case 'I':
275                                 if (c_mode & _MS) {
276                                         fmt = *(++c); fmt_mod = FMT_MOD_I;
277                                         if (fmt == '3') {
278                                                 fmt = *(++c);
279                                                 if (fmt == '2') {
280                                                         fmt = *(++c);
281                                                         fmt_mod = FMT_MOD_I32;
282                                                 } else {
283                                                         /* rewind */
284                                                         fmt = *(--c);
285                                                 }
286                                         } else if (fmt == '6') {
287                                                 fmt = *(++c);
288                                                 if (fmt == '4') {
289                                                         fmt = *(++c);
290                                                         fmt_mod = FMT_MOD_I64;
291                                                 } else {
292                                                         /* rewind */
293                                                         fmt = *(--c);
294                                                 }
295                                         }
296                                 } else {
297                                         fmt_mod = FMT_MOD_NONE;
298                                 }
299                                 break;
300                         default:
301                                 fmt_mod = FMT_MOD_NONE;
302                                 break;
303                 }
304
305                 if (fmt == '\0') {
306                         warningf(WARN_FORMAT, pos, "dangling %% in format string");
307                         break;
308                 }
309
310                 type_t            *expected_type;
311                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
312                 format_flags_t     allowed_flags;
313                 switch (fmt) {
314                         case 'd':
315                         case 'i':
316                                 switch (fmt_mod) {
317                                         case FMT_MOD_NONE: expected_type = type_int;         break;
318                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
319                                         case FMT_MOD_h:    expected_type = type_short;       break;
320                                         case FMT_MOD_l:    expected_type = type_long;        break;
321                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
322                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
323                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
324                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
325                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
326                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
327                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
328
329                                         default:
330                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
331                                                 goto next_arg;
332                                 }
333                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
334                                 break;
335
336                         case 'o':
337                         case 'X':
338                         case 'x':
339                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
340                                 goto eval_fmt_mod_unsigned;
341
342                         case 'u':
343                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
344 eval_fmt_mod_unsigned:
345                                 switch (fmt_mod) {
346                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
347                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
348                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
349                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
350                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
351                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
352                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
353                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
354                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
355                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
356                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
357
358                                         default:
359                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
360                                                 goto next_arg;
361                                 }
362                                 break;
363
364                         case 'A':
365                         case 'a':
366                         case 'E':
367                         case 'e':
368                         case 'F':
369                         case 'f':
370                         case 'G':
371                         case 'g':
372                                 switch (fmt_mod) {
373                                         case FMT_MOD_l:    /* l modifier is ignored */
374                                         case FMT_MOD_NONE: expected_type = type_double;      break;
375                                         case FMT_MOD_L:    expected_type = type_long_double; break;
376
377                                         default:
378                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
379                                                 goto next_arg;
380                                 }
381                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
382                                 break;
383
384                         case 'C':
385                                 if (fmt_mod != FMT_MOD_NONE) {
386                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
387                                         goto next_arg;
388                                 }
389                                 expected_type = type_wchar_t;
390                                 allowed_flags = FMT_FLAG_NONE;
391                                 break;
392
393                         case 'c':
394                                 expected_type = type_int;
395                                 switch (fmt_mod) {
396                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
397                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
398                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
399
400                                         default:
401                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
402                                                 goto next_arg;
403                                 }
404                                 allowed_flags = FMT_FLAG_NONE;
405                                 break;
406
407                         case 'S':
408                                 if (fmt_mod != FMT_MOD_NONE) {
409                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
410                                         goto next_arg;
411                                 }
412                                 expected_type = type_wchar_t_ptr;
413                                 expected_qual = TYPE_QUALIFIER_CONST;
414                                 allowed_flags = FMT_FLAG_MINUS;
415                                 break;
416
417                         case 's':
418                                 switch (fmt_mod) {
419                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
420                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
421                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
422
423                                         default:
424                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
425                                                 goto next_arg;
426                                 }
427                                 expected_qual = TYPE_QUALIFIER_CONST;
428                                 allowed_flags = FMT_FLAG_MINUS;
429                                 break;
430
431                         case 'p':
432                                 if (fmt_mod != FMT_MOD_NONE) {
433                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
434                                         goto next_arg;
435                                 }
436                                 expected_type = type_void_ptr;
437                                 allowed_flags = FMT_FLAG_NONE;
438                                 break;
439
440                         case 'n':
441                                 switch (fmt_mod) {
442                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
443                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
444                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
445                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
446                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
447                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
448                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
449                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
450
451                                         default:
452                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
453                                                 goto next_arg;
454                                 }
455                                 allowed_flags = FMT_FLAG_NONE;
456                                 break;
457
458                         default:
459                                 warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
460                                 if (arg == NULL) {
461                                         goto too_few_args;
462                                 }
463                                 goto next_arg;
464                 }
465
466                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
467                 if (wrong_flags != 0) {
468                         char  wrong[8];
469                         char *p = wrong;
470                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
471                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
472                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
473                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
474                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
475                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
476                         *p = '\0';
477
478                         warningf(WARN_FORMAT, pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
479                 }
480
481                 if (arg == NULL) {
482 too_few_args:
483                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
484                         return -1;
485                 }
486
487                 { /* create a scope here to prevent warning about the jump to next_arg */
488                         type_t *const arg_type           = arg->expression->base.type;
489                         type_t *const arg_skip           = skip_typeref(arg_type);
490                         type_t *const expected_type_skip = skip_typeref(expected_type);
491
492                         if (fmt == 'p') {
493                                 /* allow any pointer type for %p, not just void */
494                                 if (is_type_pointer(arg_skip))
495                                         goto next_arg;
496                         }
497
498                         if (is_type_pointer(expected_type_skip)) {
499                                 if (is_type_pointer(arg_skip)) {
500                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
501                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
502                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
503                                                 get_unqualified_type(arg_to) == exp_to) {
504                                                 goto next_arg;
505                                         }
506                                 }
507                         } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
508                                 goto next_arg;
509                         } else if (arg->expression->kind == EXPR_UNARY_CAST) {
510                                 expression_t const *const expr        = arg->expression->unary.value;
511                                 type_t             *const unprom_type = skip_typeref(expr->base.type);
512                                 if (get_unqualified_type(unprom_type) == expected_type_skip) {
513                                         goto next_arg;
514                                 }
515                                 if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
516                                         goto next_arg;
517                                 }
518                         }
519                         if (is_type_valid(arg_skip)) {
520                                 position_t const *const apos = &arg->expression->base.pos;
521                                 char       const *const mod  = get_length_modifier_name(fmt_mod);
522                                 warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
523                         }
524                 }
525 next_arg:
526                 arg = arg->next;
527         }
528         assert(fmt == '\0');
529         if (c+1 < string + size) {
530                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
531         }
532         return num_args;
533 }
534
535 /**
536  * Check printf-style format.
537  */
538 static void check_printf_format(call_argument_t const *arg,
539                                 format_spec_t const *const spec)
540 {
541         /* find format arg */
542         size_t idx = 0;
543         for (; idx < spec->fmt_idx; ++idx) {
544                 if (arg == NULL)
545                         return;
546                 arg = arg->next;
547         }
548
549         expression_t const *const fmt_expr = arg->expression;
550
551         /* find the real args */
552         for (; idx < spec->arg_idx && arg != NULL; ++idx)
553                 arg = arg->next;
554
555         int const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
556         if (num_fmt < 0)
557                 return;
558
559         size_t num_args = 0;
560         for (; arg != NULL; arg = arg->next)
561                 ++num_args;
562         if (num_args > (size_t)num_fmt) {
563                 position_t const *const pos = &fmt_expr->base.pos;
564                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
565         }
566 }
567
568 /**
569  * Check scanf-style format.
570  */
571 static void check_scanf_format(const call_argument_t *arg,
572                                const format_spec_t *spec)
573 {
574         /* find format arg */
575         unsigned idx = 0;
576         for (; idx < spec->fmt_idx; ++idx) {
577                 if (arg == NULL)
578                         return;
579                 arg = arg->next;
580         }
581
582         const expression_t *fmt_expr = arg->expression;
583         if (fmt_expr->kind == EXPR_UNARY_CAST) {
584                 fmt_expr = fmt_expr->unary.value;
585         }
586
587         if (fmt_expr->kind != EXPR_STRING_LITERAL)
588                 return;
589
590         const char *string = fmt_expr->string_literal.value.begin;
591         size_t      size   = fmt_expr->string_literal.value.size;
592         const char *c      = string;
593
594         /* find the real args */
595         for (; idx < spec->arg_idx && arg != NULL; ++idx)
596                 arg = arg->next;
597
598         const position_t *pos = &fmt_expr->base.pos;
599         unsigned num_fmt = 0;
600         char     fmt;
601         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
602                 if (fmt != '%')
603                         continue;
604                 fmt = *(++c);
605                 if (fmt == '%')
606                         continue;
607
608                 ++num_fmt;
609
610                 bool suppress_assignment = false;
611                 if (fmt == '*') {
612                         fmt = *++c;
613                         suppress_assignment = true;
614                 }
615
616                 size_t width = 0;
617                 if ('0' <= fmt && fmt <= '9') {
618                         do {
619                                 width = width * 10 + (fmt - '0');
620                                 fmt   = *++c;
621                         } while ('0' <= fmt && fmt <= '9');
622                         if (width == 0) {
623                                 warningf(WARN_FORMAT, pos, "field width is zero at format %u", num_fmt);
624                         }
625                 }
626
627                 /* look for length modifiers */
628                 format_length_modifier_t fmt_mod = FMT_MOD_NONE;
629                 switch (fmt) {
630                 case 'h':
631                         fmt = *(++c);
632                         if (fmt == 'h') {
633                                 fmt = *(++c);
634                                 fmt_mod = FMT_MOD_hh;
635                         } else {
636                                 fmt_mod = FMT_MOD_h;
637                         }
638                         break;
639
640                 case 'l':
641                         fmt = *(++c);
642                         if (fmt == 'l') {
643                                 fmt = *(++c);
644                                 fmt_mod = FMT_MOD_ll;
645                         } else {
646                                 fmt_mod = FMT_MOD_l;
647                         }
648                         break;
649
650                 case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
651                 case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
652                 case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
653                 case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
654                 /* microsoft mode */
655                 case 'w':
656                         if (c_mode & _MS) {
657                                 fmt = *(++c);
658                                 fmt_mod = FMT_MOD_w;
659                         }
660                         break;
661                 case 'I':
662                         if (c_mode & _MS) {
663                                 fmt = *(++c);
664                                 fmt_mod = FMT_MOD_I;
665                                 if (fmt == '3') {
666                                         fmt = *(++c);
667                                         if (fmt == '2') {
668                                                 fmt = *(++c);
669                                                 fmt_mod = FMT_MOD_I32;
670                                         } else {
671                                                 /* rewind */
672                                                 fmt = *(--c);
673                                         }
674                                 } else if (fmt == '6') {
675                                         fmt = *(++c);
676                                         if (fmt == '4') {
677                                                 fmt = *(++c);
678                                                 fmt_mod = FMT_MOD_I64;
679                                         } else {
680                                                 /* rewind */
681                                                 fmt = *(--c);
682                                         }
683                                 }
684                         }
685                         break;
686                 }
687
688                 if (fmt == '\0') {
689                         warningf(WARN_FORMAT, pos, "dangling %% with conversion specififer in format string");
690                         break;
691                 }
692
693                 type_t *expected_type;
694                 switch (fmt) {
695                 case 'd':
696                 case 'i':
697                         switch (fmt_mod) {
698                         case FMT_MOD_NONE: expected_type = type_int;         break;
699                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
700                         case FMT_MOD_h:    expected_type = type_short;       break;
701                         case FMT_MOD_l:    expected_type = type_long;        break;
702                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
703                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
704                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
705                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
706                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
707                         case FMT_MOD_I32:  expected_type = type_int32;       break;
708                         case FMT_MOD_I64:  expected_type = type_int64;       break;
709
710                         default:
711                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
712                                 goto next_arg;
713                         }
714                         break;
715
716                 case 'o':
717                 case 'X':
718                 case 'x':
719                 case 'u':
720                         switch (fmt_mod) {
721                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
722                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
723                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
724                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
725                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
726                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
727                         case FMT_MOD_z:    expected_type = type_size_t;             break;
728                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
729                         case FMT_MOD_I:    expected_type = type_size_t;             break;
730                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
731                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
732
733                         default:
734                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
735                                 goto next_arg;
736                         }
737                         break;
738
739                 case 'A':
740                 case 'a':
741                 case 'E':
742                 case 'e':
743                 case 'F':
744                 case 'f':
745                 case 'G':
746                 case 'g':
747                         switch (fmt_mod) {
748                         case FMT_MOD_l:    expected_type = type_double;      break;
749                         case FMT_MOD_NONE: expected_type = type_float;       break;
750                         case FMT_MOD_L:    expected_type = type_long_double; break;
751
752                         default:
753                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
754                                 goto next_arg;
755                         }
756                         break;
757
758                 case 'C':
759                         if (fmt_mod != FMT_MOD_NONE) {
760                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
761                                 goto next_arg;
762                         }
763                         expected_type = type_wchar_t;
764                         goto check_c_width;
765
766                 case 'c': {
767                         switch (fmt_mod) {
768                         case FMT_MOD_NONE: expected_type = type_char;    break;
769                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
770                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
771
772                         default:
773                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
774                                 goto next_arg;
775                         }
776
777 check_c_width:
778                         if (width == 0)
779                                 width = 1;
780                         if (!suppress_assignment && arg != NULL) {
781                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
782                                 if (is_type_array(type)       &&
783                                     type->array.size_constant &&
784                                     width > type->array.size) {
785                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters at format %u", type, width, num_fmt);
786                                 }
787                         }
788                         break;
789                 }
790
791                 case 'S':
792                         if (fmt_mod != FMT_MOD_NONE) {
793                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
794                                 goto next_arg;
795                         }
796                         expected_type = type_wchar_t;
797                         break;
798
799                 case 's':
800                 case '[': {
801                         switch (fmt_mod) {
802                                 case FMT_MOD_NONE: expected_type = type_char;    break;
803                                 case FMT_MOD_l:    expected_type = type_wchar_t; break;
804                                 case FMT_MOD_w:    expected_type = type_wchar_t; break;
805
806                                 default:
807                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
808                                         goto next_arg;
809                         }
810
811                         if (!suppress_assignment &&
812                             width != 0           &&
813                             arg   != NULL) {
814                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
815                                 if (is_type_array(type)       &&
816                                     type->array.size_constant &&
817                                     width >= type->array.size) {
818                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters and \\0 at format %u", type, width, num_fmt);
819                                 }
820                         }
821                         break;
822                 }
823
824                 case 'p':
825                         if (fmt_mod != FMT_MOD_NONE) {
826                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
827                                 goto next_arg;
828                         }
829                         expected_type = type_void;
830                         break;
831
832                 case 'n': {
833                         if (suppress_assignment) {
834                                 warningf(WARN_FORMAT, pos, "conversion '%n' cannot be suppressed with '*' at format %u", num_fmt);
835                         }
836
837                         switch (fmt_mod) {
838                         case FMT_MOD_NONE: expected_type = type_int;         break;
839                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
840                         case FMT_MOD_h:    expected_type = type_short;       break;
841                         case FMT_MOD_l:    expected_type = type_long;        break;
842                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
843                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
844                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
845                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
846
847                         default:
848                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
849                                 goto next_arg;
850                         }
851                         break;
852                 }
853
854                 default:
855                         warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
856                         if (suppress_assignment)
857                                 continue;
858                         if (arg == NULL)
859                                 goto too_few_args;
860                         goto next_arg;
861                 }
862
863                 if (suppress_assignment)
864                         continue;
865
866                 if (arg == NULL) {
867 too_few_args:
868                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
869                         return;
870                 }
871
872                 { /* create a scope here to prevent warning about the jump to next_arg */
873                         type_t *const arg_type           = arg->expression->base.type;
874                         type_t *const arg_skip           = skip_typeref(arg_type);
875                         type_t *const expected_type_skip = skip_typeref(expected_type);
876
877                         if (! is_type_pointer(arg_skip))
878                                 goto error_arg_type;
879                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
880
881                         if (fmt == 'p') {
882                                 /* allow any pointer type for %p, not just void */
883                                 if (is_type_pointer(ptr_skip))
884                                         goto next_arg;
885                         }
886
887                         /* do NOT allow const or restrict, all other should be ok */
888                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
889                                 goto error_arg_type;
890                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
891                         if (unqual_ptr == expected_type_skip) {
892                                 goto next_arg;
893                         } else if (expected_type_skip == type_char) {
894                                 /* char matches with unsigned char AND signed char */
895                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
896                                         goto next_arg;
897                         }
898 error_arg_type:
899                         if (is_type_valid(arg_skip)) {
900                                 position_t const *const apos = &arg->expression->base.pos;
901                                 char       const *const mod  = get_length_modifier_name(fmt_mod);
902                                 warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T*' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
903                         }
904                 }
905 next_arg:
906                 arg = arg->next;
907         }
908         assert(fmt == '\0');
909         if (c+1 < string + size) {
910                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
911         }
912         if (arg != NULL) {
913                 unsigned num_args = num_fmt;
914                 while (arg != NULL) {
915                         ++num_args;
916                         arg = arg->next;
917                 }
918                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : "");
919         }
920 }
921
922 static const format_spec_t builtin_table[] = {
923         { "printf",        FORMAT_PRINTF,   0, 1 },
924         { "wprintf",       FORMAT_PRINTF,   0, 1 },
925         { "sprintf",       FORMAT_PRINTF,   1, 2 },
926         { "swprintf",      FORMAT_PRINTF,   1, 2 },
927         { "snprintf",      FORMAT_PRINTF,   2, 3 },
928         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
929         { "fprintf",       FORMAT_PRINTF,   1, 2 },
930         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
931         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
932         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
933
934         { "scanf",         FORMAT_SCANF,    0, 1 },
935         { "wscanf",        FORMAT_SCANF,    0, 1 },
936         { "sscanf",        FORMAT_SCANF,    1, 2 },
937         { "swscanf",       FORMAT_SCANF,    1, 2 },
938         { "fscanf",        FORMAT_SCANF,    1, 2 },
939         { "fwscanf",       FORMAT_SCANF,    1, 2 },
940
941         { "strftime",      FORMAT_STRFTIME, 3, 4 },
942         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
943
944         { "strfmon",       FORMAT_STRFMON,  3, 4 },
945
946         /* MS extensions */
947         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
948         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
949         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
950         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
951         { "printf_s",      FORMAT_PRINTF,   0, 1 },
952         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
953         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
954         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
955         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
956         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
957         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
958         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
959         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
960         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
961         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
962         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
963         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
964         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
965         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
966         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
967         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
968         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
969 };
970
971 void check_format(const call_expression_t *const call)
972 {
973         if (!is_warn_on(WARN_FORMAT))
974                 return;
975
976         const expression_t *const func_expr = call->function;
977         if (func_expr->kind != EXPR_REFERENCE)
978                 return;
979
980         const entity_t        *const entity = func_expr->reference.entity;
981         const call_argument_t *      arg    = call->arguments;
982
983         /*
984          * For some functions we always check the format, even if it was not
985          * specified. This allows to check format even in MS mode or without
986          * header included.
987          */
988         const char *const name = entity->base.symbol->string;
989         for (size_t i = 0; i < lengthof(builtin_table); ++i) {
990                 if (streq(name, builtin_table[i].name)) {
991                         switch (builtin_table[i].fmt_kind) {
992                         case FORMAT_PRINTF:
993                                 check_printf_format(arg, &builtin_table[i]);
994                                 break;
995                         case FORMAT_SCANF:
996                                 check_scanf_format(arg, &builtin_table[i]);
997                                 break;
998                         case FORMAT_STRFTIME:
999                         case FORMAT_STRFMON:
1000                                 /* TODO: implement other cases */
1001                                 break;
1002                         }
1003                         break;
1004                 }
1005         }
1006 }