Correct several uses of expr->literal to expr->string_literal.
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <config.h>
21
22 #include <ctype.h>
23
24 #include "adt/strutil.h"
25 #include "adt/util.h"
26 #include "format_check.h"
27 #include "symbol_t.h"
28 #include "ast_t.h"
29 #include "entity_t.h"
30 #include "diagnostic.h"
31 #include "parser.h"
32 #include "types.h"
33 #include "type_t.h"
34 #include "warning.h"
35 #include "lang_features.h"
36
/**
 * Flag characters of a printf conversion specification, one bit per flag.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00U, /**< no flag given */
        FMT_FLAG_HASH  = 0x01U, /**< '#'  */
        FMT_FLAG_ZERO  = 0x02U, /**< '0'  */
        FMT_FLAG_MINUS = 0x04U, /**< '-'  */
        FMT_FLAG_SPACE = 0x08U, /**< ' '  */
        FMT_FLAG_PLUS  = 0x10U, /**< '+'  */
        FMT_FLAG_TICK  = 0x20U  /**< '\'' */
} format_flag_t;

/** A set of format_flag_t bits combined with bitwise or. */
typedef unsigned int format_flags_t;
48
/**
 * Length modifiers that can precede a conversion specifier.
 * The enumerators are used as indices into the name table of
 * get_length_modifier_name(), so they must stay dense and start at zero.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /**< no length modifier */
        FMT_MOD_L,        /**< "L"  */
        FMT_MOD_hh,       /**< "hh" */
        FMT_MOD_h,        /**< "h"  */
        FMT_MOD_l,        /**< "l"  */
        FMT_MOD_ll,       /**< "ll" */
        FMT_MOD_j,        /**< "j"  */
        FMT_MOD_t,        /**< "t"  */
        FMT_MOD_z,        /**< "z"  */
        FMT_MOD_q,        /**< "q"  */

        /* the remaining modifiers are only recognised in microsoft mode */
        FMT_MOD_w,        /**< "w"   */
        FMT_MOD_I,        /**< "I"   */
        FMT_MOD_I32,      /**< "I32" */
        FMT_MOD_I64       /**< "I64" */
} format_length_modifier_t;
66
67 typedef struct format_spec_t {
68         const char    *name;     /**< name of the function */
69         format_kind_t  fmt_kind; /**< kind */
70         unsigned       fmt_idx;  /**< index of the format string */
71         unsigned       arg_idx;  /**< index of the first argument */
72 } format_spec_t;
73
74 static const char* get_length_modifier_name(const format_length_modifier_t mod)
75 {
76         static const char* const names[] = {
77                 [FMT_MOD_NONE] = "",
78                 [FMT_MOD_L]    = "L",
79                 [FMT_MOD_hh]   = "hh",
80                 [FMT_MOD_h]    = "h",
81                 [FMT_MOD_l]    = "l",
82                 [FMT_MOD_ll]   = "ll",
83                 [FMT_MOD_j]    = "j",
84                 [FMT_MOD_t]    = "t",
85                 [FMT_MOD_z]    = "z",
86                 [FMT_MOD_q]    = "q",
87                 /* only in microsoft mode */
88                 [FMT_MOD_w]    = "w",
89                 [FMT_MOD_I]    = "I",
90                 [FMT_MOD_I32]  = "I32",
91                 [FMT_MOD_I64]  = "I64"
92         };
93         assert((size_t)mod < lengthof(names));
94         return names[mod];
95 }
96
97 static void warn_invalid_length_modifier(const source_position_t *pos,
98                                          const format_length_modifier_t mod,
99                                          const utf32 conversion)
100 {
101         char const *const lmod = get_length_modifier_name(mod);
102         warningf(WARN_FORMAT, pos, "invalid length modifier '%s' for conversion specifier '%%%c'", lmod, conversion);
103 }
104
105 /**
106  * Check printf-style format. Returns number of expected arguments.
107  */
108 static int internal_check_printf_format(const expression_t *fmt_expr,
109                                         const call_argument_t *arg,
110                                         const format_spec_t *spec)
111 {
112         while (fmt_expr->kind == EXPR_UNARY_CAST) {
113                 fmt_expr = fmt_expr->unary.value;
114         }
115
116         /*
117          * gettext results in expressions like (X ? "format_string" : Y)
118          * we assume the left part is the format string
119          */
120         if (fmt_expr->kind == EXPR_CONDITIONAL) {
121                 conditional_expression_t const *const c = &fmt_expr->conditional;
122                 expression_t             const *      t = c->true_expression;
123                 if (t == NULL)
124                         t = c->condition;
125                 int const nt = internal_check_printf_format(t,                   arg, spec);
126                 int const nf = internal_check_printf_format(c->false_expression, arg, spec);
127                 return nt > nf ? nt : nf;
128         }
129
130         if (fmt_expr->kind != EXPR_STRING_LITERAL
131                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
132                 return -1;
133
134         const char *string = fmt_expr->string_literal.value.begin;
135         size_t      size   = fmt_expr->string_literal.value.size;
136         const char *c      = string;
137
138         const source_position_t *pos = &fmt_expr->base.source_position;
139         unsigned num_fmt  = 0;
140         unsigned num_args = 0;
141         char     fmt;
142         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
143                 if (fmt != '%')
144                         continue;
145                 fmt = *(++c);
146
147                 if (fmt == '%')
148                         continue;
149
150                 ++num_fmt;
151                 ++num_args;
152
153                 format_flags_t fmt_flags = FMT_FLAG_NONE;
154                 if (fmt == '0') {
155                         fmt = *(++c);
156                         fmt_flags |= FMT_FLAG_ZERO;
157                 }
158
159                 /* argument selector or minimum field width */
160                 if (isdigit(fmt)) {
161                         do {
162                                 fmt = *(++c);
163                         } while (isdigit(fmt));
164
165                         /* digit string was ... */
166                         if (fmt == '$') {
167                                 /* ... argument selector */
168                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
169                                 /* TODO implement */
170                                 return -1;
171                         }
172                         /* ... minimum field width */
173                 } else {
174                         /* flags */
175                         for (;;) {
176                                 format_flags_t flag;
177                                 switch (fmt) {
178                                         case '#':  flag = FMT_FLAG_HASH;  break;
179                                         case '0':  flag = FMT_FLAG_ZERO;  break;
180                                         case '-':  flag = FMT_FLAG_MINUS; break;
181                                         case '\'': flag = FMT_FLAG_TICK;  break;
182
183                                         case ' ':
184                                                 if (fmt_flags & FMT_FLAG_PLUS) {
185                                                         warningf(WARN_FORMAT, pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
186                                                 }
187                                                 flag = FMT_FLAG_SPACE;
188                                                 break;
189
190                                         case '+':
191                                                 if (fmt_flags & FMT_FLAG_SPACE) {
192                                                         warningf(WARN_FORMAT, pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
193                                                 }
194                                                 flag = FMT_FLAG_PLUS;
195                                                 break;
196
197                                         default: goto break_fmt_flags;
198                                 }
199                                 if (fmt_flags & flag) {
200                                         warningf(WARN_FORMAT, pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
201                                 }
202                                 fmt_flags |= flag;
203                                 fmt = *(++c);
204                         }
205 break_fmt_flags:
206
207                         /* minimum field width */
208                         if (fmt == '*') {
209                                 ++num_args;
210                                 fmt = *(++c);
211                                 if (arg == NULL) {
212                                         warningf(WARN_FORMAT, pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
213                                         return -1;
214                                 }
215                                 const type_t *const arg_type = arg->expression->base.type;
216                                 if (arg_type != type_int) {
217                                         warningf(WARN_FORMAT, pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
218                                 }
219                                 arg = arg->next;
220                         } else {
221                                 while (isdigit(fmt)) {
222                                         fmt = *(++c);
223                                 }
224                         }
225                 }
226
227                 /* precision */
228                 if (fmt == '.') {
229                         if (fmt_flags & FMT_FLAG_ZERO) {
230                                 warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
231                         }
232
233                         ++num_args;
234                         fmt = *(++c);
235                         if (fmt == '*') {
236                                 fmt = *(++c);
237                                 if (arg == NULL) {
238                                         warningf(WARN_FORMAT, pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
239                                         return -1;
240                                 }
241                                 const type_t *const arg_type = arg->expression->base.type;
242                                 if (arg_type != type_int) {
243                                         warningf(WARN_FORMAT, pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
244                                 }
245                                 arg = arg->next;
246                         } else {
247                                 /* digit string may be omitted */
248                                 while (isdigit(fmt)) {
249                                         fmt = *(++c);
250                                 }
251                         }
252                 }
253
254                 /* length modifier */
255                 format_length_modifier_t fmt_mod;
256                 switch (fmt) {
257                         case 'h':
258                                 fmt = *(++c);
259                                 if (fmt == 'h') {
260                                         fmt = *(++c);
261                                         fmt_mod = FMT_MOD_hh;
262                                 } else {
263                                         fmt_mod = FMT_MOD_h;
264                                 }
265                                 break;
266
267                         case 'l':
268                                 fmt = *(++c);
269                                 if (fmt == 'l') {
270                                         fmt = *(++c);
271                                         fmt_mod = FMT_MOD_ll;
272                                 } else {
273                                         fmt_mod = FMT_MOD_l;
274                                 }
275                                 break;
276
277                         case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
278                         case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
279                         case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
280                         case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
281                         case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
282                         /* microsoft mode */
283                         case 'w':
284                                 if (c_mode & _MS) {
285                                         fmt = *(++c); fmt_mod = FMT_MOD_w;
286                                 } else {
287                                         fmt_mod = FMT_MOD_NONE;
288                                 }
289                                 break;
290                         case 'I':
291                                 if (c_mode & _MS) {
292                                         fmt = *(++c); fmt_mod = FMT_MOD_I;
293                                         if (fmt == '3') {
294                                                 fmt = *(++c);
295                                                 if (fmt == '2') {
296                                                         fmt = *(++c);
297                                                         fmt_mod = FMT_MOD_I32;
298                                                 } else {
299                                                         /* rewind */
300                                                         fmt = *(--c);
301                                                 }
302                                         } else if (fmt == '6') {
303                                                 fmt = *(++c);
304                                                 if (fmt == '4') {
305                                                         fmt = *(++c);
306                                                         fmt_mod = FMT_MOD_I64;
307                                                 } else {
308                                                         /* rewind */
309                                                         fmt = *(--c);
310                                                 }
311                                         }
312                                 } else {
313                                         fmt_mod = FMT_MOD_NONE;
314                                 }
315                                 break;
316                         default:
317                                 fmt_mod = FMT_MOD_NONE;
318                                 break;
319                 }
320
321                 if (fmt == '\0') {
322                         warningf(WARN_FORMAT, pos, "dangling %% in format string");
323                         break;
324                 }
325
326                 type_t            *expected_type;
327                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
328                 format_flags_t     allowed_flags;
329                 switch (fmt) {
330                         case 'd':
331                         case 'i':
332                                 switch (fmt_mod) {
333                                         case FMT_MOD_NONE: expected_type = type_int;         break;
334                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
335                                         case FMT_MOD_h:    expected_type = type_short;       break;
336                                         case FMT_MOD_l:    expected_type = type_long;        break;
337                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
338                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
339                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
340                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
341                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
342                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
343                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
344
345                                         default:
346                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
347                                                 goto next_arg;
348                                 }
349                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
350                                 break;
351
352                         case 'o':
353                         case 'X':
354                         case 'x':
355                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
356                                 goto eval_fmt_mod_unsigned;
357
358                         case 'u':
359                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
360 eval_fmt_mod_unsigned:
361                                 switch (fmt_mod) {
362                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
363                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
364                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
365                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
366                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
367                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
368                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
369                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
370                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
371                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
372                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
373
374                                         default:
375                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
376                                                 goto next_arg;
377                                 }
378                                 break;
379
380                         case 'A':
381                         case 'a':
382                         case 'E':
383                         case 'e':
384                         case 'F':
385                         case 'f':
386                         case 'G':
387                         case 'g':
388                                 switch (fmt_mod) {
389                                         case FMT_MOD_l:    /* l modifier is ignored */
390                                         case FMT_MOD_NONE: expected_type = type_double;      break;
391                                         case FMT_MOD_L:    expected_type = type_long_double; break;
392
393                                         default:
394                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
395                                                 goto next_arg;
396                                 }
397                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
398                                 break;
399
400                         case 'C':
401                                 if (fmt_mod != FMT_MOD_NONE) {
402                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
403                                         goto next_arg;
404                                 }
405                                 expected_type = type_wchar_t;
406                                 allowed_flags = FMT_FLAG_NONE;
407                                 break;
408
409                         case 'c':
410                                 expected_type = type_int;
411                                 switch (fmt_mod) {
412                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
413                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
414                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
415
416                                         default:
417                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
418                                                 goto next_arg;
419                                 }
420                                 allowed_flags = FMT_FLAG_NONE;
421                                 break;
422
423                         case 'S':
424                                 if (fmt_mod != FMT_MOD_NONE) {
425                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
426                                         goto next_arg;
427                                 }
428                                 expected_type = type_wchar_t_ptr;
429                                 expected_qual = TYPE_QUALIFIER_CONST;
430                                 allowed_flags = FMT_FLAG_MINUS;
431                                 break;
432
433                         case 's':
434                                 switch (fmt_mod) {
435                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
436                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
437                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
438
439                                         default:
440                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
441                                                 goto next_arg;
442                                 }
443                                 expected_qual = TYPE_QUALIFIER_CONST;
444                                 allowed_flags = FMT_FLAG_MINUS;
445                                 break;
446
447                         case 'p':
448                                 if (fmt_mod != FMT_MOD_NONE) {
449                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
450                                         goto next_arg;
451                                 }
452                                 expected_type = type_void_ptr;
453                                 allowed_flags = FMT_FLAG_NONE;
454                                 break;
455
456                         case 'n':
457                                 switch (fmt_mod) {
458                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
459                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
460                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
461                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
462                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
463                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
464                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
465                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
466
467                                         default:
468                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
469                                                 goto next_arg;
470                                 }
471                                 allowed_flags = FMT_FLAG_NONE;
472                                 break;
473
474                         default:
475                                 warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
476                                 if (arg == NULL) {
477                                         goto too_few_args;
478                                 }
479                                 goto next_arg;
480                 }
481
482                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
483                 if (wrong_flags != 0) {
484                         char  wrong[8];
485                         char *p = wrong;
486                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
487                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
488                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
489                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
490                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
491                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
492                         *p = '\0';
493
494                         warningf(WARN_FORMAT, pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
495                 }
496
497                 if (arg == NULL) {
498 too_few_args:
499                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
500                         return -1;
501                 }
502
503                 { /* create a scope here to prevent warning about the jump to next_arg */
504                         type_t *const arg_type           = arg->expression->base.type;
505                         type_t *const arg_skip           = skip_typeref(arg_type);
506                         type_t *const expected_type_skip = skip_typeref(expected_type);
507
508                         if (fmt == 'p') {
509                                 /* allow any pointer type for %p, not just void */
510                                 if (is_type_pointer(arg_skip))
511                                         goto next_arg;
512                         }
513
514                         if (is_type_pointer(expected_type_skip)) {
515                                 if (is_type_pointer(arg_skip)) {
516                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
517                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
518                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
519                                                 get_unqualified_type(arg_to) == exp_to) {
520                                                 goto next_arg;
521                                         }
522                                 }
523                         } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
524                                 goto next_arg;
525                         } else if (arg->expression->kind == EXPR_UNARY_CAST) {
526                                 expression_t const *const expr        = arg->expression->unary.value;
527                                 type_t             *const unprom_type = skip_typeref(expr->base.type);
528                                 if (get_unqualified_type(unprom_type) == expected_type_skip) {
529                                         goto next_arg;
530                                 }
531                                 if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
532                                         goto next_arg;
533                                 }
534                         }
535                         if (is_type_valid(arg_skip)) {
536                                 source_position_t const *const apos = &arg->expression->base.source_position;
537                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
538                                 warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
539                         }
540                 }
541 next_arg:
542                 arg = arg->next;
543         }
544         assert(fmt == '\0');
545         if (c+1 < string + size) {
546                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
547         }
548         return num_args;
549 }
550
551 /**
552  * Check printf-style format.
553  */
554 static void check_printf_format(call_argument_t const *arg,
555                                 format_spec_t const *const spec)
556 {
557         /* find format arg */
558         size_t idx = 0;
559         for (; idx < spec->fmt_idx; ++idx) {
560                 if (arg == NULL)
561                         return;
562                 arg = arg->next;
563         }
564
565         expression_t const *const fmt_expr = arg->expression;
566
567         /* find the real args */
568         for (; idx < spec->arg_idx && arg != NULL; ++idx)
569                 arg = arg->next;
570
571         int const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
572         if (num_fmt < 0)
573                 return;
574
575         size_t num_args = 0;
576         for (; arg != NULL; arg = arg->next)
577                 ++num_args;
578         if (num_args > (size_t)num_fmt) {
579                 source_position_t const *const pos = &fmt_expr->base.source_position;
580                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
581         }
582 }
583
584 /**
585  * Check scanf-style format.
586  */
587 static void check_scanf_format(const call_argument_t *arg,
588                                const format_spec_t *spec)
589 {
590         /* find format arg */
591         unsigned idx = 0;
592         for (; idx < spec->fmt_idx; ++idx) {
593                 if (arg == NULL)
594                         return;
595                 arg = arg->next;
596         }
597
598         const expression_t *fmt_expr = arg->expression;
599         if (fmt_expr->kind == EXPR_UNARY_CAST) {
600                 fmt_expr = fmt_expr->unary.value;
601         }
602
603         if (fmt_expr->kind != EXPR_STRING_LITERAL
604                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
605                 return;
606
607         const char *string = fmt_expr->string_literal.value.begin;
608         size_t      size   = fmt_expr->string_literal.value.size;
609         const char *c      = string;
610
611         /* find the real args */
612         for (; idx < spec->arg_idx && arg != NULL; ++idx)
613                 arg = arg->next;
614
615         const source_position_t *pos = &fmt_expr->base.source_position;
616         unsigned num_fmt = 0;
617         char     fmt;
618         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
619                 if (fmt != '%')
620                         continue;
621                 fmt = *(++c);
622                 if (fmt == '%')
623                         continue;
624
625                 ++num_fmt;
626
627                 bool suppress_assignment = false;
628                 if (fmt == '*') {
629                         fmt = *++c;
630                         suppress_assignment = true;
631                 }
632
633                 size_t width = 0;
634                 if ('0' <= fmt && fmt <= '9') {
635                         do {
636                                 width = width * 10 + (fmt - '0');
637                                 fmt   = *++c;
638                         } while ('0' <= fmt && fmt <= '9');
639                         if (width == 0) {
640                                 warningf(WARN_FORMAT, pos, "field width is zero at format %u", num_fmt);
641                         }
642                 }
643
644                 /* look for length modifiers */
645                 format_length_modifier_t fmt_mod = FMT_MOD_NONE;
646                 switch (fmt) {
647                 case 'h':
648                         fmt = *(++c);
649                         if (fmt == 'h') {
650                                 fmt = *(++c);
651                                 fmt_mod = FMT_MOD_hh;
652                         } else {
653                                 fmt_mod = FMT_MOD_h;
654                         }
655                         break;
656
657                 case 'l':
658                         fmt = *(++c);
659                         if (fmt == 'l') {
660                                 fmt = *(++c);
661                                 fmt_mod = FMT_MOD_ll;
662                         } else {
663                                 fmt_mod = FMT_MOD_l;
664                         }
665                         break;
666
667                 case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
668                 case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
669                 case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
670                 case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
671                 /* microsoft mode */
672                 case 'w':
673                         if (c_mode & _MS) {
674                                 fmt = *(++c);
675                                 fmt_mod = FMT_MOD_w;
676                         }
677                         break;
678                 case 'I':
679                         if (c_mode & _MS) {
680                                 fmt = *(++c);
681                                 fmt_mod = FMT_MOD_I;
682                                 if (fmt == '3') {
683                                         fmt = *(++c);
684                                         if (fmt == '2') {
685                                                 fmt = *(++c);
686                                                 fmt_mod = FMT_MOD_I32;
687                                         } else {
688                                                 /* rewind */
689                                                 fmt = *(--c);
690                                         }
691                                 } else if (fmt == '6') {
692                                         fmt = *(++c);
693                                         if (fmt == '4') {
694                                                 fmt = *(++c);
695                                                 fmt_mod = FMT_MOD_I64;
696                                         } else {
697                                                 /* rewind */
698                                                 fmt = *(--c);
699                                         }
700                                 }
701                         }
702                         break;
703                 }
704
705                 if (fmt == '\0') {
706                         warningf(WARN_FORMAT, pos, "dangling %% with conversion specififer in format string");
707                         break;
708                 }
709
710                 type_t *expected_type;
711                 switch (fmt) {
712                 case 'd':
713                 case 'i':
714                         switch (fmt_mod) {
715                         case FMT_MOD_NONE: expected_type = type_int;         break;
716                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
717                         case FMT_MOD_h:    expected_type = type_short;       break;
718                         case FMT_MOD_l:    expected_type = type_long;        break;
719                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
720                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
721                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
722                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
723                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
724                         case FMT_MOD_I32:  expected_type = type_int32;       break;
725                         case FMT_MOD_I64:  expected_type = type_int64;       break;
726
727                         default:
728                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
729                                 goto next_arg;
730                         }
731                         break;
732
733                 case 'o':
734                 case 'X':
735                 case 'x':
736                 case 'u':
737                         switch (fmt_mod) {
738                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
739                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
740                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
741                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
742                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
743                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
744                         case FMT_MOD_z:    expected_type = type_size_t;             break;
745                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
746                         case FMT_MOD_I:    expected_type = type_size_t;             break;
747                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
748                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
749
750                         default:
751                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
752                                 goto next_arg;
753                         }
754                         break;
755
756                 case 'A':
757                 case 'a':
758                 case 'E':
759                 case 'e':
760                 case 'F':
761                 case 'f':
762                 case 'G':
763                 case 'g':
764                         switch (fmt_mod) {
765                         case FMT_MOD_l:    expected_type = type_double;      break;
766                         case FMT_MOD_NONE: expected_type = type_float;       break;
767                         case FMT_MOD_L:    expected_type = type_long_double; break;
768
769                         default:
770                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
771                                 goto next_arg;
772                         }
773                         break;
774
775                 case 'C':
776                         if (fmt_mod != FMT_MOD_NONE) {
777                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
778                                 goto next_arg;
779                         }
780                         expected_type = type_wchar_t;
781                         goto check_c_width;
782
783                 case 'c': {
784                         switch (fmt_mod) {
785                         case FMT_MOD_NONE: expected_type = type_char;    break;
786                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
787                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
788
789                         default:
790                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
791                                 goto next_arg;
792                         }
793
794 check_c_width:
795                         if (width == 0)
796                                 width = 1;
797                         if (!suppress_assignment && arg != NULL) {
798                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
799                                 if (is_type_array(type)       &&
800                                     type->array.size_constant &&
801                                     width > type->array.size) {
802                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters at format %u", type, width, num_fmt);
803                                 }
804                         }
805                         break;
806                 }
807
808                 case 'S':
809                         if (fmt_mod != FMT_MOD_NONE) {
810                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
811                                 goto next_arg;
812                         }
813                         expected_type = type_wchar_t;
814                         break;
815
816                 case 's':
817                 case '[': {
818                         switch (fmt_mod) {
819                                 case FMT_MOD_NONE: expected_type = type_char;    break;
820                                 case FMT_MOD_l:    expected_type = type_wchar_t; break;
821                                 case FMT_MOD_w:    expected_type = type_wchar_t; break;
822
823                                 default:
824                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
825                                         goto next_arg;
826                         }
827
828                         if (!suppress_assignment &&
829                             width != 0           &&
830                             arg   != NULL) {
831                                 type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
832                                 if (is_type_array(type)       &&
833                                     type->array.size_constant &&
834                                     width >= type->array.size) {
835                                         warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters and \\0 at format %u", type, width, num_fmt);
836                                 }
837                         }
838                         break;
839                 }
840
841                 case 'p':
842                         if (fmt_mod != FMT_MOD_NONE) {
843                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
844                                 goto next_arg;
845                         }
846                         expected_type = type_void;
847                         break;
848
849                 case 'n': {
850                         if (suppress_assignment) {
851                                 warningf(WARN_FORMAT, pos, "conversion '%n' cannot be suppressed with '*' at format %u", num_fmt);
852                         }
853
854                         switch (fmt_mod) {
855                         case FMT_MOD_NONE: expected_type = type_int;         break;
856                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
857                         case FMT_MOD_h:    expected_type = type_short;       break;
858                         case FMT_MOD_l:    expected_type = type_long;        break;
859                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
860                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
861                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
862                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
863
864                         default:
865                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
866                                 goto next_arg;
867                         }
868                         break;
869                 }
870
871                 default:
872                         warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
873                         if (suppress_assignment)
874                                 continue;
875                         if (arg == NULL)
876                                 goto too_few_args;
877                         goto next_arg;
878                 }
879
880                 if (suppress_assignment)
881                         continue;
882
883                 if (arg == NULL) {
884 too_few_args:
885                         warningf(WARN_FORMAT, pos, "too few arguments for format string");
886                         return;
887                 }
888
889                 { /* create a scope here to prevent warning about the jump to next_arg */
890                         type_t *const arg_type           = arg->expression->base.type;
891                         type_t *const arg_skip           = skip_typeref(arg_type);
892                         type_t *const expected_type_skip = skip_typeref(expected_type);
893
894                         if (! is_type_pointer(arg_skip))
895                                 goto error_arg_type;
896                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
897
898                         if (fmt == 'p') {
899                                 /* allow any pointer type for %p, not just void */
900                                 if (is_type_pointer(ptr_skip))
901                                         goto next_arg;
902                         }
903
904                         /* do NOT allow const or restrict, all other should be ok */
905                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
906                                 goto error_arg_type;
907                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
908                         if (unqual_ptr == expected_type_skip) {
909                                 goto next_arg;
910                         } else if (expected_type_skip == type_char) {
911                                 /* char matches with unsigned char AND signed char */
912                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
913                                         goto next_arg;
914                         }
915 error_arg_type:
916                         if (is_type_valid(arg_skip)) {
917                                 source_position_t const *const apos = &arg->expression->base.source_position;
918                                 char              const *const mod  = get_length_modifier_name(fmt_mod);
919                                 warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T*' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
920                         }
921                 }
922 next_arg:
923                 arg = arg->next;
924         }
925         assert(fmt == '\0');
926         if (c+1 < string + size) {
927                 warningf(WARN_FORMAT, pos, "format string contains '\\0'");
928         }
929         if (arg != NULL) {
930                 unsigned num_args = num_fmt;
931                 while (arg != NULL) {
932                         ++num_args;
933                         arg = arg->next;
934                 }
935                 warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : "");
936         }
937 }
938
939 static const format_spec_t builtin_table[] = {
940         { "printf",        FORMAT_PRINTF,   0, 1 },
941         { "wprintf",       FORMAT_PRINTF,   0, 1 },
942         { "sprintf",       FORMAT_PRINTF,   1, 2 },
943         { "swprintf",      FORMAT_PRINTF,   1, 2 },
944         { "snprintf",      FORMAT_PRINTF,   2, 3 },
945         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
946         { "fprintf",       FORMAT_PRINTF,   1, 2 },
947         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
948         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
949         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
950
951         { "scanf",         FORMAT_SCANF,    0, 1 },
952         { "wscanf",        FORMAT_SCANF,    0, 1 },
953         { "sscanf",        FORMAT_SCANF,    1, 2 },
954         { "swscanf",       FORMAT_SCANF,    1, 2 },
955         { "fscanf",        FORMAT_SCANF,    1, 2 },
956         { "fwscanf",       FORMAT_SCANF,    1, 2 },
957
958         { "strftime",      FORMAT_STRFTIME, 3, 4 },
959         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
960
961         { "strfmon",       FORMAT_STRFMON,  3, 4 },
962
963         /* MS extensions */
964         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
965         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
966         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
967         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
968         { "printf_s",      FORMAT_PRINTF,   0, 1 },
969         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
970         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
971         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
972         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
973         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
974         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
975         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
976         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
977         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
978         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
979         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
980         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
981         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
982         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
983         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
984         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
985         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
986 };
987
988 void check_format(const call_expression_t *const call)
989 {
990         if (!is_warn_on(WARN_FORMAT))
991                 return;
992
993         const expression_t *const func_expr = call->function;
994         if (func_expr->kind != EXPR_REFERENCE)
995                 return;
996
997         const entity_t        *const entity = func_expr->reference.entity;
998         const call_argument_t *      arg    = call->arguments;
999
1000         /*
1001          * For some functions we always check the format, even if it was not
1002          * specified. This allows to check format even in MS mode or without
1003          * header included.
1004          */
1005         const char *const name = entity->base.symbol->string;
1006         for (size_t i = 0; i < lengthof(builtin_table); ++i) {
1007                 if (streq(name, builtin_table[i].name)) {
1008                         switch (builtin_table[i].fmt_kind) {
1009                         case FORMAT_PRINTF:
1010                                 check_printf_format(arg, &builtin_table[i]);
1011                                 break;
1012                         case FORMAT_SCANF:
1013                                 check_scanf_format(arg, &builtin_table[i]);
1014                                 break;
1015                         case FORMAT_STRFTIME:
1016                         case FORMAT_STRFMON:
1017                                 /* TODO: implement other cases */
1018                                 break;
1019                         }
1020                         break;
1021                 }
1022         }
1023 }