- Rework the way literals are handled, these are now kept as strings until
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <config.h>
21
22 #include <ctype.h>
23
24 #include "adt/util.h"
25 #include "format_check.h"
26 #include "symbol_t.h"
27 #include "ast_t.h"
28 #include "entity_t.h"
29 #include "diagnostic.h"
30 #include "types.h"
31 #include "type_t.h"
32 #include "warning.h"
33 #include "lang_features.h"
34
/**
 * Flags that may appear in a printf conversion specification.
 * Every flag owns one bit, so several of them can be or'ed together
 * into a format_flags_t.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00,
	FMT_FLAG_HASH  = 0x01, /* '#'  */
	FMT_FLAG_ZERO  = 0x02, /* '0'  */
	FMT_FLAG_MINUS = 0x04, /* '-'  */
	FMT_FLAG_SPACE = 0x08, /* ' '  */
	FMT_FLAG_PLUS  = 0x10, /* '+'  */
	FMT_FLAG_TICK  = 0x20  /* '\'' */
} format_flag_t;

/** A set of format_flag_t bits. */
typedef unsigned format_flags_t;
46
/**
 * Length modifiers that can precede a conversion specifier.
 * The enumerators are numbered consecutively starting at zero; they are
 * used as indices into the name table of get_length_modifier_name().
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0, /* no length modifier given */
	FMT_MOD_L,        /* "L"  */
	FMT_MOD_hh,       /* "hh" */
	FMT_MOD_h,        /* "h"  */
	FMT_MOD_l,        /* "l"  */
	FMT_MOD_ll,       /* "ll" */
	FMT_MOD_j,        /* "j"  */
	FMT_MOD_t,        /* "t"  */
	FMT_MOD_z,        /* "z"  */
	FMT_MOD_q,        /* "q"  */

	/* the remaining modifiers are only recognised in microsoft mode */
	FMT_MOD_w,        /* "w"   */
	FMT_MOD_I,        /* "I"   */
	FMT_MOD_I32,      /* "I32" */
	FMT_MOD_I64       /* "I64" */
} format_length_modifier_t;
64
65 typedef struct format_spec_t {
66         const char    *name;     /**< name of the function */
67         format_kind_t  fmt_kind; /**< kind */
68         unsigned       fmt_idx;  /**< index of the format string */
69         unsigned       arg_idx;  /**< index of the first argument */
70 } format_spec_t;
71
72 static const char* get_length_modifier_name(const format_length_modifier_t mod)
73 {
74         static const char* const names[] = {
75                 [FMT_MOD_NONE] = "",
76                 [FMT_MOD_L]    = "L",
77                 [FMT_MOD_hh]   = "hh",
78                 [FMT_MOD_h]    = "h",
79                 [FMT_MOD_l]    = "l",
80                 [FMT_MOD_ll]   = "ll",
81                 [FMT_MOD_j]    = "j",
82                 [FMT_MOD_t]    = "t",
83                 [FMT_MOD_z]    = "z",
84                 [FMT_MOD_q]    = "q",
85                 /* only in microsoft mode */
86                 [FMT_MOD_w]    = "w",
87                 [FMT_MOD_I]    = "I",
88                 [FMT_MOD_I32]  = "I32",
89                 [FMT_MOD_I64]  = "I64"
90         };
91         assert(mod < lengthof(names));
92         return names[mod];
93 }
94
95 static void warn_invalid_length_modifier(const source_position_t *pos,
96                                          const format_length_modifier_t mod,
97                                          const utf32 conversion)
98 {
99         warningf(pos,
100                 "invalid length modifier '%s' for conversion specifier '%%%c'",
101                 get_length_modifier_name(mod), conversion
102         );
103 }
104
105 /**
106  * Check printf-style format.
107  */
108 static int internal_check_printf_format(const expression_t *fmt_expr,
109                                         const call_argument_t *arg,
110                                         const format_spec_t *spec)
111 {
112         while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
113                 fmt_expr = fmt_expr->unary.value;
114         }
115
116         /*
117          * gettext results in expressions like (X ? "format_string" : Y)
118          * we assume the left part is the format string
119          */
120         if (fmt_expr->kind == EXPR_CONDITIONAL) {
121                 conditional_expression_t const *const c = &fmt_expr->conditional;
122                 expression_t             const *      t = c->true_expression;
123                 if (t == NULL)
124                         t = c->condition;
125                 int const nt = internal_check_printf_format(t,                   arg, spec);
126                 int const nf = internal_check_printf_format(c->false_expression, arg, spec);
127                 return nt > nf ? nt : nf;
128         }
129
130         if (fmt_expr->kind != EXPR_STRING_LITERAL
131                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
132                 return -1;
133
134         const char *string = fmt_expr->literal.value.begin;
135         size_t      size   = fmt_expr->literal.value.size;
136         const char *c      = string;
137
138         const source_position_t *pos = &fmt_expr->base.source_position;
139         unsigned num_fmt = 0;
140         char     fmt;
141         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
142                 if (fmt != '%')
143                         continue;
144                 fmt = *(++c);
145
146                 if (fmt == '\0') {
147                         warningf(pos, "dangling %% in format string");
148                         break;
149                 }
150                 if (fmt == '%')
151                         continue;
152
153                 ++num_fmt;
154
155                 format_flags_t fmt_flags = FMT_FLAG_NONE;
156                 if (fmt == '0') {
157                         fmt = *(++c);
158                         fmt_flags |= FMT_FLAG_ZERO;
159                 }
160
161                 /* argument selector or minimum field width */
162                 if (isdigit(fmt)) {
163                         do {
164                                 fmt = *(++c);
165                         } while (isdigit(fmt));
166
167                         /* digit string was ... */
168                         if (fmt == '$') {
169                                 /* ... argument selector */
170                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
171                                 /* TODO implement */
172                                 return -1;
173                         }
174                         /* ... minimum field width */
175                 } else {
176                         /* flags */
177                         for (;;) {
178                                 format_flags_t flag;
179                                 switch (fmt) {
180                                         case '#':  flag = FMT_FLAG_HASH;  break;
181                                         case '0':  flag = FMT_FLAG_ZERO;  break;
182                                         case '-':  flag = FMT_FLAG_MINUS; break;
183                                         case '\'': flag = FMT_FLAG_TICK;  break;
184
185                                         case ' ':
186                                                 if (fmt_flags & FMT_FLAG_PLUS) {
187                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
188                                                 }
189                                                 flag = FMT_FLAG_SPACE;
190                                                 break;
191
192                                         case '+':
193                                                 if (fmt_flags & FMT_FLAG_SPACE) {
194                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
195                                                 }
196                                                 flag = FMT_FLAG_PLUS;
197                                                 break;
198
199                                         default: goto break_fmt_flags;
200                                 }
201                                 if (fmt_flags & flag) {
202                                         warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
203                                 }
204                                 fmt_flags |= flag;
205                                 fmt = *(++c);
206                         }
207 break_fmt_flags:
208
209                         /* minimum field width */
210                         if (fmt == '*') {
211                                 fmt = *(++c);
212                                 if (arg == NULL) {
213                                         warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
214                                         return -1;
215                                 }
216                                 const type_t *const arg_type = arg->expression->base.type;
217                                 if (arg_type != type_int) {
218                                         warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
219                                 }
220                                 arg = arg->next;
221                         } else {
222                                 while (isdigit(fmt)) {
223                                         fmt = *(++c);
224                                 }
225                         }
226                 }
227
228                 /* precision */
229                 if (fmt == '.') {
230                         fmt = *(++c);
231                         if (fmt == '*') {
232                                 fmt = *(++c);
233                                 if (arg == NULL) {
234                                         warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
235                                         return -1;
236                                 }
237                                 const type_t *const arg_type = arg->expression->base.type;
238                                 if (arg_type != type_int) {
239                                         warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
240                                 }
241                                 arg = arg->next;
242                         } else {
243                                 /* digit string may be omitted */
244                                 while (isdigit(fmt)) {
245                                         fmt = *(++c);
246                                 }
247                         }
248                 }
249
250                 /* length modifier */
251                 format_length_modifier_t fmt_mod;
252                 switch (fmt) {
253                         case 'h':
254                                 fmt = *(++c);
255                                 if (fmt == 'h') {
256                                         fmt = *(++c);
257                                         fmt_mod = FMT_MOD_hh;
258                                 } else {
259                                         fmt_mod = FMT_MOD_h;
260                                 }
261                                 break;
262
263                         case 'l':
264                                 fmt = *(++c);
265                                 if (fmt == 'l') {
266                                         fmt = *(++c);
267                                         fmt_mod = FMT_MOD_ll;
268                                 } else {
269                                         fmt_mod = FMT_MOD_l;
270                                 }
271                                 break;
272
273                         case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
274                         case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
275                         case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
276                         case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
277                         case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
278                         /* microsoft mode */
279                         case 'w':
280                                 if (c_mode & _MS) {
281                                         fmt = *(++c); fmt_mod = FMT_MOD_w;
282                                 } else {
283                                         fmt_mod = FMT_MOD_NONE;
284                                 }
285                                 break;
286                         case 'I':
287                                 if (c_mode & _MS) {
288                                         fmt = *(++c); fmt_mod = FMT_MOD_I;
289                                         if (fmt == '3') {
290                                                 fmt = *(++c);
291                                                 if (fmt == '2') {
292                                                         fmt = *(++c);
293                                                         fmt_mod = FMT_MOD_I32;
294                                                 } else {
295                                                         /* rewind */
296                                                         fmt = *(--c);
297                                                 }
298                                         } else if (fmt == '6') {
299                                                 fmt = *(++c);
300                                                 if (fmt == '4') {
301                                                         fmt = *(++c);
302                                                         fmt_mod = FMT_MOD_I64;
303                                                 } else {
304                                                         /* rewind */
305                                                         fmt = *(--c);
306                                                 }
307                                         }
308                                 } else {
309                                         fmt_mod = FMT_MOD_NONE;
310                                 }
311                                 break;
312                         default:
313                                 fmt_mod = FMT_MOD_NONE;
314                                 break;
315                 }
316
317
318                 type_t            *expected_type;
319                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
320                 format_flags_t     allowed_flags;
321                 switch (fmt) {
322                         case 'd':
323                         case 'i':
324                                 switch (fmt_mod) {
325                                         case FMT_MOD_NONE: expected_type = type_int;       break;
326                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
327                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
328                                         case FMT_MOD_l:    expected_type = type_long;      break;
329                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
330                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
331                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
332                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
333                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
334                                         case FMT_MOD_I32:  expected_type = type_int32;     break;
335                                         case FMT_MOD_I64:  expected_type = type_int64;     break;
336
337                                         default:
338                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
339                                                 goto next_arg;
340                                 }
341                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
342                                 break;
343
344                         case 'o':
345                         case 'X':
346                         case 'x':
347                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
348                                 goto eval_fmt_mod_unsigned;
349
350                         case 'u':
351                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
352 eval_fmt_mod_unsigned:
353                                 switch (fmt_mod) {
354                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
355                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
356                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
357                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
358                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
359                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
360                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
361                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
362                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
363                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
364                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
365
366                                         default:
367                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
368                                                 goto next_arg;
369                                 }
370                                 break;
371
372                         case 'A':
373                         case 'a':
374                         case 'E':
375                         case 'e':
376                         case 'F':
377                         case 'f':
378                         case 'G':
379                         case 'g':
380                                 switch (fmt_mod) {
381                                         case FMT_MOD_l:    /* l modifier is ignored */
382                                         case FMT_MOD_NONE: expected_type = type_double;      break;
383                                         case FMT_MOD_L:    expected_type = type_long_double; break;
384
385                                         default:
386                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
387                                                 goto next_arg;
388                                 }
389                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
390                                 break;
391
392                         case 'C':
393                                 if (fmt_mod != FMT_MOD_NONE) {
394                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
395                                         goto next_arg;
396                                 }
397                                 expected_type = type_wchar_t;
398                                 allowed_flags = FMT_FLAG_NONE;
399                                 break;
400
401                         case 'c':
402                                 expected_type = type_int;
403                                 switch (fmt_mod) {
404                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
405                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
406                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
407
408                                         default:
409                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
410                                                 goto next_arg;
411                                 }
412                                 allowed_flags = FMT_FLAG_NONE;
413                                 break;
414
415                         case 'S':
416                                 if (fmt_mod != FMT_MOD_NONE) {
417                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
418                                         goto next_arg;
419                                 }
420                                 expected_type = type_wchar_t_ptr;
421                                 expected_qual = TYPE_QUALIFIER_CONST;
422                                 allowed_flags = FMT_FLAG_MINUS;
423                                 break;
424
425                         case 's':
426                                 switch (fmt_mod) {
427                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
428                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
429                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
430
431                                         default:
432                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
433                                                 goto next_arg;
434                                 }
435                                 expected_qual = TYPE_QUALIFIER_CONST;
436                                 allowed_flags = FMT_FLAG_MINUS;
437                                 break;
438
439                         case 'p':
440                                 if (fmt_mod != FMT_MOD_NONE) {
441                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
442                                         goto next_arg;
443                                 }
444                                 expected_type = type_void_ptr;
445                                 allowed_flags = FMT_FLAG_NONE;
446                                 break;
447
448                         case 'n':
449                                 switch (fmt_mod) {
450                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
451                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
452                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
453                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
454                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
455                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
456                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
457                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
458
459                                         default:
460                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
461                                                 goto next_arg;
462                                 }
463                                 allowed_flags = FMT_FLAG_NONE;
464                                 break;
465
466                         default:
467                                 warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
468                                 if (arg == NULL) {
469                                         warningf(pos, "too few arguments for format string");
470                                         return -1;
471                                 }
472                                 goto next_arg;
473                 }
474
475                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
476                 if (wrong_flags != 0) {
477                         char  wrong[8];
478                         char *p = wrong;
479                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
480                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
481                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
482                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
483                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
484                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
485                         *p = '\0';
486
487                         warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
488                 }
489
490                 if (arg == NULL) {
491                         warningf(pos, "too few arguments for format string");
492                         return -1;
493                 }
494
495                 {       /* create a scope here to prevent warning about the jump to next_arg */
496                         type_t *const arg_type           = arg->expression->base.type;
497                         type_t *const arg_skip           = skip_typeref(arg_type);
498                         type_t *const expected_type_skip = skip_typeref(expected_type);
499
500                         if (fmt == 'p') {
501                                 /* allow any pointer type for %p, not just void */
502                                 if (is_type_pointer(arg_skip))
503                                         goto next_arg;
504                         }
505
506                         if (is_type_pointer(expected_type_skip)) {
507                                 if (is_type_pointer(arg_skip)) {
508                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
509                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
510                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
511                                                 get_unqualified_type(arg_to) == exp_to) {
512                                                 goto next_arg;
513                                         }
514                                 }
515                         } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
516                                 goto next_arg;
517                         }
518                         if (is_type_valid(arg_skip)) {
519                                 warningf(pos,
520                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
521                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
522                         }
523                 }
524 next_arg:
525                 arg = arg->next;
526         }
527         assert(fmt == '\0');
528         if (c+1 < string + size) {
529                 warningf(pos, "format string contains '\\0'");
530         }
531         return num_fmt;
532 }
533
534 /**
535  * Check printf-style format.
536  */
537 static void check_printf_format(call_argument_t const *arg,
538                                 format_spec_t const *const spec)
539 {
540         /* find format arg */
541         size_t idx = 0;
542         for (; idx < spec->fmt_idx; ++idx) {
543                 if (arg == NULL)
544                         return;
545                 arg = arg->next;
546         }
547
548         expression_t const *const fmt_expr = arg->expression;
549
550         /* find the real args */
551         for (; idx < spec->arg_idx && arg != NULL; ++idx)
552                 arg = arg->next;
553
554         int const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
555         if (num_fmt < 0)
556                 return;
557
558         size_t num_args = 0;
559         for (; arg != NULL; arg = arg->next)
560                 ++num_args;
561         if (num_args > (size_t)num_fmt) {
562                 warningf(&fmt_expr->base.source_position,
563                          "%u argument%s but only %u format specifier%s",
564                          num_args, num_args != 1 ? "s" : "",
565                          num_fmt,  num_fmt  != 1 ? "s" : "");
566         }
567 }
568
569 /**
570  * Check scanf-style format.
571  */
572 static void check_scanf_format(const call_argument_t *arg,
573                                const format_spec_t *spec)
574 {
575         /* find format arg */
576         unsigned idx = 0;
577         for (; idx < spec->fmt_idx; ++idx) {
578                 if (arg == NULL)
579                         return;
580                 arg = arg->next;
581         }
582
583         const expression_t *fmt_expr = arg->expression;
584         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
585                 fmt_expr = fmt_expr->unary.value;
586         }
587
588         if (fmt_expr->kind != EXPR_STRING_LITERAL
589                         && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
590                 return;
591
592         const char *string = fmt_expr->literal.value.begin;
593         size_t      size   = fmt_expr->literal.value.size;
594         const char *c      = string;
595
596         /* find the real args */
597         for (; idx < spec->arg_idx && arg != NULL; ++idx)
598                 arg = arg->next;
599
600         const source_position_t *pos = &fmt_expr->base.source_position;
601         unsigned num_fmt = 0;
602         char     fmt;
603         for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
604                 if (fmt != '%')
605                         continue;
606                 fmt = *(++c);
607                 if (fmt == '\0') {
608                         warningf(pos, "dangling '%%' in format string");
609                         break;
610                 }
611                 if (fmt == '%')
612                         continue;
613
614                 ++num_fmt;
615
616                 /* look for length modifiers */
617                 format_length_modifier_t fmt_mod = FMT_MOD_NONE;
618                 switch (fmt) {
619                 case 'h':
620                         fmt = *(++c);
621                         if (fmt == 'h') {
622                                 fmt = *(++c);
623                                 fmt_mod = FMT_MOD_hh;
624                         } else {
625                                 fmt_mod = FMT_MOD_h;
626                         }
627                         break;
628
629                 case 'l':
630                         fmt = *(++c);
631                         if (fmt == 'l') {
632                                 fmt = *(++c);
633                                 fmt_mod = FMT_MOD_ll;
634                         } else {
635                                 fmt_mod = FMT_MOD_l;
636                         }
637                         break;
638
639                 case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
640                 case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
641                 case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
642                 case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
643                 /* microsoft mode */
644                 case 'w':
645                         if (c_mode & _MS) {
646                                 fmt = *(++c);
647                                 fmt_mod = FMT_MOD_w;
648                         }
649                         break;
650                 case 'I':
651                         if (c_mode & _MS) {
652                                 fmt = *(++c);
653                                 fmt_mod = FMT_MOD_I;
654                                 if (fmt == '3') {
655                                         fmt = *(++c);
656                                         if (fmt == '2') {
657                                                 fmt = *(++c);
658                                                 fmt_mod = FMT_MOD_I32;
659                                         } else {
660                                                 /* rewind */
661                                                 fmt = *(--c);
662                                         }
663                                 } else if (fmt == '6') {
664                                         fmt = *(++c);
665                                         if (fmt == '4') {
666                                                 fmt = *(++c);
667                                                 fmt_mod = FMT_MOD_I64;
668                                         } else {
669                                                 /* rewind */
670                                                 fmt = *(--c);
671                                         }
672                                 }
673                         }
674                         break;
675                 }
676
677                 if (fmt == '\0') {
678                         warningf(pos, "dangling % with conversion specififer in format string");
679                         break;
680                 }
681
682                 type_t *expected_type;
683                 switch (fmt) {
684                 case 'd':
685                 case 'i':
686                         switch (fmt_mod) {
687                         case FMT_MOD_NONE: expected_type = type_int;         break;
688                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
689                         case FMT_MOD_h:    expected_type = type_short;       break;
690                         case FMT_MOD_l:    expected_type = type_long;        break;
691                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
692                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
693                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
694                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
695                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
696                         case FMT_MOD_I32:  expected_type = type_int32;       break;
697                         case FMT_MOD_I64:  expected_type = type_int64;       break;
698
699                         default:
700                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
701                                 goto next_arg;
702                         }
703                         break;
704
705                 case 'o':
706                 case 'X':
707                 case 'x':
708                 case 'u':
709                         switch (fmt_mod) {
710                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
711                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
712                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
713                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
714                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
715                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
716                         case FMT_MOD_z:    expected_type = type_size_t;             break;
717                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
718                         case FMT_MOD_I:    expected_type = type_size_t;             break;
719                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
720                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
721
722                         default:
723                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
724                                 goto next_arg;
725                         }
726                         break;
727
728                 case 'A':
729                 case 'a':
730                 case 'E':
731                 case 'e':
732                 case 'F':
733                 case 'f':
734                 case 'G':
735                 case 'g':
736                         switch (fmt_mod) {
737                         case FMT_MOD_l:    expected_type = type_double;      break;
738                         case FMT_MOD_NONE: expected_type = type_float;       break;
739                         case FMT_MOD_L:    expected_type = type_long_double; break;
740
741                         default:
742                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
743                                 goto next_arg;
744                         }
745                         break;
746
747                 case 'C':
748                         if (fmt_mod != FMT_MOD_NONE) {
749                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
750                                 goto next_arg;
751                         }
752                         expected_type = type_wchar_t;
753                         break;
754
755                 case 'c':
756                         expected_type = type_int;
757                         switch (fmt_mod) {
758                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
759                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
760                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
761
762                         default:
763                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
764                                 goto next_arg;
765                         }
766                         break;
767
768                 case 'S':
769                         if (fmt_mod != FMT_MOD_NONE) {
770                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
771                                 goto next_arg;
772                         }
773                         expected_type = type_wchar_t;
774                         break;
775
776                 case 's':
777                 case '[':
778                         switch (fmt_mod) {
779                                 case FMT_MOD_NONE: expected_type = type_char;    break;
780                                 case FMT_MOD_l:    expected_type = type_wchar_t; break;
781                                 case FMT_MOD_w:    expected_type = type_wchar_t; break;
782
783                                 default:
784                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
785                                         goto next_arg;
786                         }
787                         break;
788
789                 case 'p':
790                         if (fmt_mod != FMT_MOD_NONE) {
791                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
792                                 goto next_arg;
793                         }
794                         expected_type = type_void_ptr;
795                         break;
796
797                 case 'n':
798                         switch (fmt_mod) {
799                         case FMT_MOD_NONE: expected_type = type_int;         break;
800                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
801                         case FMT_MOD_h:    expected_type = type_short;       break;
802                         case FMT_MOD_l:    expected_type = type_long;        break;
803                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
804                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
805                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
806                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
807
808                         default:
809                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
810                                 goto next_arg;
811                         }
812                         break;
813
814                 default:
815                         warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u",
816                                  fmt, num_fmt);
817                         if (arg == NULL) {
818                                 warningf(pos, "too few arguments for format string");
819                                 return;
820                         }
821                         goto next_arg;
822                 }
823
824                 if (arg == NULL) {
825                         warningf(pos, "too few arguments for format string");
826                         return;
827                 }
828
829                 {       /* create a scope here to prevent warning about the jump to next_arg */
830                         type_t *const arg_type           = arg->expression->base.type;
831                         type_t *const arg_skip           = skip_typeref(arg_type);
832                         type_t *const expected_type_skip = skip_typeref(expected_type);
833
834                         if (! is_type_pointer(arg_skip))
835                                 goto error_arg_type;
836                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
837
838                         if (fmt == 'p') {
839                                 /* allow any pointer type for %p, not just void */
840                                 if (is_type_pointer(ptr_skip))
841                                         goto next_arg;
842                         }
843
844                         /* do NOT allow const or restrict, all other should be ok */
845                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
846                                 goto error_arg_type;
847                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
848                         if (unqual_ptr == expected_type_skip) {
849                                 goto next_arg;
850                         } else if (expected_type_skip == type_char) {
851                                 /* char matches with unsigned char AND signed char */
852                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
853                                         goto next_arg;
854                         }
855 error_arg_type:
856                         if (is_type_valid(arg_skip)) {
857                                 warningf(pos,
858                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
859                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
860                         }
861                 }
862 next_arg:
863                 arg = arg->next;
864         }
865         assert(fmt == '\0');
866         if (c+1 < string + size) {
867                 warningf(pos, "format string contains '\\0'");
868         }
869         if (arg != NULL) {
870                 unsigned num_args = num_fmt;
871                 while (arg != NULL) {
872                         ++num_args;
873                         arg = arg->next;
874                 }
875                 warningf(pos, "%u argument%s but only %u format specifier%s",
876                          num_args, num_args != 1 ? "s" : "",
877                          num_fmt, num_fmt != 1 ? "s" : "");
878         }
879 }
880
881 static const format_spec_t builtin_table[] = {
882         { "printf",        FORMAT_PRINTF,   0, 1 },
883         { "wprintf",       FORMAT_PRINTF,   0, 1 },
884         { "sprintf",       FORMAT_PRINTF,   1, 2 },
885         { "swprintf",      FORMAT_PRINTF,   1, 2 },
886         { "snprintf",      FORMAT_PRINTF,   2, 3 },
887         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
888         { "fprintf",       FORMAT_PRINTF,   1, 2 },
889         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
890         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
891         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
892
893         { "scanf",         FORMAT_SCANF,    0, 1 },
894         { "wscanf",        FORMAT_SCANF,    0, 1 },
895         { "sscanf",        FORMAT_SCANF,    1, 2 },
896         { "swscanf",       FORMAT_SCANF,    1, 2 },
897         { "fscanf",        FORMAT_SCANF,    1, 2 },
898         { "fwscanf",       FORMAT_SCANF,    1, 2 },
899
900         { "strftime",      FORMAT_STRFTIME, 3, 4 },
901         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
902
903         { "strfmon",       FORMAT_STRFMON,  3, 4 },
904
905         /* MS extensions */
906         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
907         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
908         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
909         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
910         { "printf_s",      FORMAT_PRINTF,   0, 1 },
911         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
912         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
913         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
914         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
915         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
916         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
917         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
918         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
919         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
920         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
921         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
922         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
923         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
924         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
925         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
926         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
927         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
928 };
929
930 void check_format(const call_expression_t *const call)
931 {
932         if (!warning.format)
933                 return;
934
935         const expression_t *const func_expr = call->function;
936         if (func_expr->kind != EXPR_REFERENCE)
937                 return;
938
939         const entity_t        *const entity = func_expr->reference.entity;
940         const call_argument_t *      arg    = call->arguments;
941
942         if (false) {
943                 /* the declaration has a GNU format attribute, check it */
944         } else {
945                 /*
946                  * For some functions we always check the format, even if it was not
947                  * specified. This allows to check format even in MS mode or without
948                  * header included.
949                  */
950                 const char *const name = entity->base.symbol->string;
951                 for (size_t i = 0; i < lengthof(builtin_table); ++i) {
952                         if (strcmp(name, builtin_table[i].name) == 0) {
953                                 switch (builtin_table[i].fmt_kind) {
954                                 case FORMAT_PRINTF:
955                                         check_printf_format(arg, &builtin_table[i]);
956                                         break;
957                                 case FORMAT_SCANF:
958                                         check_scanf_format(arg, &builtin_table[i]);
959                                         break;
960                                 case FORMAT_STRFTIME:
961                                 case FORMAT_STRFMON:
962                                         /* TODO: implement other cases */
963                                         break;
964                                 }
965                                 break;
966                         }
967                 }
968         }
969 }