When doing format string checking handle ?: as format expression, i.e. process the...
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <ctype.h>
21 #include <wctype.h>
22
23 #include "adt/util.h"
24 #include "format_check.h"
25 #include "symbol_t.h"
26 #include "ast_t.h"
27 #include "entity_t.h"
28 #include "diagnostic.h"
29 #include "types.h"
30 #include "type_t.h"
31 #include "warning.h"
32 #include "lang_features.h"
33
/**
 * Flag characters that may appear in a conversion specification.
 * Every flag occupies its own bit so several can be combined.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00,
	FMT_FLAG_HASH  = 0x01, /* '#'  */
	FMT_FLAG_ZERO  = 0x02, /* '0'  */
	FMT_FLAG_MINUS = 0x04, /* '-'  */
	FMT_FLAG_SPACE = 0x08, /* ' '  */
	FMT_FLAG_PLUS  = 0x10, /* '+'  */
	FMT_FLAG_TICK  = 0x20  /* '\'' */
} format_flag_t;

/** Bit set composed of format_flag_t values. */
typedef unsigned int format_flags_t;
45
/**
 * Length modifiers recognised between the flags/width/precision and the
 * conversion character. The values are consecutive and start at 0 because
 * they are used as indices into a name table.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0,
	FMT_MOD_L    = 1,
	FMT_MOD_hh   = 2,
	FMT_MOD_h    = 3,
	FMT_MOD_l    = 4,
	FMT_MOD_ll   = 5,
	FMT_MOD_j    = 6,
	FMT_MOD_t    = 7,
	FMT_MOD_z    = 8,
	FMT_MOD_q    = 9,
	/* only in microsoft mode */
	FMT_MOD_w    = 10,
	FMT_MOD_I    = 11,
	FMT_MOD_I32  = 12,
	FMT_MOD_I64  = 13
} format_length_modifier_t;
63
64 typedef struct format_spec_t {
65         const char    *name;     /**< name of the function */
66         format_kind_t  fmt_kind; /**< kind */
67         unsigned       fmt_idx;  /**< index of the format string */
68         unsigned       arg_idx;  /**< index of the first argument */
69 } format_spec_t;
70
71 static const char* get_length_modifier_name(const format_length_modifier_t mod)
72 {
73         static const char* const names[] = {
74                 [FMT_MOD_NONE] = "",
75                 [FMT_MOD_L]    = "L",
76                 [FMT_MOD_hh]   = "hh",
77                 [FMT_MOD_h]    = "h",
78                 [FMT_MOD_l]    = "l",
79                 [FMT_MOD_ll]   = "ll",
80                 [FMT_MOD_j]    = "j",
81                 [FMT_MOD_t]    = "t",
82                 [FMT_MOD_z]    = "z",
83                 [FMT_MOD_q]    = "q",
84                 /* only in microsoft mode */
85                 [FMT_MOD_w]    = "w",
86                 [FMT_MOD_I]    = "I",
87                 [FMT_MOD_I32]  = "I32",
88                 [FMT_MOD_I64]  = "I64"
89         };
90         assert(mod < lengthof(names));
91         return names[mod];
92 }
93
94 static void warn_invalid_length_modifier(const source_position_t *pos,
95                                          const format_length_modifier_t mod,
96                                          const wchar_rep_t conversion)
97 {
98         warningf(pos,
99                 "invalid length modifier '%s' for conversion specifier '%%%c'",
100                 get_length_modifier_name(mod), conversion
101         );
102 }
103
/**
 * Cursor over a format string. The accessor callbacks hide whether the
 * underlying string consists of narrow or wide characters.
 */
typedef struct vchar_t vchar_t;
struct vchar_t {
	/** the string being scanned */
	const void *string;
	/** current position */
	size_t      position;
	/** size of the string */
	size_t      size;

	/** set the position to 0 and return the first character of the string */
	unsigned (*first)(vchar_t *self);
	/** advance the position and return the character found there */
	unsigned (*next)(vchar_t *self);
	/** return non-zero if the given character is a digit */
	int (*is_digit)(unsigned vchar);
};
117
118 static unsigned string_first(vchar_t *self)
119 {
120         self->position = 0;
121         const string_t *string = self->string;
122         return string->begin[0];
123 }
124
125 static unsigned string_next(vchar_t *self)
126 {
127         ++self->position;
128         const string_t *string = self->string;
129         return string->begin[self->position];
130 }
131
/**
 * vchar_t::is_digit for narrow strings.
 *
 * The value is compared against the range '0'..'9' directly instead of being
 * handed to isdigit(): isdigit() is only defined for EOF and values
 * representable as unsigned char, but the characters arrive here as
 * 'unsigned' and may be larger than that.
 *
 * @param vchar  the character to test
 * @return       non-zero if vchar is a decimal digit, 0 otherwise
 */
static int string_isdigit(unsigned vchar)
{
	return vchar >= '0' && vchar <= '9';
}
136
137 static unsigned wstring_first(vchar_t *self)
138 {
139         self->position = 0;
140         const wide_string_t *wstring = self->string;
141         return wstring->begin[0];
142 }
143
144 static unsigned wstring_next(vchar_t *self)
145 {
146         ++self->position;
147         const wide_string_t *wstring = self->string;
148         return wstring->begin[self->position];
149 }
150
/**
 * vchar_t::is_digit for wide strings.
 *
 * @param vchar  the character to test
 * @return       the result of iswdigit() for the character
 */
static int wstring_isdigit(unsigned vchar)
{
	const wint_t wide = vchar;
	return iswdigit(wide);
}
155
156 static bool atend(vchar_t *self)
157 {
158         return self->position + 1 == self->size;
159 }
160
161 /**
162  * Check printf-style format.
163  */
164 static ssize_t internal_check_printf_format(const expression_t *fmt_expr,
165     const call_argument_t *arg, const format_spec_t *spec)
166 {
167         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
168                 fmt_expr = fmt_expr->unary.value;
169         }
170
171         vchar_t vchar;
172         switch (fmt_expr->kind) {
173                 case EXPR_STRING_LITERAL:
174                         vchar.string   = &fmt_expr->string.value;
175                         vchar.size     = fmt_expr->string.value.size;
176                         vchar.first    = string_first;
177                         vchar.next     = string_next;
178                         vchar.is_digit = string_isdigit;
179                         break;
180
181                 case EXPR_WIDE_STRING_LITERAL:
182                         vchar.string   = &fmt_expr->wide_string.value;
183                         vchar.size     = fmt_expr->wide_string.value.size;
184                         vchar.first    = wstring_first;
185                         vchar.next     = wstring_next;
186                         vchar.is_digit = wstring_isdigit;
187                         break;
188
189                 case EXPR_CONDITIONAL: {
190                         conditional_expression_t const *const c = &fmt_expr->conditional;
191                         expression_t             const *      t = c->true_expression;
192                         if (t == NULL)
193                                 t = c->condition;
194                         ssize_t const nt = internal_check_printf_format(t,                   arg, spec);
195                         ssize_t const nf = internal_check_printf_format(c->false_expression, arg, spec);
196                         return nt > nf ? nt : nf;
197                 }
198
199                 default:
200                         return -1;
201         }
202
203         const source_position_t *pos = &fmt_expr->base.source_position;
204         unsigned fmt     = vchar.first(&vchar);
205         unsigned num_fmt = 0;
206         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
207                 if (fmt != '%')
208                         continue;
209                 fmt = vchar.next(&vchar);
210
211                 if (fmt == '%')
212                         continue;
213
214                 ++num_fmt;
215
216                 format_flags_t fmt_flags = FMT_FLAG_NONE;
217                 if (fmt == '0') {
218                         fmt = vchar.next(&vchar);
219                         fmt_flags |= FMT_FLAG_ZERO;
220                 }
221
222                 /* argument selector or minimum field width */
223                 if (vchar.is_digit(fmt)) {
224                         do {
225                                 fmt = vchar.next(&vchar);
226                         } while (vchar.is_digit(fmt));
227
228                         /* digit string was ... */
229                         if (fmt == '$') {
230                                 /* ... argument selector */
231                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
232                                 /* TODO implement */
233                                 return -1;
234                         }
235                         /* ... minimum field width */
236                 } else {
237                         /* flags */
238                         for (;;) {
239                                 format_flags_t flag;
240                                 switch (fmt) {
241                                         case '#':  flag = FMT_FLAG_HASH;  break;
242                                         case '0':  flag = FMT_FLAG_ZERO;  break;
243                                         case '-':  flag = FMT_FLAG_MINUS; break;
244                                         case '\'': flag = FMT_FLAG_TICK;  break;
245
246                                         case ' ':
247                                                 if (fmt_flags & FMT_FLAG_PLUS) {
248                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
249                                                 }
250                                                 flag = FMT_FLAG_SPACE;
251                                                 break;
252
253                                         case '+':
254                                                 if (fmt_flags & FMT_FLAG_SPACE) {
255                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
256                                                 }
257                                                 flag = FMT_FLAG_PLUS;
258                                                 break;
259
260                                         default: goto break_fmt_flags;
261                                 }
262                                 if (fmt_flags & flag) {
263                                         warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
264                                 }
265                                 fmt_flags |= flag;
266                                 fmt = vchar.next(&vchar);
267                         }
268 break_fmt_flags:
269
270                         /* minimum field width */
271                         if (fmt == '*') {
272                                 fmt = vchar.next(&vchar);
273                                 if (arg == NULL) {
274                                         warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
275                                         return -1;
276                                 }
277                                 const type_t *const arg_type = arg->expression->base.type;
278                                 if (arg_type != type_int) {
279                                         warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
280                                 }
281                                 arg = arg->next;
282                         } else {
283                                 while (vchar.is_digit(fmt)) {
284                                         fmt = vchar.next(&vchar);
285                                 }
286                         }
287                 }
288
289                 /* precision */
290                 if (fmt == '.') {
291                         fmt = vchar.next(&vchar);
292                         if (fmt == '*') {
293                                 fmt = vchar.next(&vchar);
294                                 if (arg == NULL) {
295                                         warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
296                                         return -1;
297                                 }
298                                 const type_t *const arg_type = arg->expression->base.type;
299                                 if (arg_type != type_int) {
300                                         warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
301                                 }
302                                 arg = arg->next;
303                         } else {
304                                 /* digit string may be omitted */
305                                 while (vchar.is_digit(fmt)) {
306                                         fmt = vchar.next(&vchar);
307                                 }
308                         }
309                 }
310
311                 /* length modifier */
312                 format_length_modifier_t fmt_mod;
313                 switch (fmt) {
314                         case 'h':
315                                 fmt = vchar.next(&vchar);
316                                 if (fmt == 'h') {
317                                         fmt = vchar.next(&vchar);
318                                         fmt_mod = FMT_MOD_hh;
319                                 } else {
320                                         fmt_mod = FMT_MOD_h;
321                                 }
322                                 break;
323
324                         case 'l':
325                                 fmt = vchar.next(&vchar);
326                                 if (fmt == 'l') {
327                                         fmt = vchar.next(&vchar);
328                                         fmt_mod = FMT_MOD_ll;
329                                 } else {
330                                         fmt_mod = FMT_MOD_l;
331                                 }
332                                 break;
333
334                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
335                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
336                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
337                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
338                         case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
339                         /* microsoft mode */
340                         case 'w':
341                                 if (c_mode & _MS) {
342                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
343                                 } else {
344                                         fmt_mod = FMT_MOD_NONE;
345                                 }
346                                 break;
347                         case 'I':
348                                 if (c_mode & _MS) {
349                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
350                                         if (fmt == '3') {
351                                                 fmt = vchar.next(&vchar);
352                                                 if (fmt == '2') {
353                                                         fmt = vchar.next(&vchar);
354                                                         fmt_mod = FMT_MOD_I32;
355                                                 } else {
356                                                         /* rewind */
357                                                         --vchar.position;
358                                                 }
359                                         } else if (fmt == '6') {
360                                                 fmt = vchar.next(&vchar);
361                                                 if (fmt == '4') {
362                                                         fmt = vchar.next(&vchar);
363                                                         fmt_mod = FMT_MOD_I64;
364                                                 } else {
365                                                         /* rewind */
366                                                         --vchar.position;
367                                                 }
368                                         }
369                                 } else {
370                                         fmt_mod = FMT_MOD_NONE;
371                                 }
372                                 break;
373                         default:
374                                 fmt_mod = FMT_MOD_NONE;
375                                 break;
376                 }
377
378                 if (fmt == '\0') {
379                         warningf(pos, "dangling %% in format string");
380                         break;
381                 }
382
383                 type_t            *expected_type;
384                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
385                 format_flags_t     allowed_flags;
386                 switch (fmt) {
387                         case 'd':
388                         case 'i':
389                                 switch (fmt_mod) {
390                                         case FMT_MOD_NONE: expected_type = type_int;       break;
391                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
392                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
393                                         case FMT_MOD_l:    expected_type = type_long;      break;
394                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
395                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
396                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
397                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
398                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
399                                         case FMT_MOD_I32:  expected_type = type_int32;     break;
400                                         case FMT_MOD_I64:  expected_type = type_int64;     break;
401
402                                         default:
403                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
404                                                 goto next_arg;
405                                 }
406                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
407                                 break;
408
409                         case 'o':
410                         case 'X':
411                         case 'x':
412                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
413                                 goto eval_fmt_mod_unsigned;
414
415                         case 'u':
416                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
417 eval_fmt_mod_unsigned:
418                                 switch (fmt_mod) {
419                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
420                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
421                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
422                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
423                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
424                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
425                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
426                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
427                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
428                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
429                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
430
431                                         default:
432                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
433                                                 goto next_arg;
434                                 }
435                                 break;
436
437                         case 'A':
438                         case 'a':
439                         case 'E':
440                         case 'e':
441                         case 'F':
442                         case 'f':
443                         case 'G':
444                         case 'g':
445                                 switch (fmt_mod) {
446                                         case FMT_MOD_l:    /* l modifier is ignored */
447                                         case FMT_MOD_NONE: expected_type = type_double;      break;
448                                         case FMT_MOD_L:    expected_type = type_long_double; break;
449
450                                         default:
451                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
452                                                 goto next_arg;
453                                 }
454                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
455                                 break;
456
457                         case 'C':
458                                 if (fmt_mod != FMT_MOD_NONE) {
459                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
460                                         goto next_arg;
461                                 }
462                                 expected_type = type_wchar_t;
463                                 allowed_flags = FMT_FLAG_NONE;
464                                 break;
465
466                         case 'c':
467                                 expected_type = type_int;
468                                 switch (fmt_mod) {
469                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
470                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
471                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
472
473                                         default:
474                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
475                                                 goto next_arg;
476                                 }
477                                 allowed_flags = FMT_FLAG_NONE;
478                                 break;
479
480                         case 'S':
481                                 if (fmt_mod != FMT_MOD_NONE) {
482                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
483                                         goto next_arg;
484                                 }
485                                 expected_type = type_wchar_t_ptr;
486                                 expected_qual = TYPE_QUALIFIER_CONST;
487                                 allowed_flags = FMT_FLAG_MINUS;
488                                 break;
489
490                         case 's':
491                                 switch (fmt_mod) {
492                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
493                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
494                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
495
496                                         default:
497                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
498                                                 goto next_arg;
499                                 }
500                                 expected_qual = TYPE_QUALIFIER_CONST;
501                                 allowed_flags = FMT_FLAG_MINUS;
502                                 break;
503
504                         case 'p':
505                                 if (fmt_mod != FMT_MOD_NONE) {
506                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
507                                         goto next_arg;
508                                 }
509                                 expected_type = type_void_ptr;
510                                 allowed_flags = FMT_FLAG_NONE;
511                                 break;
512
513                         case 'n':
514                                 switch (fmt_mod) {
515                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
516                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
517                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
518                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
519                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
520                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
521                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
522                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
523
524                                         default:
525                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
526                                                 goto next_arg;
527                                 }
528                                 allowed_flags = FMT_FLAG_NONE;
529                                 break;
530
531                         default:
532                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
533                                 if (arg == NULL) {
534                                         warningf(pos, "too few arguments for format string");
535                                         return -1;
536                                 }
537                                 goto next_arg;
538                 }
539
540                 format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
541                 if (wrong_flags != 0) {
542                         char  wrong[8];
543                         char *p = wrong;
544                         if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
545                         if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
546                         if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
547                         if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
548                         if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
549                         if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
550                         *p = '\0';
551
552                         warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
553                 }
554
555                 if (arg == NULL) {
556                         warningf(pos, "too few arguments for format string");
557                         return -1;
558                 }
559
560                 {       /* create a scope here to prevent warning about the jump to next_arg */
561                         type_t *const arg_type           = arg->expression->base.type;
562                         type_t *const arg_skip           = skip_typeref(arg_type);
563                         type_t *const expected_type_skip = skip_typeref(expected_type);
564
565                         if (fmt == 'p') {
566                                 /* allow any pointer type for %p, not just void */
567                                 if (is_type_pointer(arg_skip))
568                                         goto next_arg;
569                         }
570
571                         if (is_type_pointer(expected_type_skip)) {
572                                 if (is_type_pointer(arg_skip)) {
573                                         type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
574                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
575                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
576                                                 get_unqualified_type(arg_to) == exp_to) {
577                                                 goto next_arg;
578                                         }
579                                 }
580                         } else {
581                                 if (get_unqualified_type(arg_skip) == expected_type_skip) {
582                                         goto next_arg;
583                                 }
584                         }
585                         if (is_type_valid(arg_skip)) {
586                                 warningf(pos,
587                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
588                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
589                         }
590                 }
591 next_arg:
592                 arg = arg->next;
593         }
594         if (!atend(&vchar)) {
595                 warningf(pos, "format string contains '\\0'");
596         }
597         return num_fmt;
598 }
599
600 /**
601  * Check printf-style format.
602  */
603 static void check_printf_format(call_argument_t const *arg, format_spec_t const *const spec)
604 {
605         /* find format arg */
606         size_t idx = 0;
607         for (; idx < spec->fmt_idx; ++idx) {
608                 if (arg == NULL)
609                         return;
610                 arg = arg->next;
611         }
612
613         expression_t const *const fmt_expr = arg->expression;
614
615         /* find the real args */
616         for (; idx < spec->arg_idx && arg != NULL; ++idx)
617                 arg = arg->next;
618
619         ssize_t const num_fmt = internal_check_printf_format(fmt_expr, arg, spec);
620         if (num_fmt < 0)
621                 return;
622
623         size_t num_args = 0;
624         for (; arg != NULL; arg = arg->next)
625                 ++num_args;
626         if (num_args > (size_t)num_fmt) {
627                 warningf(&fmt_expr->base.source_position,
628                         "%u argument%s but only %u format specifier%s",
629                         num_args, num_args != 1 ? "s" : "",
630                         num_fmt,  num_fmt  != 1 ? "s" : "");
631         }
632 }
633
634 /**
635  * Check scanf-style format.
636  */
637 static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
638 {
639         /* find format arg */
640         unsigned idx = 0;
641         for (; idx < spec->fmt_idx; ++idx) {
642                 if (arg == NULL)
643                         return;
644                 arg = arg->next;
645         }
646
647         const expression_t *fmt_expr = arg->expression;
648         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
649                 fmt_expr = fmt_expr->unary.value;
650         }
651
652         vchar_t vchar;
653         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
654                 vchar.string   = &fmt_expr->wide_string.value;
655                 vchar.size     = fmt_expr->wide_string.value.size;
656                 vchar.first    = wstring_first;
657                 vchar.next     = wstring_next;
658                 vchar.is_digit = wstring_isdigit;
659         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
660                 vchar.string   = &fmt_expr->string.value;
661                 vchar.size     = fmt_expr->string.value.size;
662                 vchar.first    = string_first;
663                 vchar.next     = string_next;
664                 vchar.is_digit = string_isdigit;
665         } else {
666                 return;
667         }
668         /* find the real args */
669         for (; idx < spec->arg_idx && arg != NULL; ++idx)
670                 arg = arg->next;
671
672         const source_position_t *pos = &fmt_expr->base.source_position;
673         unsigned fmt     = vchar.first(&vchar);
674         unsigned num_fmt = 0;
675         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
676                 if (fmt != '%')
677                         continue;
678                 fmt = vchar.next(&vchar);
679
680                 if (fmt == '%')
681                         continue;
682
683                 ++num_fmt;
684
685                 /* length modifier */
686                 format_length_modifier_t fmt_mod;
687                 switch (fmt) {
688                         case 'h':
689                                 fmt = vchar.next(&vchar);
690                                 if (fmt == 'h') {
691                                         fmt = vchar.next(&vchar);
692                                         fmt_mod = FMT_MOD_hh;
693                                 } else {
694                                         fmt_mod = FMT_MOD_h;
695                                 }
696                                 break;
697
698                         case 'l':
699                                 fmt = vchar.next(&vchar);
700                                 if (fmt == 'l') {
701                                         fmt = vchar.next(&vchar);
702                                         fmt_mod = FMT_MOD_ll;
703                                 } else {
704                                         fmt_mod = FMT_MOD_l;
705                                 }
706                                 break;
707
708                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
709                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
710                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
711                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
712                         /* microsoft mode */
713                         case 'w':
714                                 if (c_mode & _MS) {
715                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
716                                 } else {
717                                         fmt_mod = FMT_MOD_NONE;
718                                 }
719                                 break;
720                         case 'I':
721                                 if (c_mode & _MS) {
722                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
723                                         if (fmt == '3') {
724                                                 fmt = vchar.next(&vchar);
725                                                 if (fmt == '2') {
726                                                         fmt = vchar.next(&vchar);
727                                                         fmt_mod = FMT_MOD_I32;
728                                                 } else {
729                                                         /* rewind */
730                                                         --vchar.position;
731                                                 }
732                                         } else if (fmt == '6') {
733                                                 fmt = vchar.next(&vchar);
734                                                 if (fmt == '4') {
735                                                         fmt = vchar.next(&vchar);
736                                                         fmt_mod = FMT_MOD_I64;
737                                                 } else {
738                                                         /* rewind */
739                                                         --vchar.position;
740                                                 }
741                                         }
742                                 } else {
743                                         fmt_mod = FMT_MOD_NONE;
744                                 }
745                                 break;
746                         default:
747                                 fmt_mod = FMT_MOD_NONE;
748                                 break;
749                 }
750
751                 if (fmt == '\0') {
752                         warningf(pos, "dangling %% in format string");
753                         break;
754                 }
755
756                 type_t            *expected_type;
757                 switch (fmt) {
758                         case 'd':
759                         case 'i':
760                                 switch (fmt_mod) {
761                                         case FMT_MOD_NONE: expected_type = type_int;         break;
762                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
763                                         case FMT_MOD_h:    expected_type = type_short;       break;
764                                         case FMT_MOD_l:    expected_type = type_long;        break;
765                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
766                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
767                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
768                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
769                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
770                                         case FMT_MOD_I32:  expected_type = type_int32;       break;
771                                         case FMT_MOD_I64:  expected_type = type_int64;       break;
772
773                                         default:
774                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
775                                                 goto next_arg;
776                                 }
777                                 break;
778
779                         case 'o':
780                         case 'X':
781                         case 'x':
782                                 goto eval_fmt_mod_unsigned;
783
784                         case 'u':
785 eval_fmt_mod_unsigned:
786                                 switch (fmt_mod) {
787                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
788                                         case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
789                                         case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
790                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
791                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
792                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
793                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
794                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
795                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
796                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
797                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
798
799                                         default:
800                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
801                                                 goto next_arg;
802                                 }
803                                 break;
804
805                         case 'A':
806                         case 'a':
807                         case 'E':
808                         case 'e':
809                         case 'F':
810                         case 'f':
811                         case 'G':
812                         case 'g':
813                                 switch (fmt_mod) {
814                                         case FMT_MOD_l:    /* l modifier is ignored */
815                                         case FMT_MOD_NONE: expected_type = type_double;      break;
816                                         case FMT_MOD_L:    expected_type = type_long_double; break;
817
818                                         default:
819                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
820                                                 goto next_arg;
821                                 }
822                                 break;
823
824                         case 'C':
825                                 if (fmt_mod != FMT_MOD_NONE) {
826                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
827                                         goto next_arg;
828                                 }
829                                 expected_type = type_wchar_t;
830                                 break;
831
832                         case 'c':
833                                 expected_type = type_int;
834                                 switch (fmt_mod) {
835                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
836                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
837                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
838
839                                         default:
840                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
841                                                 goto next_arg;
842                                 }
843                                 break;
844
845                         case 'S':
846                                 if (fmt_mod != FMT_MOD_NONE) {
847                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
848                                         goto next_arg;
849                                 }
850                                 expected_type = type_wchar_t;
851                                 break;
852
853                         case 's':
854                         case '[':
855                                 switch (fmt_mod) {
856                                         case FMT_MOD_NONE: expected_type = type_char;    break;
857                                         case FMT_MOD_l:    expected_type = type_wchar_t; break;
858                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
859
860                                         default:
861                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
862                                                 goto next_arg;
863                                 }
864                                 break;
865
866                         case 'p':
867                                 if (fmt_mod != FMT_MOD_NONE) {
868                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
869                                         goto next_arg;
870                                 }
871                                 expected_type = type_void_ptr;
872                                 break;
873
874                         case 'n':
875                                 switch (fmt_mod) {
876                                         case FMT_MOD_NONE: expected_type = type_int;         break;
877                                         case FMT_MOD_hh:   expected_type = type_signed_char; break;
878                                         case FMT_MOD_h:    expected_type = type_short;       break;
879                                         case FMT_MOD_l:    expected_type = type_long;        break;
880                                         case FMT_MOD_ll:   expected_type = type_long_long;   break;
881                                         case FMT_MOD_j:    expected_type = type_intmax_t;    break;
882                                         case FMT_MOD_z:    expected_type = type_ssize_t;     break;
883                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
884
885                                         default:
886                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
887                                                 goto next_arg;
888                                 }
889                                 break;
890
891                         default:
892                                 warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
893                                 if (arg == NULL) {
894                                         warningf(pos, "too few arguments for format string");
895                                         return;
896                                 }
897                                 goto next_arg;
898                 }
899
900                 if (arg == NULL) {
901                         warningf(pos, "too few arguments for format string");
902                         return;
903                 }
904
905                 {       /* create a scope here to prevent warning about the jump to next_arg */
906                         type_t *const arg_type           = arg->expression->base.type;
907                         type_t *const arg_skip           = skip_typeref(arg_type);
908                         type_t *const expected_type_skip = skip_typeref(expected_type);
909
910                         if (! is_type_pointer(arg_skip))
911                                 goto error_arg_type;
912                         type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
913
914                         if (fmt == 'p') {
915                                 /* allow any pointer type for %p, not just void */
916                                 if (is_type_pointer(ptr_skip))
917                                         goto next_arg;
918                         }
919
920                         /* do NOT allow const or restrict, all other should be ok */
921                         if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
922                                 goto error_arg_type;
923                         type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
924                         if (unqual_ptr == expected_type_skip) {
925                                 goto next_arg;
926                         } else if (expected_type_skip == type_char) {
927                                 /* char matches with unsigned char AND signed char */
928                                 if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
929                                         goto next_arg;
930                         }
931 error_arg_type:
932                         if (is_type_valid(arg_skip)) {
933                                 warningf(pos,
934                                         "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
935                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
936                         }
937                 }
938 next_arg:
939                 arg = arg->next;
940         }
941         if (!atend(&vchar)) {
942                 warningf(pos, "format string contains '\\0'");
943         }
944         if (arg != NULL) {
945                 unsigned num_args = num_fmt;
946                 while (arg != NULL) {
947                         ++num_args;
948                         arg = arg->next;
949                 }
950                 warningf(pos, "%u argument%s but only %u format specifier%s",
951                         num_args, num_args != 1 ? "s" : "",
952                         num_fmt, num_fmt != 1 ? "s" : "");
953         }
954 }
955
956 static const format_spec_t builtin_table[] = {
957         { "printf",        FORMAT_PRINTF,   0, 1 },
958         { "wprintf",       FORMAT_PRINTF,   0, 1 },
959         { "sprintf",       FORMAT_PRINTF,   1, 2 },
960         { "swprintf",      FORMAT_PRINTF,   1, 2 },
961         { "snprintf",      FORMAT_PRINTF,   2, 3 },
962         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
963         { "fprintf",       FORMAT_PRINTF,   1, 2 },
964         { "fwprintf",      FORMAT_PRINTF,   1, 2 },
965         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
966         { "snwprintf",     FORMAT_PRINTF,   2, 3 },
967
968         { "scanf",         FORMAT_SCANF,    0, 1 },
969         { "wscanf",        FORMAT_SCANF,    0, 1 },
970         { "sscanf",        FORMAT_SCANF,    1, 2 },
971         { "swscanf",       FORMAT_SCANF,    1, 2 },
972         { "fscanf",        FORMAT_SCANF,    1, 2 },
973         { "fwscanf",       FORMAT_SCANF,    1, 2 },
974
975         { "strftime",      FORMAT_STRFTIME, 3, 4 },
976         { "wcstrftime",    FORMAT_STRFTIME, 3, 4 },
977
978         { "strfmon",       FORMAT_STRFMON,  3, 4 },
979
980         /* MS extensions */
981         { "_snprintf",     FORMAT_PRINTF,   2, 3 },
982         { "_snwprintf",    FORMAT_PRINTF,   2, 3 },
983         { "_scrintf",      FORMAT_PRINTF,   0, 1 },
984         { "_scwprintf",    FORMAT_PRINTF,   0, 1 },
985         { "printf_s",      FORMAT_PRINTF,   0, 1 },
986         { "wprintf_s",     FORMAT_PRINTF,   0, 1 },
987         { "sprintf_s",     FORMAT_PRINTF,   3, 4 },
988         { "swprintf_s",    FORMAT_PRINTF,   3, 4 },
989         { "fprintf_s",     FORMAT_PRINTF,   1, 2 },
990         { "fwprintf_s",    FORMAT_PRINTF,   1, 2 },
991         { "_sprintf_l",    FORMAT_PRINTF,   1, 3 },
992         { "_swprintf_l",   FORMAT_PRINTF,   1, 3 },
993         { "_printf_l",     FORMAT_PRINTF,   0, 2 },
994         { "_wprintf_l",    FORMAT_PRINTF,   0, 2 },
995         { "_fprintf_l",    FORMAT_PRINTF,   1, 3 },
996         { "_fwprintf_l",   FORMAT_PRINTF,   1, 3 },
997         { "_printf_s_l",   FORMAT_PRINTF,   0, 2 },
998         { "_wprintf_s_l",  FORMAT_PRINTF,   0, 2 },
999         { "_sprintf_s_l",  FORMAT_PRINTF,   3, 5 },
1000         { "_swprintf_s_l", FORMAT_PRINTF,   3, 5 },
1001         { "_fprintf_s_l",  FORMAT_PRINTF,   1, 3 },
1002         { "_fwprintf_s_l", FORMAT_PRINTF,   1, 3 },
1003 };
1004
1005 void check_format(const call_expression_t *const call)
1006 {
1007         if (!warning.format)
1008                 return;
1009
1010         const expression_t *const func_expr = call->function;
1011         if (func_expr->kind != EXPR_REFERENCE)
1012                 return;
1013
1014         const entity_t        *const entity = func_expr->reference.entity;
1015         const call_argument_t *      arg    = call->arguments;
1016
1017         if (false) {
1018                 /* the declaration has a GNU format attribute, check it */
1019         } else {
1020                 /*
1021                  * For some functions we always check the format, even if it was not specified.
1022                  * This allows to check format even in MS mode or without header included.
1023                  */
1024                 const char *const name = entity->base.symbol->string;
1025                 for (size_t i = 0; i < lengthof(builtin_table); ++i) {
1026                         if (strcmp(name, builtin_table[i].name) == 0) {
1027                                 switch (builtin_table[i].fmt_kind) {
1028                                 case FORMAT_PRINTF:
1029                                         check_printf_format(arg, &builtin_table[i]);
1030                                         break;
1031                                 case FORMAT_SCANF:
1032                                         check_scanf_format(arg, &builtin_table[i]);
1033                                         break;
1034                                 case FORMAT_STRFTIME:
1035                                 case FORMAT_STRFMON:
1036                                         /* TODO: implement other cases */
1037                                         break;
1038                                 }
1039                                 break;
1040                         }
1041                 }
1042         }
1043 }