recognize microsoft format specifiers in MS mode
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <ctype.h>
21 #include <wctype.h>
22
23 #include "format_check.h"
24 #include "symbol_t.h"
25 #include "ast_t.h"
26 #include "diagnostic.h"
27 #include "types.h"
28 #include "type_t.h"
29 #include "warning.h"
30 #include "lang_features.h"
31
/**
 * Flags that may appear in a conversion specification.  Every flag owns
 * one bit so that several of them can be collected in a format_flags_t.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00, /**< no flag seen */
        FMT_FLAG_HASH  = 0x01, /**< '#' */
        FMT_FLAG_ZERO  = 0x02, /**< '0' */
        FMT_FLAG_MINUS = 0x04, /**< '-' */
        FMT_FLAG_SPACE = 0x08, /**< ' ' */
        FMT_FLAG_PLUS  = 0x10, /**< '+' */
        FMT_FLAG_TICK  = 0x20  /**< '\'' */
} format_flag_t;

/** A set of format_flag_t bits or'ed together. */
typedef unsigned format_flags_t;
43
/**
 * Length modifiers that can precede a conversion specifier.
 * The enumerators are used as array indices when looking up the spelling
 * of a modifier, so their order must not change.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /**< no length modifier */
        FMT_MOD_L,        /**< "L"  */
        FMT_MOD_hh,       /**< "hh" */
        FMT_MOD_h,        /**< "h"  */
        FMT_MOD_l,        /**< "l"  */
        FMT_MOD_ll,       /**< "ll" */
        FMT_MOD_j,        /**< "j"  */
        FMT_MOD_t,        /**< "t"  */
        FMT_MOD_z,        /**< "z"  */
        FMT_MOD_q,        /**< "q"  */

        /* the remaining modifiers are only recognized in microsoft mode */
        FMT_MOD_w,        /**< "w"   */
        FMT_MOD_I,        /**< "I"   */
        FMT_MOD_I32,      /**< "I32" */
        FMT_MOD_I64       /**< "I64" */
} format_length_modifier_t;
61
62 static const char* get_length_modifier_name(const format_length_modifier_t mod)
63 {
64         static const char* const names[] = {
65                 [FMT_MOD_NONE] = "",
66                 [FMT_MOD_L]    = "L",
67                 [FMT_MOD_hh]   = "hh",
68                 [FMT_MOD_h]    = "h",
69                 [FMT_MOD_l]    = "l",
70                 [FMT_MOD_ll]   = "ll",
71                 [FMT_MOD_j]    = "j",
72                 [FMT_MOD_t]    = "t",
73                 [FMT_MOD_z]    = "z",
74                 [FMT_MOD_q]    = "q",
75                 /* only in microsoft mode */
76                 [FMT_MOD_w]    = "w",
77                 [FMT_MOD_I]    = "I",
78                 [FMT_MOD_I32]  = "I32",
79                 [FMT_MOD_I64]  = "I64"
80         };
81         assert(mod < sizeof(names) / sizeof(*names));
82         return names[mod];
83 }
84
85 static void warn_invalid_length_modifier(const source_position_t pos,
86                                          const format_length_modifier_t mod,
87                                          const wchar_rep_t conversion)
88 {
89         warningf(pos,
90                 "invalid length modifier '%s' for conversion specifier '%%%c'",
91                 get_length_modifier_name(mod), conversion
92         );
93 }
94
/**
 * Cursor over a format string.  The character access is done through
 * function pointers so that the same checking code can walk narrow and
 * wide string literals.
 */
typedef struct vchar_t vchar_t;
struct vchar_t {
        const void *string;   /**< the string; the callbacks read it as string_t or wide_string_t */
        size_t     position;  /**< current position (index of the current character) */
        size_t     size;      /**< size of the string, copied from the string literal */

        /** return the first character of the string and set the position to 0. */
        unsigned (*first)(vchar_t *self);
        /** advance the position by one and return the character found there */
        unsigned (*next)(vchar_t *self);
        /** return non-zero if the given character is a digit */
        int (*is_digit)(unsigned vchar);
};
108
109 static unsigned string_first(vchar_t *self) {
110         self->position = 0;
111         const string_t *string = self->string;
112         return string->begin[0];
113 }
114
115 static unsigned string_next(vchar_t *self) {
116         ++self->position;
117         const string_t *string = self->string;
118         return string->begin[self->position];
119 }
120
/**
 * vchar_t::is_digit for narrow strings.
 *
 * The check is done by range comparison instead of isdigit(): isdigit() is
 * only defined for EOF and values representable as unsigned char, but the
 * cursor hands over arbitrary unsigned values (e.g. a sign-extended char).
 *
 * @param vchar  the character to classify
 * @return       non-zero if vchar is one of '0'..'9', 0 otherwise
 */
static int string_isdigit(unsigned vchar) {
        return vchar >= '0' && vchar <= '9';
}
124
125 static unsigned wstring_first(vchar_t *self) {
126         self->position = 0;
127         const wide_string_t *wstring = self->string;
128         return wstring->begin[0];
129 }
130
131 static unsigned wstring_next(vchar_t *self) {
132         ++self->position;
133         const wide_string_t *wstring = self->string;
134         return wstring->begin[self->position];
135 }
136
/** vchar_t::is_digit for wide strings: non-zero if the character is a wide digit. */
static int wstring_isdigit(unsigned vchar) {
        const wint_t wide = vchar;

        return iswdigit(wide);
}
140
141 static bool atend(vchar_t *self) {
142         return self->position + 1 == self->size;
143 }
144
145 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
146 {
147         const expression_t *fmt_expr = fmt_arg->expression;
148         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
149                 fmt_expr = fmt_expr->unary.value;
150         }
151
152         vchar_t vchar;
153         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
154                 vchar.string   = &fmt_expr->wide_string.value;
155                 vchar.size     = fmt_expr->wide_string.value.size;
156                 vchar.first    = wstring_first;
157                 vchar.next     = wstring_next;
158                 vchar.is_digit = wstring_isdigit;
159         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
160                 vchar.string   = &fmt_expr->string.value;
161                 vchar.size     = fmt_expr->string.value.size;
162                 vchar.first    = string_first;
163                 vchar.next     = string_next;
164                 vchar.is_digit = string_isdigit;
165         } else {
166                 return;
167         }
168         const source_position_t    pos     = fmt_expr->base.source_position;
169         unsigned fmt = vchar.first(&vchar);
170         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
171                 if (fmt != '%')
172                         continue;
173                 fmt = vchar.next(&vchar);
174
175                 if (fmt == '%')
176                         continue;
177
178                 format_flags_t fmt_flags = FMT_FLAG_NONE;
179                 if (fmt == '0') {
180                         fmt = vchar.next(&vchar);
181                         fmt_flags |= FMT_FLAG_ZERO;
182                 }
183
184                 /* argument selector or minimum field width */
185                 if (vchar.is_digit(fmt)) {
186                         do {
187                                 fmt = vchar.next(&vchar);
188                         } while (vchar.is_digit(fmt));
189
190                         /* digit string was ... */
191                         if (fmt == '$') {
192                                 /* ... argument selector */
193                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
194                                 /* TODO implement */
195                                 return;
196                         }
197                         /* ... minimum field width */
198                 } else {
199                         /* flags */
200                         for (;;) {
201                                 format_flags_t flag;
202                                 switch (fmt) {
203                                         case '#':  flag = FMT_FLAG_HASH;  break;
204                                         case '0':  flag = FMT_FLAG_ZERO;  break;
205                                         case '-':  flag = FMT_FLAG_MINUS; break;
206                                         case '\'': flag = FMT_FLAG_TICK;  break;
207
208                                         case ' ':
209                                                 if (fmt_flags & FMT_FLAG_PLUS) {
210                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
211                                                 }
212                                                 flag = FMT_FLAG_SPACE;
213                                                 break;
214
215                                         case '+':
216                                                 if (fmt_flags & FMT_FLAG_SPACE) {
217                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
218                                                 }
219                                                 flag = FMT_FLAG_PLUS;
220                                                 break;
221
222                                         default: goto break_fmt_flags;
223                                 }
224                                 if (fmt_flags & flag) {
225                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)fmt);
226                                 }
227                                 fmt_flags |= flag;
228                                 fmt = vchar.next(&vchar);
229                         }
230 break_fmt_flags:
231
232                         /* minimum field width */
233                         if (fmt == '*') {
234                                 if (arg == NULL) {
235                                         warningf(pos, "missing argument for '*' field width in conversion specification");
236                                         return;
237                                 }
238                                 const type_t *const arg_type = arg->expression->base.type;
239                                 if (arg_type != type_int) {
240                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
241                                 }
242                                 arg = arg->next;
243                         } else {
244                                 while (vchar.is_digit(fmt)) {
245                                         fmt = vchar.next(&vchar);
246                                 }
247                         }
248                 }
249
250                 /* precision */
251                 if (fmt == '.') {
252                         fmt = vchar.next(&vchar);
253                         if (fmt == '*') {
254                                 if (arg == NULL) {
255                                         warningf(pos, "missing argument for '*' precision in conversion specification");
256                                         return;
257                                 }
258                                 const type_t *const arg_type = arg->expression->base.type;
259                                 if (arg_type != type_int) {
260                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
261                                 }
262                                 arg = arg->next;
263                         } else {
264                                 /* digit string may be omitted */
265                                 while (vchar.is_digit(fmt)) {
266                                         fmt = vchar.next(&vchar);
267                                 }
268                         }
269                 }
270
271                 /* length modifier */
272                 format_length_modifier_t fmt_mod;
273                 switch (fmt) {
274                         case 'h':
275                                 fmt = vchar.next(&vchar);
276                                 if (fmt == 'h') {
277                                         fmt = vchar.next(&vchar);
278                                         fmt_mod = FMT_MOD_hh;
279                                 } else {
280                                         fmt_mod = FMT_MOD_h;
281                                 }
282                                 break;
283
284                         case 'l':
285                                 fmt = vchar.next(&vchar);
286                                 if (fmt == 'l') {
287                                         fmt = vchar.next(&vchar);
288                                         fmt_mod = FMT_MOD_ll;
289                                 } else {
290                                         fmt_mod = FMT_MOD_l;
291                                 }
292                                 break;
293
294                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
295                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
296                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
297                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
298                         case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
299                         /* microsoft mode */
300                         case 'w':
301                                 if (c_mode & _MS) {
302                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
303                                 } else {
304                                         fmt_mod = FMT_MOD_NONE;
305                                 }
306                                 break;
307                         case 'I':
308                                 if (c_mode & _MS) {
309                                         fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
310                                         if (fmt == '3') {
311                                                 fmt = vchar.next(&vchar);
312                                                 if (fmt == '2') {
313                                                         fmt = vchar.next(&vchar);
314                                                         fmt_mod = FMT_MOD_I32;
315                                                 } else {
316                                                         /* rewind */
317                                                         --vchar.position;
318                                                 }
319                                         } else if (fmt == '6') {
320                                                 fmt = vchar.next(&vchar);
321                                                 if (fmt == '4') {
322                                                         fmt = vchar.next(&vchar);
323                                                         fmt_mod = FMT_MOD_I64;
324                                                 } else {
325                                                         /* rewind */
326                                                         --vchar.position;
327                                                 }
328                                         }
329                                 } else {
330                                         fmt_mod = FMT_MOD_NONE;
331                                 }
332                                 break;
333                         default:
334                                 fmt_mod = FMT_MOD_NONE;
335                                 break;
336                 }
337
338                 if (fmt == '\0') {
339                         warningf(pos, "dangling %% in format string");
340                         break;
341                 }
342
343                 const type_t      *expected_type;
344                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
345                 format_flags_t     allowed_flags;
346                 switch (fmt) {
347                         case 'd':
348                         case 'i':
349                                 switch (fmt_mod) {
350                                         case FMT_MOD_NONE: expected_type = type_int;       break;
351                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
352                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
353                                         case FMT_MOD_l:    expected_type = type_long;      break;
354                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
355                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
356                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
357                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
358                                         case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
359                                         case FMT_MOD_I32:  expected_type = type_int32;     break;
360                                         case FMT_MOD_I64:  expected_type = type_int64;     break;
361
362                                         default:
363                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
364                                                 goto next_arg;
365                                 }
366                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
367                                 break;
368
369                         case 'o':
370                         case 'X':
371                         case 'x':
372                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
373                                 goto eval_fmt_mod_unsigned;
374                                 break;
375
376                         case 'u':
377                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
378 eval_fmt_mod_unsigned:
379                                 switch (fmt_mod) {
380                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
381                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
382                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
383                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
384                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
385                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
386                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
387                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
388                                         case FMT_MOD_I:    expected_type = type_size_t;             break;
389                                         case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
390                                         case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
391
392                                         default:
393                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
394                                                 goto next_arg;
395                                 }
396                                 break;
397
398                         case 'A':
399                         case 'a':
400                         case 'E':
401                         case 'e':
402                         case 'F':
403                         case 'f':
404                         case 'G':
405                         case 'g':
406                                 switch (fmt_mod) {
407                                         case FMT_MOD_l:    /* l modifier is ignored */
408                                         case FMT_MOD_NONE: expected_type = type_double;      break;
409                                         case FMT_MOD_L:    expected_type = type_long_double; break;
410
411                                         default:
412                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
413                                                 goto next_arg;
414                                 }
415                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
416                                 break;
417
418                         case 'C':
419                                 if (fmt_mod != FMT_MOD_NONE) {
420                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
421                                         goto next_arg;
422                                 }
423                                 expected_type = type_wchar_t;
424                                 allowed_flags = FMT_FLAG_NONE;
425                                 break;
426
427                         case 'c':
428                                 expected_type = type_int;
429                                 switch (fmt_mod) {
430                                         case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
431                                         case FMT_MOD_l:    expected_type = type_wint_t;  break;
432                                         case FMT_MOD_w:    expected_type = type_wchar_t; break;
433
434                                         default:
435                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
436                                                 goto next_arg;
437                                 }
438                                 allowed_flags = FMT_FLAG_NONE;
439                                 break;
440
441                         case 'S':
442                                 if (fmt_mod != FMT_MOD_NONE) {
443                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
444                                         goto next_arg;
445                                 }
446                                 expected_type = type_wchar_t_ptr;
447                                 expected_qual = TYPE_QUALIFIER_CONST;
448                                 allowed_flags = FMT_FLAG_NONE;
449                                 break;
450
451                         case 's':
452                                 switch (fmt_mod) {
453                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
454                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
455                                         case FMT_MOD_w:    expected_type = type_wchar_t_ptr; break;
456
457                                         default:
458                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
459                                                 goto next_arg;
460                                 }
461                                 expected_qual = TYPE_QUALIFIER_CONST;
462                                 allowed_flags = FMT_FLAG_NONE;
463                                 break;
464
465                         case 'p':
466                                 if (fmt_mod != FMT_MOD_NONE) {
467                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
468                                         goto next_arg;
469                                 }
470                                 expected_type = type_void_ptr;
471                                 allowed_flags = FMT_FLAG_NONE;
472                                 break;
473
474                         case 'n':
475                                 switch (fmt_mod) {
476                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
477                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
478                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
479                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
480                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
481                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
482                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
483                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
484
485                                         default:
486                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
487                                                 goto next_arg;
488                                 }
489                                 allowed_flags = FMT_FLAG_NONE;
490                                 break;
491
492                         default:
493                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)fmt);
494                                 goto next_arg;
495                 }
496
497                 if ((fmt_flags & ~allowed_flags) != 0) {
498                         /* TODO better warning message text */
499                         warningf(pos, "invalid format flags in conversion specification");
500                 }
501
502                 if (arg == NULL) {
503                         warningf(pos, "too few arguments for format string");
504                         return;
505                 }
506
507                 {       /* create a scope here to prevent warning about the jump to next_arg */
508                         type_t *const arg_type = arg->expression->base.type;
509                         if (is_type_pointer(expected_type)) {
510                                 type_t *const arg_skip = skip_typeref(arg_type);
511                                 if (is_type_pointer(arg_skip)) {
512                                         type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
513                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
514                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
515                                                 get_unqualified_type(arg_to) == exp_to) {
516                                                 goto next_arg;
517                                         }
518                                 }
519                         } else {
520                                 if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
521                                         goto next_arg;
522                                 }
523                         }
524                         if (is_type_valid(arg_type)) {
525                                 warningf(pos,
526                                         "argument type '%T' does not match conversion specifier '%%%s%c'",
527                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt);
528                         }
529                 }
530 next_arg:
531                 arg = arg->next;
532         }
533         if (!atend(&vchar)) {
534                 warningf(pos, "format string contains NUL");
535         }
536         if (arg != NULL) {
537                 warningf(pos, "too many arguments for format string");
538         }
539 }
540
541 void check_format(const call_expression_t *const call)
542 {
543         if (!warning.check_format)
544                 return;
545
546         const expression_t *const func_expr = call->function;
547         if (func_expr->kind != EXPR_REFERENCE)
548                 return;
549
550         const char            *const name = func_expr->reference.symbol->string;
551         const call_argument_t *      arg  = call->arguments;
552         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
553                 check_format_arguments(arg, arg->next);
554         } else if (strcmp(name, "printf") == 0) {
555                 check_format_arguments(arg, arg->next);
556         } else if (strcmp(name, "swprintf") == 0) {
557                 arg = arg->next->next; /* skip destination buffer and size */
558                 check_format_arguments(arg, arg->next);
559         } else if (strcmp(name, "sprintf") == 0) {
560                 arg = arg->next->next; /* skip destination buffer and size */
561                 check_format_arguments(arg, arg->next);
562         }
563 }