format checker can now handle normal and wide strings
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <ctype.h>
21 #include <wctype.h>
22
23 #include "format_check.h"
24 #include "symbol_t.h"
25 #include "ast_t.h"
26 #include "diagnostic.h"
27 #include "types.h"
28 #include "type_t.h"
29 #include "warning.h"
30
31
/**
 * Flags that may appear in a printf conversion specification.
 * Every flag occupies its own bit so that several of them can be combined
 * in a format_flags_t bit set.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00u, /* no flag */
	FMT_FLAG_HASH  = 0x01u, /* '#'  */
	FMT_FLAG_ZERO  = 0x02u, /* '0'  */
	FMT_FLAG_MINUS = 0x04u, /* '-'  */
	FMT_FLAG_SPACE = 0x08u, /* ' '  */
	FMT_FLAG_PLUS  = 0x10u, /* '+'  */
	FMT_FLAG_TICK  = 0x20u  /* '\'' */
} format_flag_t;

/** Bit set built from format_flag_t values. */
typedef unsigned int format_flags_t;
43
/**
 * Length modifiers of a printf conversion specification.
 * The numeric values are used as array indices / switch selectors, so they
 * are spelled out explicitly and must stay dense starting at zero.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0, /* no length modifier */
	FMT_MOD_L    = 1, /* "L"  */
	FMT_MOD_hh   = 2, /* "hh" */
	FMT_MOD_h    = 3, /* "h"  */
	FMT_MOD_l    = 4, /* "l"  */
	FMT_MOD_ll   = 5, /* "ll" */
	FMT_MOD_j    = 6, /* "j"  */
	FMT_MOD_t    = 7, /* "t"  */
	FMT_MOD_z    = 8, /* "z"  */
	FMT_MOD_q    = 9  /* "q"  */
} format_length_modifier_t;
56
57 static const char* get_length_modifier_name(const format_length_modifier_t mod)
58 {
59         static const char* const names[] = {
60                 [FMT_MOD_NONE] = "",
61                 [FMT_MOD_L]    = "L",
62                 [FMT_MOD_hh]   = "hh",
63                 [FMT_MOD_h]    = "h",
64                 [FMT_MOD_l]    = "l",
65                 [FMT_MOD_ll]   = "ll",
66                 [FMT_MOD_j]    = "j",
67                 [FMT_MOD_t]    = "t",
68                 [FMT_MOD_z]    = "z",
69                 [FMT_MOD_q]    = "q"
70         };
71         assert(mod < sizeof(names) / sizeof(*names));
72         return names[mod];
73 }
74
75 static void warn_invalid_length_modifier(const source_position_t pos,
76                                          const format_length_modifier_t mod,
77                                          const wchar_rep_t conversion)
78 {
79         warningf(pos,
80                 "invalid length modifier '%s' for conversion specifier '%%%c'",
81                 get_length_modifier_name(mod), conversion
82         );
83 }
84
/**
 * Cursor over the characters of a format string.  The accessors are stored
 * as function pointers so that the same scanning code can walk normal and
 * wide strings.
 */
typedef struct vchar_t vchar_t;
struct vchar_t {
	const void *string;   /**< the string (string_t or wide_string_t) */
	size_t      position; /**< index of the character returned last */
	size_t      size;     /**< size taken from the string literal */

	/** resets the cursor and returns the first character */
	unsigned (*first)(vchar_t *self);
	/** advances the cursor and returns the character it now points to */
	unsigned (*next)(vchar_t *self);
	/** tells whether a character returned by first/next is a digit */
	int      (*is_digit)(unsigned vchar);
};
95
96 static unsigned string_first(vchar_t *self) {
97         self->position = 0;
98         const string_t *string = self->string;
99         return string->begin[0];
100 }
101
102 static unsigned string_next(vchar_t *self) {
103         ++self->position;
104         const string_t *string = self->string;
105         return string->begin[self->position];
106 }
107
/**
 * vchar_t::is_digit implementation for normal strings.
 *
 * The test is done with a plain range check instead of isdigit(): isdigit()
 * is only defined for arguments representable as unsigned char (or EOF),
 * while the value handed in here is an unsigned that may lie far outside
 * that range (e.g. when a string element with the high bit set was widened
 * from a signed char -- NOTE(review): depends on the element type of
 * string_t, confirm).  The decimal digits are guaranteed to be contiguous,
 * so the range check accepts exactly '0'..'9'.
 *
 * @param vchar  a character as returned by the first/next accessors
 * @return       non-zero if vchar is a decimal digit, zero otherwise
 */
static int string_isdigit(unsigned vchar)
{
	return vchar >= '0' && vchar <= '9';
}
111
112 static unsigned wstring_first(vchar_t *self) {
113         self->position = 0;
114         const wide_string_t *wstring = self->string;
115         return wstring->begin[0];
116 }
117
118 static unsigned wstring_next(vchar_t *self) {
119         ++self->position;
120         const wide_string_t *wstring = self->string;
121         return wstring->begin[self->position];
122 }
123
/**
 * vchar_t::is_digit implementation for wide strings; defers to iswdigit().
 *
 * @param vchar  a character as returned by the first/next accessors
 * @return       the result of iswdigit() for that character
 */
static int wstring_isdigit(unsigned vchar)
{
	const wint_t wc = vchar;

	return iswdigit(wc);
}
127
128 static bool atend(vchar_t *self) {
129         return self->position + 1 == self->size;
130 }
131
132 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
133 {
134         const expression_t *fmt_expr = fmt_arg->expression;
135         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
136                 fmt_expr = fmt_expr->unary.value;
137         }
138
139         vchar_t vchar;
140         if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
141                 vchar.string   = &fmt_expr->wide_string.value;
142                 vchar.size     = fmt_expr->wide_string.value.size;
143                 vchar.first    = wstring_first;
144                 vchar.next     = wstring_next;
145                 vchar.is_digit = wstring_isdigit;
146         } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
147                 vchar.string   = &fmt_expr->string.value;
148                 vchar.size     = fmt_expr->string.value.size;
149                 vchar.first    = string_first;
150                 vchar.next     = string_next;
151                 vchar.is_digit = string_isdigit;
152         } else {
153                 return;
154         }
155         const source_position_t    pos     = fmt_expr->base.source_position;
156         unsigned fmt = vchar.first(&vchar);
157         for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
158                 if (fmt != '%')
159                         continue;
160                 fmt = vchar.next(&vchar);
161
162                 if (fmt == '%')
163                         continue;
164
165                 format_flags_t fmt_flags = FMT_FLAG_NONE;
166                 if (fmt == '0') {
167                         fmt = vchar.next(&vchar);
168                         fmt_flags |= FMT_FLAG_ZERO;
169                 }
170
171                 /* argument selector or minimum field width */
172                 if (vchar.is_digit(fmt)) {
173                         do {
174                                 fmt = vchar.next(&vchar);
175                         } while (vchar.is_digit(fmt));
176
177                         /* digit string was ... */
178                         if (fmt == '$') {
179                                 /* ... argument selector */
180                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
181                                 /* TODO implement */
182                                 return;
183                         }
184                         /* ... minimum field width */
185                 } else {
186                         /* flags */
187                         for (;;) {
188                                 format_flags_t flag;
189                                 switch (fmt) {
190                                         case '#':  flag = FMT_FLAG_HASH;  break;
191                                         case '0':  flag = FMT_FLAG_ZERO;  break;
192                                         case '-':  flag = FMT_FLAG_MINUS; break;
193                                         case '\'': flag = FMT_FLAG_TICK;  break;
194
195                                         case ' ':
196                                                 if (fmt_flags & FMT_FLAG_PLUS) {
197                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
198                                                 }
199                                                 flag = FMT_FLAG_SPACE;
200                                                 break;
201
202                                         case '+':
203                                                 if (fmt_flags & FMT_FLAG_SPACE) {
204                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
205                                                 }
206                                                 flag = FMT_FLAG_PLUS;
207                                                 break;
208
209                                         default: goto break_fmt_flags;
210                                 }
211                                 if (fmt_flags & flag) {
212                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)fmt);
213                                 }
214                                 fmt_flags |= flag;
215                                 fmt = vchar.next(&vchar);
216                         }
217 break_fmt_flags:
218
219                         /* minimum field width */
220                         if (fmt == '*') {
221                                 if (arg == NULL) {
222                                         warningf(pos, "missing argument for '*' field width in conversion specification");
223                                         return;
224                                 }
225                                 const type_t *const arg_type = arg->expression->base.type;
226                                 if (arg_type != type_int) {
227                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
228                                 }
229                                 arg = arg->next;
230                         } else {
231                                 while (vchar.is_digit(fmt)) {
232                                         fmt = vchar.next(&vchar);
233                                 }
234                         }
235                 }
236
237                 /* precision */
238                 if (fmt == '.') {
239                         fmt = vchar.next(&vchar);
240                         if (fmt == '*') {
241                                 if (arg == NULL) {
242                                         warningf(pos, "missing argument for '*' precision in conversion specification");
243                                         return;
244                                 }
245                                 const type_t *const arg_type = arg->expression->base.type;
246                                 if (arg_type != type_int) {
247                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
248                                 }
249                                 arg = arg->next;
250                         } else {
251                                 /* digit string may be omitted */
252                                 while (vchar.is_digit(fmt)) {
253                                         fmt = vchar.next(&vchar);
254                                 }
255                         }
256                 }
257
258                 /* length modifier */
259                 format_length_modifier_t fmt_mod;
260                 switch (fmt) {
261                         case 'h':
262                                 fmt = vchar.next(&vchar);
263                                 if (fmt == 'h') {
264                                         fmt = vchar.next(&vchar);
265                                         fmt_mod = FMT_MOD_hh;
266                                 } else {
267                                         fmt_mod = FMT_MOD_h;
268                                 }
269                                 break;
270
271                         case 'l':
272                                 fmt = vchar.next(&vchar);
273                                 if (fmt == 'l') {
274                                         fmt = vchar.next(&vchar);
275                                         fmt_mod = FMT_MOD_ll;
276                                 } else {
277                                         fmt_mod = FMT_MOD_l;
278                                 }
279                                 break;
280
281                         case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
282                         case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
283                         case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
284                         case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
285                         case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
286                         default:                            fmt_mod = FMT_MOD_NONE; break;
287                 }
288
289                 if (fmt == '\0') {
290                         warningf(pos, "dangling %% in format string");
291                         break;
292                 }
293
294                 const type_t      *expected_type;
295                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
296                 format_flags_t     allowed_flags;
297                 switch (fmt) {
298                         case 'd':
299                         case 'i':
300                                 switch (fmt_mod) {
301                                         case FMT_MOD_NONE: expected_type = type_int;       break;
302                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
303                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
304                                         case FMT_MOD_l:    expected_type = type_long;      break;
305                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
306                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
307                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
308                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
309
310                                         default:
311                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
312                                                 goto next_arg;
313                                 }
314                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
315                                 break;
316
317                         case 'o':
318                         case 'X':
319                         case 'x':
320                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
321                                 goto eval_fmt_mod_unsigned;
322                                 break;
323
324                         case 'u':
325                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
326 eval_fmt_mod_unsigned:
327                                 switch (fmt_mod) {
328                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
329                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
330                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
331                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
332                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
333                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
334                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
335                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
336
337                                         default:
338                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
339                                                 goto next_arg;
340                                 }
341                                 break;
342
343                         case 'A':
344                         case 'a':
345                         case 'E':
346                         case 'e':
347                         case 'F':
348                         case 'f':
349                         case 'G':
350                         case 'g':
351                                 switch (fmt_mod) {
352                                         case FMT_MOD_l:    /* l modifier is ignored */
353                                         case FMT_MOD_NONE: expected_type = type_double;      break;
354                                         case FMT_MOD_L:    expected_type = type_long_double; break;
355
356                                         default:
357                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
358                                                 goto next_arg;
359                                 }
360                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
361                                 break;
362
363                         case 'C':
364                                 if (fmt_mod != FMT_MOD_NONE) {
365                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
366                                         goto next_arg;
367                                 }
368                                 expected_type = type_wchar_t;
369                                 allowed_flags = FMT_FLAG_NONE;
370                                 break;
371
372                         case 'c':
373                                 expected_type = type_int;
374                                 switch (fmt_mod) {
375                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
376                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
377
378                                         default:
379                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
380                                                 goto next_arg;
381                                 }
382                                 allowed_flags = FMT_FLAG_NONE;
383                                 break;
384
385                         case 'S':
386                                 if (fmt_mod != FMT_MOD_NONE) {
387                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
388                                         goto next_arg;
389                                 }
390                                 expected_type = type_wchar_t_ptr;
391                                 expected_qual = TYPE_QUALIFIER_CONST;
392                                 allowed_flags = FMT_FLAG_NONE;
393                                 break;
394
395                         case 's':
396                                 switch (fmt_mod) {
397                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
398                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
399
400                                         default:
401                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
402                                                 goto next_arg;
403                                 }
404                                 expected_qual = TYPE_QUALIFIER_CONST;
405                                 allowed_flags = FMT_FLAG_NONE;
406                                 break;
407
408                         case 'p':
409                                 if (fmt_mod != FMT_MOD_NONE) {
410                                         warn_invalid_length_modifier(pos, fmt_mod, fmt);
411                                         goto next_arg;
412                                 }
413                                 expected_type = type_void_ptr;
414                                 allowed_flags = FMT_FLAG_NONE;
415                                 break;
416
417                         case 'n':
418                                 switch (fmt_mod) {
419                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
420                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
421                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
422                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
423                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
424                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
425                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
426                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
427
428                                         default:
429                                                 warn_invalid_length_modifier(pos, fmt_mod, fmt);
430                                                 goto next_arg;
431                                 }
432                                 allowed_flags = FMT_FLAG_NONE;
433                                 break;
434
435                         default:
436                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)fmt);
437                                 goto next_arg;
438                 }
439
440                 if ((fmt_flags & ~allowed_flags) != 0) {
441                         /* TODO better warning message text */
442                         warningf(pos, "invalid format flags in conversion specification");
443                 }
444
445                 if (arg == NULL) {
446                         warningf(pos, "too few arguments for format string");
447                         return;
448                 }
449
450                 {       /* create a scope here to prevent warning about the jump to next_arg */
451                         type_t *const arg_type = arg->expression->base.type;
452                         if (is_type_pointer(expected_type)) {
453                                 type_t *const arg_skip = skip_typeref(arg_type);
454                                 if (is_type_pointer(arg_skip)) {
455                                         type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
456                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
457                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
458                                                 get_unqualified_type(arg_to) == exp_to) {
459                                                 goto next_arg;
460                                         }
461                                 }
462                         } else {
463                                 if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
464                                         goto next_arg;
465                                 }
466                         }
467                         if (is_type_valid(arg_type)) {
468                                 warningf(pos,
469                                         "argument type '%T' does not match conversion specifier '%%%s%c'",
470                                         arg_type, get_length_modifier_name(fmt_mod), (char)fmt);
471                         }
472                 }
473 next_arg:
474                 arg = arg->next;
475         }
476         if (!atend(&vchar)) {
477                 warningf(pos, "format string contains NUL");
478         }
479         if (arg != NULL) {
480                 warningf(pos, "too many arguments for format string");
481         }
482 }
483
484 void check_format(const call_expression_t *const call)
485 {
486         if (!warning.check_format)
487                 return;
488
489         const expression_t *const func_expr = call->function;
490         if (func_expr->kind != EXPR_REFERENCE)
491                 return;
492
493         const char            *const name = func_expr->reference.symbol->string;
494         const call_argument_t *      arg  = call->arguments;
495         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
496                 check_format_arguments(arg, arg->next);
497         } else if (strcmp(name, "printf") == 0) {
498                 check_format_arguments(arg, arg->next);
499         } else if (strcmp(name, "swprintf") == 0) {
500                 arg = arg->next->next; /* skip destination buffer and size */
501                 check_format_arguments(arg, arg->next);
502         } else if (strcmp(name, "sprintf") == 0) {
503                 arg = arg->next->next; /* skip destination buffer and size */
504                 check_format_arguments(arg, arg->next);
505         }
506 }