some refactoring in preparation for a preprocessor
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <wctype.h>
21
22 #include "format_check.h"
23 #include "symbol_t.h"
24 #include "ast_t.h"
25 #include "diagnostic.h"
26 #include "types.h"
27 #include "type_t.h"
28 #include "warning.h"
29
30
/**
 * Flag characters that may appear in a conversion specification.
 * Every flag owns a distinct bit so several of them can be OR-ed together
 * into a format_flags_t.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00U, /* no flag given */
	FMT_FLAG_HASH  = 0x01U, /* '#'  */
	FMT_FLAG_ZERO  = 0x02U, /* '0'  */
	FMT_FLAG_MINUS = 0x04U, /* '-'  */
	FMT_FLAG_SPACE = 0x08U, /* ' '  */
	FMT_FLAG_PLUS  = 0x10U, /* '+'  */
	FMT_FLAG_TICK  = 0x20U  /* '\'' */
} format_flag_t;

/** Bit set built from format_flag_t values. */
typedef unsigned int format_flags_t;
42
/**
 * Length modifiers that may precede a conversion specifier.
 * The enumerators are used as indices into the name table of
 * get_length_modifier_name(), so both must be kept in sync.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE,
	FMT_MOD_L,
	FMT_MOD_hh,
	FMT_MOD_h,
	FMT_MOD_l,
	FMT_MOD_ll,
	FMT_MOD_j,
	FMT_MOD_t,
	FMT_MOD_z,
	FMT_MOD_q
} format_length_modifier_t;

/**
 * Returns the spelling of a length modifier as it appears in a format string.
 *
 * @param mod  the length modifier to look up
 * @return     a statically allocated string; "" for FMT_MOD_NONE and for any
 *             value that is not a known modifier
 */
static const char* get_length_modifier_name(const format_length_modifier_t mod)
{
	static const char* const names[] = {
		[FMT_MOD_NONE] = "",
		[FMT_MOD_L]    = "L",
		[FMT_MOD_hh]   = "hh",
		[FMT_MOD_h]    = "h",
		[FMT_MOD_l]    = "l",
		[FMT_MOD_ll]   = "ll",
		[FMT_MOD_j]    = "j",
		[FMT_MOD_t]    = "t",
		[FMT_MOD_z]    = "z",
		[FMT_MOD_q]    = "q"
	};
	/* Compare as unsigned so a negative enum value is rejected as well. */
	const unsigned index = (unsigned)mod;
	const unsigned count = (unsigned)(sizeof(names) / sizeof(*names));

	assert(index < count);
	/* The assert vanishes under NDEBUG; never index past the table. */
	if (index >= count)
		return "";
	return names[index];
}
73
74 static void warn_invalid_length_modifier(const source_position_t pos,
75                                          const format_length_modifier_t mod,
76                                          const wchar_rep_t conversion)
77 {
78         warningf(pos,
79                 "invalid length modifier '%s' for conversion specifier '%%%c'",
80                 get_length_modifier_name(mod), conversion
81         );
82 }
83
84 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
85 {
86         const expression_t *fmt_expr = fmt_arg->expression;
87         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
88                 fmt_expr = fmt_expr->unary.value;
89         }
90
91         if (fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
92                 return;
93
94         const source_position_t    pos     = fmt_expr->base.source_position;
95         const wide_string_t *const wstring = &fmt_expr->wide_string.value;
96         const wchar_rep_t *fmt = wstring->begin;
97         for (; *fmt != '\0'; ++fmt) {
98                 if (*fmt != '%')
99                         continue;
100                 ++fmt;
101
102                 if (*fmt == '%')
103                         continue;
104
105                 format_flags_t fmt_flags = FMT_FLAG_NONE;
106                 if (*fmt == '0') {
107                         ++fmt;
108                         fmt_flags |= FMT_FLAG_ZERO;
109                 }
110
111                 /* argument selector or minimum field width */
112                 if (iswdigit(*fmt)) {
113                         do {
114                                 ++fmt;
115                         } while (iswdigit(*fmt));
116
117                         /* digit string was ... */
118                         if (*fmt == '$') {
119                                 /* ... argument selector */
120                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
121                                 /* TODO implement */
122                                 return;
123                         }
124                         /* ... minimum field width */
125                 } else {
126                         /* flags */
127                         for (;;) {
128                                 format_flags_t flag;
129                                 switch (*fmt) {
130                                         case '#':  flag = FMT_FLAG_HASH;  break;
131                                         case '0':  flag = FMT_FLAG_ZERO;  break;
132                                         case '-':  flag = FMT_FLAG_MINUS; break;
133                                         case '\'': flag = FMT_FLAG_TICK;  break;
134
135                                         case ' ':
136                                                 if (fmt_flags & FMT_FLAG_PLUS) {
137                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
138                                                 }
139                                                 flag = FMT_FLAG_SPACE;
140                                                 break;
141
142                                         case '+':
143                                                 if (fmt_flags & FMT_FLAG_SPACE) {
144                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
145                                                 }
146                                                 flag = FMT_FLAG_PLUS;
147                                                 break;
148
149                                         default: goto break_fmt_flags;
150                                 }
151                                 if (fmt_flags & flag) {
152                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
153                                 }
154                                 fmt_flags |= flag;
155                                 ++fmt;
156                         }
157 break_fmt_flags:
158
159                         /* minimum field width */
160                         if (*fmt == '*') {
161                                 if (arg == NULL) {
162                                         warningf(pos, "missing argument for '*' field width in conversion specification");
163                                         return;
164                                 }
165                                 const type_t *const arg_type = arg->expression->base.type;
166                                 if (arg_type != type_int) {
167                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
168                                 }
169                                 arg = arg->next;
170                         } else {
171                                 while (iswdigit(*fmt)) {
172                                         ++fmt;
173                                 }
174                         }
175                 }
176
177                 /* precision */
178                 if (*fmt == '.') {
179                         ++fmt;
180                         if (*fmt == '*') {
181                                 if (arg == NULL) {
182                                         warningf(pos, "missing argument for '*' precision in conversion specification");
183                                         return;
184                                 }
185                                 const type_t *const arg_type = arg->expression->base.type;
186                                 if (arg_type != type_int) {
187                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
188                                 }
189                                 arg = arg->next;
190                         } else {
191                                 /* digit string may be omitted */
192                                 while (iswdigit(*fmt)) {
193                                         ++fmt;
194                                 }
195                         }
196                 }
197
198                 /* length modifier */
199                 format_length_modifier_t fmt_mod;
200                 switch (*fmt) {
201                         case 'h':
202                                 ++fmt;
203                                 if (*fmt == 'h') {
204                                         ++fmt;
205                                         fmt_mod = FMT_MOD_hh;
206                                 } else {
207                                         fmt_mod = FMT_MOD_h;
208                                 }
209                                 break;
210
211                         case 'l':
212                                 ++fmt;
213                                 if (*fmt == 'l') {
214                                         ++fmt;
215                                         fmt_mod = FMT_MOD_ll;
216                                 } else {
217                                         fmt_mod = FMT_MOD_l;
218                                 }
219                                 break;
220
221                         case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
222                         case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
223                         case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
224                         case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
225                         case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
226                         default:         fmt_mod = FMT_MOD_NONE; break;
227                 }
228
229                 if (*fmt == '\0') {
230                         warningf(pos, "dangling %% in format string");
231                         break;
232                 }
233
234                 const type_t      *expected_type;
235                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
236                 format_flags_t     allowed_flags;
237                 switch (*fmt) {
238                         case 'd':
239                         case 'i':
240                                 switch (fmt_mod) {
241                                         case FMT_MOD_NONE: expected_type = type_int;       break;
242                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
243                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
244                                         case FMT_MOD_l:    expected_type = type_long;      break;
245                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
246                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
247                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
248                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
249
250                                         default:
251                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
252                                                 goto next_arg;
253                                 }
254                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
255                                 break;
256
257                         case 'o':
258                         case 'X':
259                         case 'x':
260                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
261                                 goto eval_fmt_mod_unsigned;
262                                 break;
263
264                         case 'u':
265                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
266 eval_fmt_mod_unsigned:
267                                 switch (fmt_mod) {
268                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
269                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
270                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
271                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
272                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
273                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
274                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
275                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
276
277                                         default:
278                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
279                                                 goto next_arg;
280                                 }
281                                 break;
282
283                         case 'A':
284                         case 'a':
285                         case 'E':
286                         case 'e':
287                         case 'F':
288                         case 'f':
289                         case 'G':
290                         case 'g':
291                                 switch (fmt_mod) {
292                                         case FMT_MOD_l:    /* l modifier is ignored */
293                                         case FMT_MOD_NONE: expected_type = type_double;      break;
294                                         case FMT_MOD_L:    expected_type = type_long_double; break;
295
296                                         default:
297                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
298                                                 goto next_arg;
299                                 }
300                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
301                                 break;
302
303                         case 'C':
304                                 if (fmt_mod != FMT_MOD_NONE) {
305                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
306                                         goto next_arg;
307                                 }
308                                 expected_type = type_wchar_t;
309                                 allowed_flags = FMT_FLAG_NONE;
310                                 break;
311
312                         case 'c':
313                                 expected_type = type_int;
314                                 switch (fmt_mod) {
315                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
316                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
317
318                                         default:
319                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
320                                                 goto next_arg;
321                                 }
322                                 allowed_flags = FMT_FLAG_NONE;
323                                 break;
324
325                         case 'S':
326                                 if (fmt_mod != FMT_MOD_NONE) {
327                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
328                                         goto next_arg;
329                                 }
330                                 expected_type = type_wchar_t_ptr;
331                                 expected_qual = TYPE_QUALIFIER_CONST;
332                                 allowed_flags = FMT_FLAG_NONE;
333                                 break;
334
335                         case 's':
336                                 switch (fmt_mod) {
337                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
338                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
339
340                                         default:
341                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
342                                                 goto next_arg;
343                                 }
344                                 expected_qual = TYPE_QUALIFIER_CONST;
345                                 allowed_flags = FMT_FLAG_NONE;
346                                 break;
347
348                         case 'p':
349                                 if (fmt_mod != FMT_MOD_NONE) {
350                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
351                                         goto next_arg;
352                                 }
353                                 expected_type = type_void_ptr;
354                                 allowed_flags = FMT_FLAG_NONE;
355                                 break;
356
357                         case 'n':
358                                 switch (fmt_mod) {
359                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
360                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
361                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
362                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
363                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
364                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
365                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
366                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
367
368                                         default:
369                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
370                                                 goto next_arg;
371                                 }
372                                 allowed_flags = FMT_FLAG_NONE;
373                                 break;
374
375                         default:
376                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
377                                 goto next_arg;
378                 }
379
380                 if ((fmt_flags & ~allowed_flags) != 0) {
381                         /* TODO better warning message text */
382                         warningf(pos, "invalid format flags in conversion specification");
383                 }
384
385                 if (arg == NULL) {
386                         warningf(pos, "too few arguments for format string");
387                         return;
388                 }
389
390                 {       /* create a scope here to prevent warning about the jump to next_arg */
391                         type_t *const arg_type = arg->expression->base.type;
392                         if (is_type_pointer(expected_type)) {
393                                 type_t *const arg_skip = skip_typeref(arg_type);
394                                 if (is_type_pointer(arg_skip)) {
395                                         type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
396                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
397                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
398                                                 get_unqualified_type(arg_to) == exp_to) {
399                                                 goto next_arg;
400                                         }
401                                 }
402                         } else {
403                                 if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
404                                         goto next_arg;
405                                 }
406                         }
407                         if (is_type_valid(arg_type)) {
408                                 warningf(pos,
409                                         "argument type '%T' does not match conversion specifier '%%%s%c'",
410                                         arg_type, get_length_modifier_name(fmt_mod), (char)*fmt);
411                         }
412                 }
413 next_arg:
414                 arg = arg->next;
415         }
416         if (fmt + 1 != wstring->begin + wstring->size) {
417                 warningf(pos, "format string contains NUL");
418         }
419         if (arg != NULL) {
420                 warningf(pos, "too many arguments for format string");
421         }
422 }
423
424 void check_format(const call_expression_t *const call)
425 {
426         if (!warning.check_format)
427                 return;
428
429         const expression_t *const func_expr = call->function;
430         if (func_expr->kind != EXPR_REFERENCE)
431                 return;
432
433         const char            *const name = func_expr->reference.symbol->string;
434         const call_argument_t *      arg  = call->arguments;
435         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
436                 check_format_arguments(arg, arg->next);
437         } else if (strcmp(name, "swprintf") == 0) {
438                 arg = arg->next->next; /* skip destination buffer and size */
439                 check_format_arguments(arg, arg->next);
440         }
441 }