add license comments
[cparser] / format_check.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <wctype.h>
21
22 #include "ast_t.h"
23 #include "diagnostic.h"
24 #include "format_check.h"
25 #include "types.h"
26 #include "type_t.h"
27 #include "warning.h"
28
29
/**
 * Flag characters that may appear in a conversion specification.
 * Every flag occupies its own bit so that a set of flags can be kept in a
 * single format_flags_t value.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00U, /* no flag at all   */
	FMT_FLAG_HASH  = 0x01U, /* '#'              */
	FMT_FLAG_ZERO  = 0x02U, /* '0'              */
	FMT_FLAG_MINUS = 0x04U, /* '-'              */
	FMT_FLAG_SPACE = 0x08U, /* ' '              */
	FMT_FLAG_PLUS  = 0x10U, /* '+'              */
	FMT_FLAG_TICK  = 0x20U  /* '\''             */
} format_flag_t;

/** Bit set built by or-ing format_flag_t values together. */
typedef unsigned int format_flags_t;
41
/**
 * Length modifiers that may precede a conversion specifier.
 * The enumerators are used as array indices (see the name table in
 * get_length_modifier_name()), so their order and values must stay dense
 * and start at zero.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0, /* no length modifier */
	FMT_MOD_L,        /* "L"  */
	FMT_MOD_hh,       /* "hh" */
	FMT_MOD_h,        /* "h"  */
	FMT_MOD_l,        /* "l"  */
	FMT_MOD_ll,       /* "ll" */
	FMT_MOD_j,        /* "j"  */
	FMT_MOD_t,        /* "t"  */
	FMT_MOD_z,        /* "z"  */
	FMT_MOD_q         /* "q"  */
} format_length_modifier_t;
54
55 static const char* get_length_modifier_name(const format_length_modifier_t mod)
56 {
57         static const char* const names[] = {
58                 [FMT_MOD_NONE] = "",
59                 [FMT_MOD_L]    = "L",
60                 [FMT_MOD_hh]   = "hh",
61                 [FMT_MOD_h]    = "h",
62                 [FMT_MOD_l]    = "l",
63                 [FMT_MOD_ll]   = "ll",
64                 [FMT_MOD_j]    = "j",
65                 [FMT_MOD_t]    = "t",
66                 [FMT_MOD_z]    = "z",
67                 [FMT_MOD_q]    = "q"
68         };
69         assert(mod < sizeof(names) / sizeof(*names));
70         return names[mod];
71 }
72
73 static void warn_invalid_length_modifier(const source_position_t pos,
74                                          const format_length_modifier_t mod,
75                                          const wchar_rep_t conversion)
76 {
77         warningf(pos,
78                 "invalid length modifier '%s' for conversion specifier '%%%c'",
79                 get_length_modifier_name(mod), conversion
80         );
81 }
82
83 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
84 {
85         const expression_t *fmt_expr = fmt_arg->expression;
86         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
87                 fmt_expr = fmt_expr->unary.value;
88         }
89
90         if (fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
91                 return;
92
93         const source_position_t    pos     = fmt_expr->base.source_position;
94         const wide_string_t *const wstring = &fmt_expr->wide_string.value;
95         const wchar_rep_t *fmt = wstring->begin;
96         for (; *fmt != '\0'; ++fmt) {
97                 if (*fmt != '%')
98                         continue;
99                 ++fmt;
100
101                 if (*fmt == '%')
102                         continue;
103
104                 format_flags_t fmt_flags = FMT_FLAG_NONE;
105                 if (*fmt == '0') {
106                         ++fmt;
107                         fmt_flags |= FMT_FLAG_ZERO;
108                 }
109
110                 /* argument selector or minimum field width */
111                 if (iswdigit(*fmt)) {
112                         do {
113                                 ++fmt;
114                         } while (iswdigit(*fmt));
115
116                         /* digit string was ... */
117                         if (*fmt == '$') {
118                                 /* ... argument selector */
119                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
120                                 /* TODO implement */
121                                 return;
122                         }
123                         /* ... minimum field width */
124                 } else {
125                         /* flags */
126                         for (;;) {
127                                 format_flags_t flag;
128                                 switch (*fmt) {
129                                         case '#':  flag = FMT_FLAG_HASH;  break;
130                                         case '0':  flag = FMT_FLAG_ZERO;  break;
131                                         case '-':  flag = FMT_FLAG_MINUS; break;
132                                         case '\'': flag = FMT_FLAG_TICK;  break;
133
134                                         case ' ':
135                                                 if (fmt_flags & FMT_FLAG_PLUS) {
136                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
137                                                 }
138                                                 flag = FMT_FLAG_SPACE;
139                                                 break;
140
141                                         case '+':
142                                                 if (fmt_flags & FMT_FLAG_SPACE) {
143                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
144                                                 }
145                                                 flag = FMT_FLAG_PLUS;
146                                                 break;
147
148                                         default: goto break_fmt_flags;
149                                 }
150                                 if (fmt_flags & flag) {
151                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
152                                 }
153                                 fmt_flags |= flag;
154                                 ++fmt;
155                         }
156 break_fmt_flags:
157
158                         /* minimum field width */
159                         if (*fmt == '*') {
160                                 if (arg == NULL) {
161                                         warningf(pos, "missing argument for '*' field width in conversion specification");
162                                         return;
163                                 }
164                                 const type_t *const arg_type = arg->expression->base.type;
165                                 if (arg_type != type_int) {
166                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
167                                 }
168                                 arg = arg->next;
169                         } else {
170                                 while (iswdigit(*fmt)) {
171                                         ++fmt;
172                                 }
173                         }
174                 }
175
176                 /* precision */
177                 if (*fmt == '.') {
178                         ++fmt;
179                         if (*fmt == '*') {
180                                 if (arg == NULL) {
181                                         warningf(pos, "missing argument for '*' precision in conversion specification");
182                                         return;
183                                 }
184                                 const type_t *const arg_type = arg->expression->base.type;
185                                 if (arg_type != type_int) {
186                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
187                                 }
188                                 arg = arg->next;
189                         } else {
190                                 /* digit string may be omitted */
191                                 while (iswdigit(*fmt)) {
192                                         ++fmt;
193                                 }
194                         }
195                 }
196
197                 /* length modifier */
198                 format_length_modifier_t fmt_mod;
199                 switch (*fmt) {
200                         case 'h':
201                                 ++fmt;
202                                 if (*fmt == 'h') {
203                                         ++fmt;
204                                         fmt_mod = FMT_MOD_hh;
205                                 } else {
206                                         fmt_mod = FMT_MOD_h;
207                                 }
208                                 break;
209
210                         case 'l':
211                                 ++fmt;
212                                 if (*fmt == 'l') {
213                                         ++fmt;
214                                         fmt_mod = FMT_MOD_ll;
215                                 } else {
216                                         fmt_mod = FMT_MOD_l;
217                                 }
218                                 break;
219
220                         case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
221                         case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
222                         case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
223                         case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
224                         case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
225                         default:         fmt_mod = FMT_MOD_NONE; break;
226                 }
227
228                 if (*fmt == '\0') {
229                         warningf(pos, "dangling %% in format string");
230                         break;
231                 }
232
233                 const type_t      *expected_type;
234                 type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
235                 format_flags_t     allowed_flags;
236                 switch (*fmt) {
237                         case 'd':
238                         case 'i':
239                                 switch (fmt_mod) {
240                                         case FMT_MOD_NONE: expected_type = type_int;       break;
241                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
242                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
243                                         case FMT_MOD_l:    expected_type = type_long;      break;
244                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
245                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
246                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
247                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
248
249                                         default:
250                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
251                                                 goto next_arg;
252                                 }
253                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
254                                 break;
255
256                         case 'o':
257                         case 'X':
258                         case 'x':
259                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
260                                 goto eval_fmt_mod_unsigned;
261                                 break;
262
263                         case 'u':
264                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
265 eval_fmt_mod_unsigned:
266                                 switch (fmt_mod) {
267                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
268                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
269                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
270                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
271                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
272                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
273                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
274                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
275
276                                         default:
277                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
278                                                 goto next_arg;
279                                 }
280                                 break;
281
282                         case 'A':
283                         case 'a':
284                         case 'E':
285                         case 'e':
286                         case 'F':
287                         case 'f':
288                         case 'G':
289                         case 'g':
290                                 switch (fmt_mod) {
291                                         case FMT_MOD_l:    /* l modifier is ignored */
292                                         case FMT_MOD_NONE: expected_type = type_double;      break;
293                                         case FMT_MOD_L:    expected_type = type_long_double; break;
294
295                                         default:
296                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
297                                                 goto next_arg;
298                                 }
299                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
300                                 break;
301
302                         case 'C':
303                                 if (fmt_mod != FMT_MOD_NONE) {
304                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
305                                         goto next_arg;
306                                 }
307                                 expected_type = type_wchar_t;
308                                 allowed_flags = FMT_FLAG_NONE;
309                                 break;
310
311                         case 'c':
312                                 expected_type = type_int;
313                                 switch (fmt_mod) {
314                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
315                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
316
317                                         default:
318                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
319                                                 goto next_arg;
320                                 }
321                                 allowed_flags = FMT_FLAG_NONE;
322                                 break;
323
324                         case 'S':
325                                 if (fmt_mod != FMT_MOD_NONE) {
326                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
327                                         goto next_arg;
328                                 }
329                                 expected_type = type_wchar_t_ptr;
330                                 expected_qual = TYPE_QUALIFIER_CONST;
331                                 allowed_flags = FMT_FLAG_NONE;
332                                 break;
333
334                         case 's':
335                                 switch (fmt_mod) {
336                                         case FMT_MOD_NONE: expected_type = type_char_ptr;    break;
337                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
338
339                                         default:
340                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
341                                                 goto next_arg;
342                                 }
343                                 expected_qual = TYPE_QUALIFIER_CONST;
344                                 allowed_flags = FMT_FLAG_NONE;
345                                 break;
346
347                         case 'p':
348                                 if (fmt_mod != FMT_MOD_NONE) {
349                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
350                                         goto next_arg;
351                                 }
352                                 expected_type = type_void_ptr;
353                                 allowed_flags = FMT_FLAG_NONE;
354                                 break;
355
356                         case 'n':
357                                 switch (fmt_mod) {
358                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
359                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
360                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
361                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
362                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
363                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
364                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
365                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
366
367                                         default:
368                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
369                                                 goto next_arg;
370                                 }
371                                 allowed_flags = FMT_FLAG_NONE;
372                                 break;
373
374                         default:
375                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
376                                 goto next_arg;
377                 }
378
379                 if ((fmt_flags & ~allowed_flags) != 0) {
380                         /* TODO better warning message text */
381                         warningf(pos, "invalid format flags in conversion specification");
382                 }
383
384                 if (arg == NULL) {
385                         warningf(pos, "too few arguments for format string");
386                         return;
387                 }
388
389                 {       /* create a scope here to prevent warning about the jump to next_arg */
390                         type_t *const arg_type = arg->expression->base.type;
391                         if (is_type_pointer(expected_type)) {
392                                 type_t *const arg_skip = skip_typeref(arg_type);
393                                 if (is_type_pointer(arg_skip)) {
394                                         type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
395                                         type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
396                                         if ((arg_to->base.qualifiers & ~expected_qual) == 0 &&
397                                                 get_unqualified_type(arg_to) == exp_to) {
398                                                 goto next_arg;
399                                         }
400                                 }
401                         } else {
402                                 if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
403                                         goto next_arg;
404                                 }
405                         }
406                         if (is_type_valid(arg_type)) {
407                                 warningf(pos,
408                                         "argument type '%T' does not match conversion specifier '%%%s%c'",
409                                         arg_type, get_length_modifier_name(fmt_mod), (char)*fmt);
410                         }
411                 }
412 next_arg:
413                 arg = arg->next;
414         }
415         if (fmt + 1 != wstring->begin + wstring->size) {
416                 warningf(pos, "format string contains NUL");
417         }
418         if (arg != NULL) {
419                 warningf(pos, "too many arguments for format string");
420         }
421 }
422
423 void check_format(const call_expression_t *const call)
424 {
425         if (!warning.check_format)
426                 return;
427
428         const expression_t *const func_expr = call->function;
429         if (func_expr->kind != EXPR_REFERENCE)
430                 return;
431
432         const char            *const name = func_expr->reference.symbol->string;
433         const call_argument_t *      arg  = call->arguments;
434         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
435                 check_format_arguments(arg, arg->next);
436         } else if (strcmp(name, "swprintf") == 0) {
437                 arg = arg->next->next; /* skip destination buffer and size */
438                 check_format_arguments(arg, arg->next);
439         }
440 }