Implement a new agile warning framework.
[cparser] / format_check.c
1 #include <wctype.h>
2
3 #include "ast_t.h"
4 #include "diagnostic.h"
5 #include "format_check.h"
6 #include "types.h"
7 #include "type_t.h"
8 #include "warning.h"
9
10
/**
 * Flags that may appear in a conversion specification.
 * Every flag occupies its own bit so that several can be combined in a
 * format_flags_t.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0,     /* no flag given */
        FMT_FLAG_HASH  = 0x01U, /* '#'  */
        FMT_FLAG_ZERO  = 0x02U, /* '0'  */
        FMT_FLAG_MINUS = 0x04U, /* '-'  */
        FMT_FLAG_SPACE = 0x08U, /* ' '  */
        FMT_FLAG_PLUS  = 0x10U, /* '+'  */
        FMT_FLAG_TICK  = 0x20U  /* '\'' */
} format_flag_t;

/** A set of format_flag_t values or'ed together. */
typedef unsigned format_flags_t;
22
/**
 * Length modifiers recognised between the precision and the conversion
 * specifier.  The enumerators are named after the modifier text they stand
 * for; the values are consecutive so they can index a lookup table.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /* no length modifier */
        FMT_MOD_L    = 1, /* "L"  */
        FMT_MOD_hh   = 2, /* "hh" */
        FMT_MOD_h    = 3, /* "h"  */
        FMT_MOD_l    = 4, /* "l"  */
        FMT_MOD_ll   = 5, /* "ll" */
        FMT_MOD_j    = 6, /* "j"  */
        FMT_MOD_t    = 7, /* "t"  */
        FMT_MOD_z    = 8, /* "z"  */
        FMT_MOD_q    = 9  /* "q"  */
} format_length_modifier_t;
35
36 static const char* get_length_modifier_name(const format_length_modifier_t mod)
37 {
38         static const char* const names[] = {
39                 [FMT_MOD_NONE] = "",
40                 [FMT_MOD_L]    = "L",
41                 [FMT_MOD_hh]   = "hh",
42                 [FMT_MOD_h]    = "h",
43                 [FMT_MOD_l]    = "l",
44                 [FMT_MOD_ll]   = "ll",
45                 [FMT_MOD_j]    = "j",
46                 [FMT_MOD_t]    = "t",
47                 [FMT_MOD_z]    = "z",
48                 [FMT_MOD_q]    = "q"
49         };
50         assert(mod < sizeof(names) / sizeof(*names));
51         return names[mod];
52 }
53
54 static void warn_invalid_length_modifier(const source_position_t pos,
55                                          const format_length_modifier_t mod,
56                                          const wchar_rep_t conversion)
57 {
58         warningf(pos,
59                 "invalid length modifier '%s' for conversion specifier '%%%c'",
60                 get_length_modifier_name(mod), conversion
61         );
62 }
63
64 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
65 {
66         const expression_t *fmt_expr = fmt_arg->expression;
67         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
68                 fmt_expr = fmt_expr->unary.value;
69         }
70
71         if (fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
72                 return;
73
74         const source_position_t    pos     = fmt_expr->base.source_position;
75         const wide_string_t *const wstring = &fmt_expr->wide_string.value;
76         const wchar_rep_t *fmt = wstring->begin;
77         for (; *fmt != '\0'; ++fmt) {
78                 if (*fmt != '%')
79                         continue;
80                 ++fmt;
81
82                 if (*fmt == '%')
83                         continue;
84
85                 format_flags_t fmt_flags = FMT_FLAG_NONE;
86                 if (*fmt == '0') {
87                         ++fmt;
88                         fmt_flags |= FMT_FLAG_ZERO;
89                 }
90
91                 /* argument selector or minimum field width */
92                 if (iswdigit(*fmt)) {
93                         do {
94                                 ++fmt;
95                         } while (iswdigit(*fmt));
96
97                         /* digit string was ... */
98                         if (*fmt == '$') {
99                                 /* ... argument selector */
100                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
101                                 /* TODO implement */
102                                 return;
103                         }
104                         /* ... minimum field width */
105                 } else {
106                         /* flags */
107                         for (;;) {
108                                 format_flags_t flag;
109                                 switch (*fmt) {
110                                         case '#':  flag = FMT_FLAG_HASH;  break;
111                                         case '0':  flag = FMT_FLAG_ZERO;  break;
112                                         case '-':  flag = FMT_FLAG_MINUS; break;
113                                         case '\'': flag = FMT_FLAG_TICK;  break;
114
115                                         case ' ':
116                                                 if (fmt_flags & FMT_FLAG_PLUS) {
117                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
118                                                 }
119                                                 flag = FMT_FLAG_SPACE;
120                                                 break;
121
122                                         case '+':
123                                                 if (fmt_flags & FMT_FLAG_SPACE) {
124                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
125                                                 }
126                                                 flag = FMT_FLAG_PLUS;
127                                                 break;
128
129                                         default: goto break_fmt_flags;
130                                 }
131                                 if (fmt_flags & flag) {
132                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
133                                 }
134                                 fmt_flags |= flag;
135                                 ++fmt;
136                         }
137 break_fmt_flags:
138
139                         /* minimum field width */
140                         if (*fmt == '*') {
141                                 if (arg == NULL) {
142                                         warningf(pos, "missing argument for '*' field width in conversion specification");
143                                         return;
144                                 }
145                                 const type_t *const arg_type = arg->expression->base.datatype;
146                                 if (arg_type != type_int) {
147                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
148                                 }
149                                 arg = arg->next;
150                         } else {
151                                 while (iswdigit(*fmt)) {
152                                         ++fmt;
153                                 }
154                         }
155                 }
156
157                 /* precision */
158                 if (*fmt == '.') {
159                         ++fmt;
160                         if (*fmt == '*') {
161                                 if (arg == NULL) {
162                                         warningf(pos, "missing argument for '*' precision in conversion specification");
163                                         return;
164                                 }
165                                 const type_t *const arg_type = arg->expression->base.datatype;
166                                 if (arg_type != type_int) {
167                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
168                                 }
169                                 arg = arg->next;
170                         } else {
171                                 /* digit string may be omitted */
172                                 while (iswdigit(*fmt)) {
173                                         ++fmt;
174                                 }
175                         }
176                 }
177
178                 /* length modifier */
179                 format_length_modifier_t fmt_mod;
180                 switch (*fmt) {
181                         case 'h':
182                                 ++fmt;
183                                 if (*fmt == 'h') {
184                                         ++fmt;
185                                         fmt_mod = FMT_MOD_hh;
186                                 } else {
187                                         fmt_mod = FMT_MOD_h;
188                                 }
189                                 break;
190
191                         case 'l':
192                                 ++fmt;
193                                 if (*fmt == 'l') {
194                                         ++fmt;
195                                         fmt_mod = FMT_MOD_ll;
196                                 } else {
197                                         fmt_mod = FMT_MOD_l;
198                                 }
199                                 break;
200
201                         case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
202                         case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
203                         case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
204                         case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
205                         case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
206                         default:         fmt_mod = FMT_MOD_NONE; break;
207                 }
208
209                 if (*fmt == '\0') {
210                         warningf(pos, "dangling %% in format string");
211                         break;
212                 }
213
214                 const type_t   *expected_type;
215                 format_flags_t  allowed_flags;
216                 switch (*fmt) {
217                         case 'd':
218                         case 'i':
219                                 switch (fmt_mod) {
220                                         case FMT_MOD_NONE: expected_type = type_int;       break;
221                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
222                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
223                                         case FMT_MOD_l:    expected_type = type_long;      break;
224                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
225                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
226                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
227                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
228
229                                         default:
230                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
231                                                 goto next_arg;
232                                 }
233                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
234                                 break;
235
236                         case 'o':
237                         case 'X':
238                         case 'x':
239                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
240                                 goto eval_fmt_mod_unsigned;
241                                 break;
242
243                         case 'u':
244                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
245 eval_fmt_mod_unsigned:
246                                 switch (fmt_mod) {
247                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
248                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
249                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
250                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
251                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
252                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
253                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
254                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
255
256                                         default:
257                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
258                                                 goto next_arg;
259                                 }
260                                 break;
261
262                         case 'A':
263                         case 'a':
264                         case 'E':
265                         case 'e':
266                         case 'F':
267                         case 'f':
268                         case 'G':
269                         case 'g':
270                                 switch (fmt_mod) {
271                                         case FMT_MOD_l:    /* l modifier is ignored */
272                                         case FMT_MOD_NONE: expected_type = type_double;      break;
273                                         case FMT_MOD_L:    expected_type = type_long_double; break;
274
275                                         default:
276                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
277                                                 goto next_arg;
278                                 }
279                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
280                                 break;
281
282                         case 'C':
283                                 if (fmt_mod != FMT_MOD_NONE) {
284                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
285                                         goto next_arg;
286                                 }
287                                 expected_type = type_wchar_t;
288                                 allowed_flags = FMT_FLAG_NONE;
289                                 break;
290
291                         case 'c':
292                                 expected_type = type_int;
293                                 switch (fmt_mod) {
294                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
295                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
296
297                                         default:
298                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
299                                                 goto next_arg;
300                                 }
301                                 allowed_flags = FMT_FLAG_NONE;
302                                 break;
303
304                         case 'S':
305                                 if (fmt_mod != FMT_MOD_NONE) {
306                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
307                                         goto next_arg;
308                                 }
309                                 expected_type = type_wchar_t_ptr;
310                                 allowed_flags = FMT_FLAG_NONE;
311                                 break;
312
313                         case 's':
314                                 switch (fmt_mod) {
315                                         case FMT_MOD_NONE: expected_type = type_string;      break;
316                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
317
318                                         default:
319                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
320                                                 goto next_arg;
321                                 }
322                                 allowed_flags = FMT_FLAG_NONE;
323                                 break;
324
325                         case 'p':
326                                 if (fmt_mod != FMT_MOD_NONE) {
327                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
328                                         goto next_arg;
329                                 }
330                                 expected_type = type_void_ptr;
331                                 allowed_flags = FMT_FLAG_NONE;
332                                 break;
333
334                         case 'n':
335                                 switch (fmt_mod) {
336                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
337                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
338                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
339                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
340                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
341                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
342                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
343                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
344
345                                         default:
346                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
347                                                 goto next_arg;
348                                 }
349                                 allowed_flags = FMT_FLAG_NONE;
350                                 break;
351
352                         default:
353                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
354                                 goto next_arg;
355                 }
356
357                 if ((fmt_flags & ~allowed_flags) != 0) {
358                         /* TODO better warning message text */
359                         warningf(pos, "invalid format flags in conversion specification");
360                 }
361
362                 if (arg == NULL) {
363                         warningf(pos, "too few arguments for format string");
364                         return;
365                 }
366
367                 type_t *const arg_type = arg->expression->base.datatype;
368                 if (is_type_pointer(expected_type)) {
369                         type_t *const arg_skip = skip_typeref(arg_type);
370                         if (is_type_pointer(arg_skip)) {
371                                 type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
372                                 type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
373                                 if (arg_to == exp_to) {
374                                         goto next_arg;
375                                 }
376                         }
377                 } else {
378                         if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
379                                 goto next_arg;
380                         }
381                 }
382                 if (is_type_valid(arg_type)) {
383                         warningf(pos,
384                                 "argument type '%T' does not match conversion specifier '%%%s%c'\n",
385                                 arg_type, get_length_modifier_name(fmt_mod), (char)*fmt);
386                 }
387
388 next_arg:
389                 arg = arg->next;
390         }
391         if (fmt + 1 != wstring->begin + wstring->size) {
392                 warningf(pos, "format string contains NUL");
393         }
394         if (arg != NULL) {
395                 warningf(pos, "too many arguments for format string");
396         }
397 }
398
399 void check_format(const call_expression_t *const call)
400 {
401         if (!warning.check_format)
402                 return;
403
404         const expression_t *const func_expr = call->function;
405         if (func_expr->kind != EXPR_REFERENCE)
406                 return;
407
408         const char            *const name = func_expr->reference.symbol->string;
409         const call_argument_t *      arg  = call->arguments;
410         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
411                 check_format_arguments(arg, arg->next);
412         } else if (strcmp(name, "swprintf") == 0) {
413                 arg = arg->next->next; /* skip destination buffer and size */
414                 check_format_arguments(arg, arg->next);
415         }
416 }