New diagnostic functions diagnosticf(), errorf() and warningf() replacing the ad...
[cparser] / format_check.c
1 #include <wctype.h>
2
3 #include "ast_t.h"
4 #include "diagnostic.h"
5 #include "format_check.h"
6 #include "types.h"
7
8
/**
 * Flags that may appear in a conversion specification of a format string.
 * Every flag occupies its own bit so that several flags can be combined in
 * a single format_flags_t value.
 */
typedef enum format_flag_t {
        FMT_FLAG_NONE  = 0x00U, /* no flag present */
        FMT_FLAG_HASH  = 0x01U, /* '#'  */
        FMT_FLAG_ZERO  = 0x02U, /* '0'  */
        FMT_FLAG_MINUS = 0x04U, /* '-'  */
        FMT_FLAG_SPACE = 0x08U, /* ' '  */
        FMT_FLAG_PLUS  = 0x10U, /* '+'  */
        FMT_FLAG_TICK  = 0x20U  /* '\'' */
} format_flag_t;

/** A set of format_flag_t bits combined with bitwise or. */
typedef unsigned format_flags_t;
20
/**
 * Length modifiers recognised between the precision and the conversion
 * specifier.  The enumerators are named after the spelling of the modifier
 * in the format string; FMT_MOD_NONE means that no modifier was given.
 * The values are consecutive and start at zero, so they can be used as
 * array indices.
 */
typedef enum format_length_modifier_t {
        FMT_MOD_NONE = 0, /* no length modifier */
        FMT_MOD_L,        /* "L"  */
        FMT_MOD_hh,       /* "hh" */
        FMT_MOD_h,        /* "h"  */
        FMT_MOD_l,        /* "l"  */
        FMT_MOD_ll,       /* "ll" */
        FMT_MOD_j,        /* "j"  */
        FMT_MOD_t,        /* "t"  */
        FMT_MOD_z,        /* "z"  */
        FMT_MOD_q         /* "q"  */
} format_length_modifier_t;
33
34 static void warn_invalid_length_modifier(const source_position_t pos,
35                                          const format_length_modifier_t mod,
36                                          const char conversion)
37 {
38         static const char* const names[] = {
39                 [FMT_MOD_NONE] = "",
40                 [FMT_MOD_L]    = "L",
41                 [FMT_MOD_hh]   = "hh",
42                 [FMT_MOD_h]    = "h",
43                 [FMT_MOD_l]    = "l",
44                 [FMT_MOD_ll]   = "ll",
45                 [FMT_MOD_j]    = "j",
46                 [FMT_MOD_t]    = "t",
47                 [FMT_MOD_z]    = "z",
48                 [FMT_MOD_q]    = "q"
49         };
50         assert(mod < sizeof(names) / sizeof(*names));
51
52         warningf(pos,
53                 "invalid length modifier '%s' for conversion specifier '%%%c'",
54                 names[mod], conversion
55         );
56 }
57
58 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
59 {
60         const expression_t *fmt_expr = fmt_arg->expression;
61         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
62                 fmt_expr = fmt_expr->unary.value;
63         }
64
65         if (fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
66                 return;
67
68         const source_position_t    pos     = fmt_expr->base.source_position;
69         const wide_string_t *const wstring = &fmt_expr->wide_string.value;
70         const wchar_rep_t *fmt = wstring->begin;
71         for (; *fmt != '\0'; ++fmt) {
72                 if (*fmt != '%')
73                         continue;
74                 ++fmt;
75
76                 if (*fmt == '%')
77                         continue;
78
79                 format_flags_t fmt_flags = FMT_FLAG_NONE;
80                 if (*fmt == '0') {
81                         ++fmt;
82                         fmt_flags |= FMT_FLAG_ZERO;
83                 }
84
85                 /* argument selector or minimum field width */
86                 if (iswdigit(*fmt)) {
87                         do {
88                                 ++fmt;
89                         } while (iswdigit(*fmt));
90
91                         /* digit string was ... */
92                         if (*fmt == '$') {
93                                 /* ... argument selector */
94                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
95                                 /* TODO implement */
96                                 return;
97                         }
98                         /* ... minimum field width */
99                 } else {
100                         /* flags */
101                         for (;;) {
102                                 format_flags_t flag;
103                                 switch (*fmt) {
104                                         case '#':  flag = FMT_FLAG_HASH;  break;
105                                         case '0':  flag = FMT_FLAG_ZERO;  break;
106                                         case '-':  flag = FMT_FLAG_MINUS; break;
107                                         case '\'': flag = FMT_FLAG_TICK;  break;
108
109                                         case ' ':
110                                                 if (fmt_flags & FMT_FLAG_PLUS) {
111                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
112                                                 }
113                                                 flag = FMT_FLAG_SPACE;
114                                                 break;
115
116                                         case '+':
117                                                 if (fmt_flags & FMT_FLAG_SPACE) {
118                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
119                                                 }
120                                                 flag = FMT_FLAG_PLUS;
121                                                 break;
122
123                                         default: goto break_fmt_flags;
124                                 }
125                                 if (fmt_flags & flag) {
126                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
127                                 }
128                                 fmt_flags |= flag;
129                                 ++fmt;
130                         }
131 break_fmt_flags:
132
133                         /* minimum field width */
134                         if (*fmt == '*') {
135                                 if (arg == NULL) {
136                                         warningf(pos, "missing argument for '*' field width in conversion specification");
137                                         return;
138                                 }
139                                 const type_t *const arg_type = arg->expression->base.datatype;
140                                 if (arg_type != type_int) {
141                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
142                                 }
143                                 arg = arg->next;
144                         } else {
145                                 while (iswdigit(*fmt)) {
146                                         ++fmt;
147                                 }
148                         }
149                 }
150
151                 /* precision */
152                 if (*fmt == '.') {
153                         ++fmt;
154                         if (*fmt == '*') {
155                                 if (arg == NULL) {
156                                         warningf(pos, "missing argument for '*' precision in conversion specification");
157                                         return;
158                                 }
159                                 const type_t *const arg_type = arg->expression->base.datatype;
160                                 if (arg_type != type_int) {
161                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
162                                 }
163                                 arg = arg->next;
164                         } else {
165                                 /* digit string may be omitted */
166                                 while (iswdigit(*fmt)) {
167                                         ++fmt;
168                                 }
169                         }
170                 }
171
172                 /* length modifier */
173                 format_length_modifier_t fmt_mod;
174                 switch (*fmt) {
175                         case 'h':
176                                 ++fmt;
177                                 if (*fmt == 'h') {
178                                         ++fmt;
179                                         fmt_mod = FMT_MOD_hh;
180                                 } else {
181                                         fmt_mod = FMT_MOD_h;
182                                 }
183                                 break;
184
185                         case 'l':
186                                 ++fmt;
187                                 if (*fmt == 'l') {
188                                         ++fmt;
189                                         fmt_mod = FMT_MOD_ll;
190                                 } else {
191                                         fmt_mod = FMT_MOD_l;
192                                 }
193                                 break;
194
195                         case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
196                         case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
197                         case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
198                         case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
199                         case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
200                         default:         fmt_mod = FMT_MOD_NONE; break;
201                 }
202
203                 if (*fmt == '\0') {
204                         warningf(pos, "dangling %% in format string");
205                         break;
206                 }
207
208                 const type_t   *expected_type;
209                 format_flags_t  allowed_flags;
210                 switch (*fmt) {
211                         case 'd':
212                         case 'i':
213                                 switch (fmt_mod) {
214                                         case FMT_MOD_NONE: expected_type = type_int;       break;
215                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
216                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
217                                         case FMT_MOD_l:    expected_type = type_long;      break;
218                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
219                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
220                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
221                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
222
223                                         default:
224                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
225                                                 break;
226                                 }
227                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
228                                 break;
229
230                         case 'o':
231                         case 'X':
232                         case 'x':
233                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
234                                 goto eval_fmt_mod_unsigned;
235                                 break;
236
237                         case 'u':
238                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
239 eval_fmt_mod_unsigned:
240                                 switch (fmt_mod) {
241                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
242                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
243                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
244                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
245                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
246                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
247                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
248                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
249
250                                         default:
251                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
252                                                 break;
253                                 }
254                                 break;
255
256                         case 'A':
257                         case 'a':
258                         case 'E':
259                         case 'e':
260                         case 'F':
261                         case 'f':
262                         case 'G':
263                         case 'g':
264                                 switch (fmt_mod) {
265                                         case FMT_MOD_l:    /* l modifier is ignored */
266                                         case FMT_MOD_NONE: expected_type = type_double;      break;
267                                         case FMT_MOD_L:    expected_type = type_long_double; break;
268
269                                         default:
270                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
271                                                 break;
272                                 }
273                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
274                                 break;
275
276                         case 'C':
277                                 if (fmt_mod != FMT_MOD_NONE) {
278                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
279                                 }
280                                 expected_type = type_wchar_t;
281                                 allowed_flags = FMT_FLAG_NONE;
282                                 break;
283
284                         case 'c':
285                                 expected_type = type_int;
286                                 switch (fmt_mod) {
287                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
288                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
289
290                                         default:
291                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
292                                                 break;
293                                 }
294                                 allowed_flags = FMT_FLAG_NONE;
295                                 break;
296
297                         case 'S':
298                                 if (fmt_mod != FMT_MOD_NONE) {
299                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
300                                 }
301                                 expected_type = type_wchar_t_ptr;
302                                 allowed_flags = FMT_FLAG_NONE;
303                                 break;
304
305                         case 's':
306                                 switch (fmt_mod) {
307                                         case FMT_MOD_NONE: expected_type = type_string;      break;
308                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
309
310                                         default:
311                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
312                                                 break;
313                                 }
314                                 allowed_flags = FMT_FLAG_NONE;
315                                 break;
316
317                         case 'p':
318                                 if (fmt_mod != FMT_MOD_NONE) {
319                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
320                                 }
321                                 expected_type = type_void_ptr;
322                                 allowed_flags = FMT_FLAG_NONE;
323                                 break;
324
325                         case 'n':
326                                 switch (fmt_mod) {
327                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
328                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
329                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
330                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
331                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
332                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
333                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
334                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
335
336                                         default:
337                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
338                                                 break;
339                                 }
340                                 allowed_flags = FMT_FLAG_NONE;
341                                 break;
342
343                         default:
344                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
345                                 arg = arg->next;
346                                 continue;
347                 }
348
349                 if ((fmt_flags & ~allowed_flags) != 0) {
350                         /* TODO better warning message text */
351                         warningf(pos, "invalid format flags in conversion specification");
352                 }
353
354                 if (arg == NULL) {
355                         warningf(pos, "too few arguments for format string");
356                         return;
357                 }
358
359                 const type_t *const arg_type = arg->expression->base.datatype;
360                 if (arg_type != expected_type) {
361                         warningf(pos, "argument type '%T' does not match conversion specifier '%%%c'\n", arg_type, (char)*fmt);
362                 }
363
364                 arg = arg->next;
365         }
366         if (fmt + 1 != wstring->begin + wstring->size) {
367                 warningf(pos, "format string contains NUL");
368         }
369         if (arg != NULL) {
370                 warningf(pos, "too many arguments for format string");
371         }
372 }
373
374 void check_format(const call_expression_t *const call)
375 {
376         const expression_t *const func_expr = call->function;
377         if (func_expr->kind != EXPR_REFERENCE)
378                 return;
379
380         const char            *const name = func_expr->reference.symbol->string;
381         const call_argument_t *      arg  = call->arguments;
382         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
383                 check_format_arguments(arg, arg->next);
384         } else if (strcmp(name, "swprintf") == 0) {
385                 arg = arg->next->next; /* skip destination buffer and size */
386                 check_format_arguments(arg, arg->next);
387         }
388 }