The ' '-flag (space) is allowed for signed conversions.
[cparser] / format_check.c
1 #include <wctype.h>
2
3 #include "ast_t.h"
4 #include "diagnostic.h"
5 #include "format_check.h"
6 #include "types.h"
7 #include "type_t.h"
8
9
/**
 * Flags that may appear in a conversion specification.
 * Each flag owns exactly one bit, so any combination of them can be
 * collected in a single format_flags_t value.
 */
typedef enum format_flag_t {
	FMT_FLAG_NONE  = 0x00U, /* no flag at all */
	FMT_FLAG_HASH  = 0x01U, /* '#'  */
	FMT_FLAG_ZERO  = 0x02U, /* '0'  */
	FMT_FLAG_MINUS = 0x04U, /* '-'  */
	FMT_FLAG_SPACE = 0x08U, /* ' '  */
	FMT_FLAG_PLUS  = 0x10U, /* '+'  */
	FMT_FLAG_TICK  = 0x20U  /* '\'' */
} format_flag_t;

/** A set of format_flag_t bits or'ed together. */
typedef unsigned int format_flags_t;
21
/**
 * Length modifiers that can precede a conversion specifier.
 * The enumerators are named after the spelling of the modifier in the
 * format string; FMT_MOD_NONE stands for "no modifier present".
 * The values are consecutive and start at 0, so they can be used as
 * array indices.
 */
typedef enum format_length_modifier_t {
	FMT_MOD_NONE = 0,
	FMT_MOD_L    = 1,
	FMT_MOD_hh   = 2,
	FMT_MOD_h    = 3,
	FMT_MOD_l    = 4,
	FMT_MOD_ll   = 5,
	FMT_MOD_j    = 6,
	FMT_MOD_t    = 7,
	FMT_MOD_z    = 8,
	FMT_MOD_q    = 9
} format_length_modifier_t;
34
35 static const char* get_length_modifier_name(const format_length_modifier_t mod)
36 {
37         static const char* const names[] = {
38                 [FMT_MOD_NONE] = "",
39                 [FMT_MOD_L]    = "L",
40                 [FMT_MOD_hh]   = "hh",
41                 [FMT_MOD_h]    = "h",
42                 [FMT_MOD_l]    = "l",
43                 [FMT_MOD_ll]   = "ll",
44                 [FMT_MOD_j]    = "j",
45                 [FMT_MOD_t]    = "t",
46                 [FMT_MOD_z]    = "z",
47                 [FMT_MOD_q]    = "q"
48         };
49         assert(mod < sizeof(names) / sizeof(*names));
50         return names[mod];
51 }
52
53 static void warn_invalid_length_modifier(const source_position_t pos,
54                                          const format_length_modifier_t mod,
55                                          const wchar_rep_t conversion)
56 {
57         warningf(pos,
58                 "invalid length modifier '%s' for conversion specifier '%%%c'",
59                 get_length_modifier_name(mod), conversion
60         );
61 }
62
63 static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
64 {
65         const expression_t *fmt_expr = fmt_arg->expression;
66         if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
67                 fmt_expr = fmt_expr->unary.value;
68         }
69
70         if (fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
71                 return;
72
73         const source_position_t    pos     = fmt_expr->base.source_position;
74         const wide_string_t *const wstring = &fmt_expr->wide_string.value;
75         const wchar_rep_t *fmt = wstring->begin;
76         for (; *fmt != '\0'; ++fmt) {
77                 if (*fmt != '%')
78                         continue;
79                 ++fmt;
80
81                 if (*fmt == '%')
82                         continue;
83
84                 format_flags_t fmt_flags = FMT_FLAG_NONE;
85                 if (*fmt == '0') {
86                         ++fmt;
87                         fmt_flags |= FMT_FLAG_ZERO;
88                 }
89
90                 /* argument selector or minimum field width */
91                 if (iswdigit(*fmt)) {
92                         do {
93                                 ++fmt;
94                         } while (iswdigit(*fmt));
95
96                         /* digit string was ... */
97                         if (*fmt == '$') {
98                                 /* ... argument selector */
99                                 fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
100                                 /* TODO implement */
101                                 return;
102                         }
103                         /* ... minimum field width */
104                 } else {
105                         /* flags */
106                         for (;;) {
107                                 format_flags_t flag;
108                                 switch (*fmt) {
109                                         case '#':  flag = FMT_FLAG_HASH;  break;
110                                         case '0':  flag = FMT_FLAG_ZERO;  break;
111                                         case '-':  flag = FMT_FLAG_MINUS; break;
112                                         case '\'': flag = FMT_FLAG_TICK;  break;
113
114                                         case ' ':
115                                                 if (fmt_flags & FMT_FLAG_PLUS) {
116                                                         warningf(pos, "' ' is overridden by prior '+' in conversion specification");
117                                                 }
118                                                 flag = FMT_FLAG_SPACE;
119                                                 break;
120
121                                         case '+':
122                                                 if (fmt_flags & FMT_FLAG_SPACE) {
123                                                         warningf(pos, "'+' overrides prior ' ' in conversion specification");
124                                                 }
125                                                 flag = FMT_FLAG_PLUS;
126                                                 break;
127
128                                         default: goto break_fmt_flags;
129                                 }
130                                 if (fmt_flags & flag) {
131                                         warningf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
132                                 }
133                                 fmt_flags |= flag;
134                                 ++fmt;
135                         }
136 break_fmt_flags:
137
138                         /* minimum field width */
139                         if (*fmt == '*') {
140                                 if (arg == NULL) {
141                                         warningf(pos, "missing argument for '*' field width in conversion specification");
142                                         return;
143                                 }
144                                 const type_t *const arg_type = arg->expression->base.datatype;
145                                 if (arg_type != type_int) {
146                                         warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
147                                 }
148                                 arg = arg->next;
149                         } else {
150                                 while (iswdigit(*fmt)) {
151                                         ++fmt;
152                                 }
153                         }
154                 }
155
156                 /* precision */
157                 if (*fmt == '.') {
158                         ++fmt;
159                         if (*fmt == '*') {
160                                 if (arg == NULL) {
161                                         warningf(pos, "missing argument for '*' precision in conversion specification");
162                                         return;
163                                 }
164                                 const type_t *const arg_type = arg->expression->base.datatype;
165                                 if (arg_type != type_int) {
166                                         warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
167                                 }
168                                 arg = arg->next;
169                         } else {
170                                 /* digit string may be omitted */
171                                 while (iswdigit(*fmt)) {
172                                         ++fmt;
173                                 }
174                         }
175                 }
176
177                 /* length modifier */
178                 format_length_modifier_t fmt_mod;
179                 switch (*fmt) {
180                         case 'h':
181                                 ++fmt;
182                                 if (*fmt == 'h') {
183                                         ++fmt;
184                                         fmt_mod = FMT_MOD_hh;
185                                 } else {
186                                         fmt_mod = FMT_MOD_h;
187                                 }
188                                 break;
189
190                         case 'l':
191                                 ++fmt;
192                                 if (*fmt == 'l') {
193                                         ++fmt;
194                                         fmt_mod = FMT_MOD_ll;
195                                 } else {
196                                         fmt_mod = FMT_MOD_l;
197                                 }
198                                 break;
199
200                         case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
201                         case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
202                         case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
203                         case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
204                         case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
205                         default:         fmt_mod = FMT_MOD_NONE; break;
206                 }
207
208                 if (*fmt == '\0') {
209                         warningf(pos, "dangling %% in format string");
210                         break;
211                 }
212
213                 const type_t   *expected_type = NULL;
214                 format_flags_t  allowed_flags;
215                 switch (*fmt) {
216                         case 'd':
217                         case 'i':
218                                 switch (fmt_mod) {
219                                         case FMT_MOD_NONE: expected_type = type_int;       break;
220                                         case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
221                                         case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
222                                         case FMT_MOD_l:    expected_type = type_long;      break;
223                                         case FMT_MOD_ll:   expected_type = type_long_long; break;
224                                         case FMT_MOD_j:    expected_type = type_intmax_t;  break;
225                                         case FMT_MOD_z:    expected_type = type_ssize_t;   break;
226                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
227
228                                         default:
229                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
230                                                 break;
231                                 }
232                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
233                                 break;
234
235                         case 'o':
236                         case 'X':
237                         case 'x':
238                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
239                                 goto eval_fmt_mod_unsigned;
240                                 break;
241
242                         case 'u':
243                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
244 eval_fmt_mod_unsigned:
245                                 switch (fmt_mod) {
246                                         case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
247                                         case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
248                                         case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
249                                         case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
250                                         case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
251                                         case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
252                                         case FMT_MOD_z:    expected_type = type_size_t;             break;
253                                         case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
254
255                                         default:
256                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
257                                                 break;
258                                 }
259                                 break;
260
261                         case 'A':
262                         case 'a':
263                         case 'E':
264                         case 'e':
265                         case 'F':
266                         case 'f':
267                         case 'G':
268                         case 'g':
269                                 switch (fmt_mod) {
270                                         case FMT_MOD_l:    /* l modifier is ignored */
271                                         case FMT_MOD_NONE: expected_type = type_double;      break;
272                                         case FMT_MOD_L:    expected_type = type_long_double; break;
273
274                                         default:
275                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
276                                                 break;
277                                 }
278                                 allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_SPACE | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
279                                 break;
280
281                         case 'C':
282                                 if (fmt_mod != FMT_MOD_NONE) {
283                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
284                                 }
285                                 expected_type = type_wchar_t;
286                                 allowed_flags = FMT_FLAG_NONE;
287                                 break;
288
289                         case 'c':
290                                 expected_type = type_int;
291                                 switch (fmt_mod) {
292                                         case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
293                                         case FMT_MOD_l:    expected_type = type_wint_t; break;
294
295                                         default:
296                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
297                                                 break;
298                                 }
299                                 allowed_flags = FMT_FLAG_NONE;
300                                 break;
301
302                         case 'S':
303                                 if (fmt_mod != FMT_MOD_NONE) {
304                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
305                                 }
306                                 expected_type = type_wchar_t_ptr;
307                                 allowed_flags = FMT_FLAG_NONE;
308                                 break;
309
310                         case 's':
311                                 switch (fmt_mod) {
312                                         case FMT_MOD_NONE: expected_type = type_string;      break;
313                                         case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
314
315                                         default:
316                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
317                                                 break;
318                                 }
319                                 allowed_flags = FMT_FLAG_NONE;
320                                 break;
321
322                         case 'p':
323                                 if (fmt_mod != FMT_MOD_NONE) {
324                                         warn_invalid_length_modifier(pos, fmt_mod, *fmt);
325                                 }
326                                 expected_type = type_void_ptr;
327                                 allowed_flags = FMT_FLAG_NONE;
328                                 break;
329
330                         case 'n':
331                                 switch (fmt_mod) {
332                                         case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
333                                         case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
334                                         case FMT_MOD_h:    expected_type = type_short_ptr;       break;
335                                         case FMT_MOD_l:    expected_type = type_long_ptr;        break;
336                                         case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
337                                         case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
338                                         case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
339                                         case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
340
341                                         default:
342                                                 warn_invalid_length_modifier(pos, fmt_mod, *fmt);
343                                                 break;
344                                 }
345                                 allowed_flags = FMT_FLAG_NONE;
346                                 break;
347
348                         default:
349                                 warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
350                                 arg = arg->next;
351                                 continue;
352                 }
353
354                 if ((fmt_flags & ~allowed_flags) != 0) {
355                         /* TODO better warning message text */
356                         warningf(pos, "invalid format flags in conversion specification");
357                 }
358
359                 if (arg == NULL) {
360                         warningf(pos, "too few arguments for format string");
361                         return;
362                 }
363
364                 type_t *const arg_type = arg->expression->base.datatype;
365                 if (is_type_pointer(expected_type)) {
366                         type_t *const arg_skip = skip_typeref(arg_type);
367                         if (is_type_pointer(arg_skip)) {
368                                 type_t *const exp_to = skip_typeref(expected_type->pointer.points_to);
369                                 type_t *const arg_to = skip_typeref(arg_skip->pointer.points_to);
370                                 if (arg_to == exp_to) {
371                                         goto arg_type_ok;
372                                 }
373                         }
374                 } else {
375                         if (get_unqualified_type(skip_typeref(arg_type)) == expected_type) {
376                                 goto arg_type_ok;
377                         }
378                 }
379                 warningf(pos,
380                         "argument type '%T' does not match conversion specifier '%%%s%c'\n",
381                         arg_type, get_length_modifier_name(fmt_mod), (char)*fmt);
382 arg_type_ok:
383
384                 arg = arg->next;
385         }
386         if (fmt + 1 != wstring->begin + wstring->size) {
387                 warningf(pos, "format string contains NUL");
388         }
389         if (arg != NULL) {
390                 warningf(pos, "too many arguments for format string");
391         }
392 }
393
394 void check_format(const call_expression_t *const call)
395 {
396         const expression_t *const func_expr = call->function;
397         if (func_expr->kind != EXPR_REFERENCE)
398                 return;
399
400         const char            *const name = func_expr->reference.symbol->string;
401         const call_argument_t *      arg  = call->arguments;
402         if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
403                 check_format_arguments(arg, arg->next);
404         } else if (strcmp(name, "swprintf") == 0) {
405                 arg = arg->next->next; /* skip destination buffer and size */
406                 check_format_arguments(arg, arg->next);
407         }
408 }