Add a (partial) format string checker for wide string functions (currently wprintf...
authorChristoph Mallon <christoph.mallon@gmx.de>
Wed, 5 Dec 2007 20:32:38 +0000 (20:32 +0000)
committerChristoph Mallon <christoph.mallon@gmx.de>
Wed, 5 Dec 2007 20:32:38 +0000 (20:32 +0000)
[r18620]

Makefile
format_check.c [new file with mode: 0644]
format_check.h [new file with mode: 0644]
parser.c

index bcc4b13..995f2c0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,7 @@ SOURCES := \
        ast.c \
        ast2firm.c \
        diagnostic.c \
+       format_check.c \
        lexer.c \
        main.c \
        parser.c \
diff --git a/format_check.c b/format_check.c
new file mode 100644 (file)
index 0000000..ba3c175
--- /dev/null
@@ -0,0 +1,393 @@
+#include <wctype.h>
+
+#include "ast_t.h"
+#include "diagnostic.h"
+#include "format_check.h"
+#include "types.h"
+
+
+typedef enum format_flags_t {
+       FMT_FLAG_NONE  = 0,
+       FMT_FLAG_HASH  = 1U << 0,
+       FMT_FLAG_ZERO  = 1U << 1,
+       FMT_FLAG_MINUS = 1U << 2,
+       FMT_FLAG_SPACE = 1U << 3,
+       FMT_FLAG_PLUS  = 1U << 4,
+       FMT_FLAG_TICK  = 1U << 5
+} format_flags_t;
+
+typedef enum format_length_modifier_t {
+       FMT_MOD_NONE,
+       FMT_MOD_L,
+       FMT_MOD_hh,
+       FMT_MOD_h,
+       FMT_MOD_l,
+       FMT_MOD_ll,
+       FMT_MOD_j,
+       FMT_MOD_t,
+       FMT_MOD_z,
+       FMT_MOD_q
+} format_length_modifier_t;
+
+static void warn_invalid_length_modifier(const source_position_t pos,
+                                         const format_length_modifier_t mod,
+                                         const char conversion)
+{
+       static const char* const names[] = {
+               [FMT_MOD_NONE] = "",
+               [FMT_MOD_L]    = "L",
+               [FMT_MOD_hh]   = "hh",
+               [FMT_MOD_h]    = "h",
+               [FMT_MOD_l]    = "l",
+               [FMT_MOD_ll]   = "ll",
+               [FMT_MOD_j]    = "j",
+               [FMT_MOD_t]    = "t",
+               [FMT_MOD_z]    = "z",
+               [FMT_MOD_q]    = "q"
+       };
+       assert(mod < sizeof(names) / sizeof(*names));
+
+       parse_warning_posf(pos,
+               "invalid length modifier '%s' for conversion specifier '%%%c'",
+               names[mod], conversion
+       );
+}
+
+static void check_format_arguments(const call_argument_t *const fmt_arg, const call_argument_t* arg)
+{
+       const expression_t *fmt_expr = fmt_arg->expression;
+       if (fmt_expr->type == EXPR_UNARY_CAST_IMPLICIT) {
+               fmt_expr = fmt_expr->unary.value;
+       }
+
+       if (fmt_expr->type != EXPR_WIDE_STRING_LITERAL)
+               return;
+
+       const source_position_t    pos     = fmt_expr->base.source_position;
+       const wide_string_t *const wstring = &fmt_expr->wide_string.value;
+       const wchar_rep_t *fmt = wstring->begin;
+       for (; *fmt != '\0'; ++fmt) {
+               if (*fmt != '%')
+                       continue;
+               ++fmt;
+
+               if (*fmt == '%')
+                       continue;
+
+               format_flags_t fmt_flags = FMT_FLAG_NONE;
+               if (*fmt == '0') {
+                       ++fmt;
+                       fmt_flags |= FMT_FLAG_ZERO;
+               }
+
+               /* argument selector or minimum field width */
+               if (iswdigit(*fmt)) {
+                       do {
+                               ++fmt;
+                       } while (iswdigit(*fmt));
+
+                       /* digit string was ... */
+                       if (*fmt == '$') {
+                               /* ... argument selector */
+                               fmt_flags = FMT_FLAG_NONE; /* reset possibly set 0-flag */
+                               /* TODO implement */
+                               return;
+                       }
+                       /* ... minimum field width */
+               } else {
+                       /* flags */
+                       for (;;) {
+                               format_flags_t flag;
+                               switch (*fmt) {
+                                       case '#':  flag = FMT_FLAG_HASH;  break;
+                                       case '0':  flag = FMT_FLAG_ZERO;  break;
+                                       case '-':  flag = FMT_FLAG_MINUS; break;
+                                       case '\'': flag = FMT_FLAG_TICK;  break;
+
+                                       case ' ':
+                                               if (fmt_flags & FMT_FLAG_PLUS) {
+                                                       parse_warning_pos(pos, "' ' is overridden by prior '+' in conversion specification");
+                                               }
+                                               flag = FMT_FLAG_SPACE;
+                                               break;
+
+                                       case '+':
+                                               if (fmt_flags & FMT_FLAG_SPACE) {
+                                                       parse_warning_pos(pos, "'+' overrides prior ' ' in conversion specification");
+                                               }
+                                               flag = FMT_FLAG_PLUS;
+                                               break;
+
+                                       default: goto break_fmt_flags;
+                               }
+                               if (fmt_flags & flag) {
+                                       parse_warning_posf(pos, "repeated flag '%c' in conversion specification", (char)*fmt);
+                               }
+                               fmt_flags |= flag;
+                               ++fmt;
+                       }
+break_fmt_flags:
+
+                       /* minimum field width */
+                       if (*fmt == '*') {
+                               if (arg == NULL) {
+                                       parse_warning_pos(pos, "missing argument for '*' field width in conversion specification");
+                                       return;
+                               }
+                               const type_t *const arg_type = arg->expression->base.datatype;
+                               if (arg_type != type_int) {
+                                       parse_warning_pos(pos, "argument for '*' field width in conversion specification is not an 'int', but an '");
+                                       print_type(arg_type);
+                                       fputc('\'', stderr);
+                               }
+                               arg = arg->next;
+                       } else {
+                               while (iswdigit(*fmt)) {
+                                       ++fmt;
+                               }
+                       }
+               }
+
+               /* precision */
+               if (*fmt == '.') {
+                       ++fmt;
+                       if (*fmt == '*') {
+                               if (arg == NULL) {
+                                       parse_warning_pos(pos, "missing argument for '*' precision in conversion specification");
+                                       return;
+                               }
+                               const type_t *const arg_type = arg->expression->base.datatype;
+                               if (arg_type != type_int) {
+                                       parse_warning_pos(pos, "argument for '*' precision in conversion specification is not an 'int', but an '");
+                                       print_type(arg_type);
+                                       fputc('\'', stderr);
+                               }
+                               arg = arg->next;
+                       } else {
+                               /* digit string may be omitted */
+                               while (iswdigit(*fmt)) {
+                                       ++fmt;
+                               }
+                       }
+               }
+
+               /* length modifier */
+               format_length_modifier_t fmt_mod;
+               switch (*fmt) {
+                       case 'h':
+                               ++fmt;
+                               if (*fmt == 'h') {
+                                       ++fmt;
+                                       fmt_mod = FMT_MOD_hh;
+                               } else {
+                                       fmt_mod = FMT_MOD_h;
+                               }
+                               break;
+
+                       case 'l':
+                               ++fmt;
+                               if (*fmt == 'l') {
+                                       ++fmt;
+                                       fmt_mod = FMT_MOD_ll;
+                               } else {
+                                       fmt_mod = FMT_MOD_l;
+                               }
+                               break;
+
+                       case 'L': ++fmt; fmt_mod = FMT_MOD_L;    break;
+                       case 'j': ++fmt; fmt_mod = FMT_MOD_j;    break;
+                       case 't': ++fmt; fmt_mod = FMT_MOD_t;    break;
+                       case 'z': ++fmt; fmt_mod = FMT_MOD_z;    break;
+                       case 'q': ++fmt; fmt_mod = FMT_MOD_q;    break;
+                       default:         fmt_mod = FMT_MOD_NONE; break;
+               }
+
+               if (*fmt == '\0') {
+                       parse_warning_pos(pos, "dangling % in format string");
+                       break;
+               }
+
+               const type_t   *expected_type;
+               format_flags_t  allowed_flags;
+               switch (*fmt) {
+                       case 'd':
+                       case 'i':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int;       break;
+                                       case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
+                                       case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
+                                       case FMT_MOD_l:    expected_type = type_long;      break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long; break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t;  break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t;   break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_PLUS | FMT_FLAG_ZERO;
+                               break;
+
+                       case 'o':
+                       case 'X':
+                       case 'x':
+                               allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
+                               goto eval_fmt_mod_unsigned;
+                               break;
+
+                       case 'u':
+                               allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
+eval_fmt_mod_unsigned:
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
+                                       case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
+                                       case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
+                                       case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
+                                       case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
+                                       case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
+                                       case FMT_MOD_z:    expected_type = type_size_t;             break;
+                                       case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               break;
+
+                       case 'A':
+                       case 'a':
+                       case 'E':
+                       case 'e':
+                       case 'F':
+                       case 'f':
+                       case 'G':
+                       case 'g':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_l:    /* l modifier is ignored */
+                                       case FMT_MOD_NONE: expected_type = type_double;      break;
+                                       case FMT_MOD_L:    expected_type = type_long_double; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_PLUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
+                               break;
+
+                       case 'C':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                               }
+                               expected_type = type_wchar_t;
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       case 'c':
+                               expected_type = type_int;
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int;    break; /* TODO promoted char */
+                                       case FMT_MOD_l:    expected_type = type_wint_t; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       case 'S':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                               }
+                               expected_type = type_wchar_t_ptr;
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       case 's':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_string;      break;
+                                       case FMT_MOD_l:    expected_type = type_wchar_t_ptr; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       case 'p':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                               }
+                               expected_type = type_void_ptr;
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       case 'n':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int_ptr;         break;
+                                       case FMT_MOD_hh:   expected_type = type_signed_char_ptr; break;
+                                       case FMT_MOD_h:    expected_type = type_short_ptr;       break;
+                                       case FMT_MOD_l:    expected_type = type_long_ptr;        break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long_ptr;   break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t_ptr;    break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t_ptr;     break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t_ptr;   break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, *fmt);
+                                               break;
+                               }
+                               allowed_flags = FMT_FLAG_NONE;
+                               break;
+
+                       default:
+                               parse_warning_posf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)*fmt);
+                               arg = arg->next;
+                               continue;
+               }
+
+               if ((fmt_flags & ~allowed_flags) != 0) {
+                       /* TODO better warning message text */
+                       parse_warning_pos(pos, "invalid format flags in conversion specification");
+               }
+
+               if (arg == NULL) {
+                       parse_warning_pos(pos, "too few arguments for format string");
+                       return;
+               }
+
+               const type_t *const arg_type = arg->expression->base.datatype;
+               if (arg_type != expected_type) {
+                       parser_print_warning_prefix_pos(pos);
+                       fputs("argument type '", stderr);
+                       print_type(arg_type);
+                       fprintf(stderr, "' does not match conversion specifier '%%%c'\n", (char)*fmt);
+               }
+
+               arg = arg->next;
+       }
+       if (fmt + 1 != wstring->begin + wstring->size) {
+               parse_warning_pos(pos, "format string contains NUL");
+       }
+       if (arg != NULL) {
+               parse_warning_pos(pos, "too many arguments for format string");
+       }
+}
+
+void check_format(const call_expression_t *const call)
+{
+       const expression_t *const func_expr = call->function;
+       if (func_expr->type != EXPR_REFERENCE)
+               return;
+
+       const char            *const name = func_expr->reference.symbol->string;
+       const call_argument_t *      arg  = call->arguments;
+       if (strcmp(name, "wprintf") == 0) { /* TODO gammlig */
+               check_format_arguments(arg, arg->next);
+       } else if (strcmp(name, "swprintf") == 0) {
+               arg = arg->next->next; /* skip destination buffer and size */
+               check_format_arguments(arg, arg->next);
+       }
+}
diff --git a/format_check.h b/format_check.h
new file mode 100644 (file)
index 0000000..f7bc0a5
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef FORMAT_CHECK_H
+#define FORMAT_CHECK_H
+
+#include "ast.h"
+
+
+void check_format(const call_expression_t *call);
+
+#endif
index 4adcec0..e7be7bc 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 
 #include "diagnostic.h"
+#include "format_check.h"
 #include "parser.h"
 #include "lexer.h"
 #include "token_t.h"
@@ -3718,7 +3719,11 @@ static expression_t *parse_call_expression(unsigned precedence,
                                        argument->expression
                                                = create_implicit_cast(argument->expression, type);
                                }
+
+                               check_format(&result->call);
                        }
+               } else {
+                       check_format(&result->call);
                }
        }