ast2firm: Factorise code to convert a value to its storage type.
[cparser] / format_check.c
index 4dbd932..bbb47e3 100644 (file)
 
 #include <ctype.h>
 
+#include "adt/strutil.h"
 #include "adt/util.h"
 #include "format_check.h"
 #include "symbol_t.h"
 #include "ast_t.h"
 #include "entity_t.h"
 #include "diagnostic.h"
+#include "parser.h"
 #include "types.h"
 #include "type_t.h"
 #include "warning.h"
@@ -88,18 +90,16 @@ static const char* get_length_modifier_name(const format_length_modifier_t mod)
                [FMT_MOD_I32]  = "I32",
                [FMT_MOD_I64]  = "I64"
        };
-       assert(mod < lengthof(names));
+       assert((size_t)mod < lengthof(names));
        return names[mod];
 }
 
-static void warn_invalid_length_modifier(const source_position_t *pos,
+static void warn_invalid_length_modifier(const position_t *pos,
                                          const format_length_modifier_t mod,
                                          const utf32 conversion)
 {
-       warningf(pos,
-               "invalid length modifier '%s' for conversion specifier '%%%c'",
-               get_length_modifier_name(mod), conversion
-       );
+       char const *const lmod = get_length_modifier_name(mod);
+       warningf(WARN_FORMAT, pos, "invalid length modifier '%s' for conversion specifier '%%%c'", lmod, conversion);
 }
 
 /**
@@ -109,7 +109,7 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                                         const call_argument_t *arg,
                                         const format_spec_t *spec)
 {
-       while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+       while (fmt_expr->kind == EXPR_UNARY_CAST) {
                fmt_expr = fmt_expr->unary.value;
        }
 
@@ -127,15 +127,14 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                return nt > nf ? nt : nf;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return -1;
 
-       const char *string = fmt_expr->literal.value.begin;
-       size_t      size   = fmt_expr->literal.value.size;
+       const char *string = fmt_expr->string_literal.value.begin;
+       size_t      size   = fmt_expr->string_literal.value.size;
        const char *c      = string;
 
-       const source_position_t *pos = &fmt_expr->base.source_position;
+       const position_t *pos = &fmt_expr->base.pos;
        unsigned num_fmt  = 0;
        unsigned num_args = 0;
        char     fmt;
@@ -144,10 +143,6 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                        continue;
                fmt = *(++c);
 
-               if (fmt == '\0') {
-                       warningf(pos, "dangling %% in format string");
-                       break;
-               }
                if (fmt == '%')
                        continue;
 
@@ -186,14 +181,14 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
 
                                        case ' ':
                                                if (fmt_flags & FMT_FLAG_PLUS) {
-                                                       warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
+                                                       warningf(WARN_FORMAT, pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
                                                }
                                                flag = FMT_FLAG_SPACE;
                                                break;
 
                                        case '+':
                                                if (fmt_flags & FMT_FLAG_SPACE) {
-                                                       warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
+                                                       warningf(WARN_FORMAT, pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
                                                }
                                                flag = FMT_FLAG_PLUS;
                                                break;
@@ -201,7 +196,7 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                                        default: goto break_fmt_flags;
                                }
                                if (fmt_flags & flag) {
-                                       warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
+                                       warningf(WARN_FORMAT, pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
                                }
                                fmt_flags |= flag;
                                fmt = *(++c);
@@ -213,12 +208,12 @@ break_fmt_flags:
                                ++num_args;
                                fmt = *(++c);
                                if (arg == NULL) {
-                                       warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
+                                       warningf(WARN_FORMAT, pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
                                        return -1;
                                }
                                const type_t *const arg_type = arg->expression->base.type;
                                if (arg_type != type_int) {
-                                       warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
+                                       warningf(WARN_FORMAT, pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
                                }
                                arg = arg->next;
                        } else {
@@ -230,17 +225,21 @@ break_fmt_flags:
 
                /* precision */
                if (fmt == '.') {
+                       if (fmt_flags & FMT_FLAG_ZERO) {
+                               warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
+                       }
+
                        ++num_args;
                        fmt = *(++c);
                        if (fmt == '*') {
                                fmt = *(++c);
                                if (arg == NULL) {
-                                       warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
+                                       warningf(WARN_FORMAT, pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
                                        return -1;
                                }
                                const type_t *const arg_type = arg->expression->base.type;
                                if (arg_type != type_int) {
-                                       warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
+                                       warningf(WARN_FORMAT, pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
                                }
                                arg = arg->next;
                        } else {
@@ -318,6 +317,10 @@ break_fmt_flags:
                                break;
                }
 
+               if (fmt == '\0') {
+                       warningf(WARN_FORMAT, pos, "dangling %% in format string");
+                       break;
+               }
 
                type_t            *expected_type;
                type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
@@ -326,17 +329,17 @@ break_fmt_flags:
                        case 'd':
                        case 'i':
                                switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_int;       break;
-                                       case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
-                                       case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
-                                       case FMT_MOD_l:    expected_type = type_long;      break;
-                                       case FMT_MOD_ll:   expected_type = type_long_long; break;
-                                       case FMT_MOD_j:    expected_type = type_intmax_t;  break;
-                                       case FMT_MOD_z:    expected_type = type_ssize_t;   break;
-                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
-                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
-                                       case FMT_MOD_I32:  expected_type = type_int32;     break;
-                                       case FMT_MOD_I64:  expected_type = type_int64;     break;
+                                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                                       case FMT_MOD_h:    expected_type = type_short;       break;
+                                       case FMT_MOD_l:    expected_type = type_long;        break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I32:  expected_type = type_int32;       break;
+                                       case FMT_MOD_I64:  expected_type = type_int64;       break;
 
                                        default:
                                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
@@ -356,8 +359,8 @@ break_fmt_flags:
 eval_fmt_mod_unsigned:
                                switch (fmt_mod) {
                                        case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
-                                       case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
-                                       case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
+                                       case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
+                                       case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
                                        case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
                                        case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
                                        case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
@@ -468,10 +471,9 @@ eval_fmt_mod_unsigned:
                                break;
 
                        default:
-                               warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
+                               warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
                                if (arg == NULL) {
-                                       warningf(pos, "too few arguments for format string");
-                                       return -1;
+                                       goto too_few_args;
                                }
                                goto next_arg;
                }
@@ -488,11 +490,12 @@ eval_fmt_mod_unsigned:
                        if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
                        *p = '\0';
 
-                       warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
+                       warningf(WARN_FORMAT, pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
                }
 
                if (arg == NULL) {
-                       warningf(pos, "too few arguments for format string");
+too_few_args:
+                       warningf(WARN_FORMAT, pos, "too few arguments for format string");
                        return -1;
                }
 
@@ -518,11 +521,20 @@ eval_fmt_mod_unsigned:
                                }
                        } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
                                goto next_arg;
+                       } else if (arg->expression->kind == EXPR_UNARY_CAST) {
+                               expression_t const *const expr        = arg->expression->unary.value;
+                               type_t             *const unprom_type = skip_typeref(expr->base.type);
+                               if (get_unqualified_type(unprom_type) == expected_type_skip) {
+                                       goto next_arg;
+                               }
+                               if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
+                                       goto next_arg;
+                               }
                        }
                        if (is_type_valid(arg_skip)) {
-                               warningf(&arg->expression->base.source_position,
-                                       "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
-                                       arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
+                               position_t const *const apos = &arg->expression->base.pos;
+                               char       const *const mod  = get_length_modifier_name(fmt_mod);
+                               warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
                        }
                }
 next_arg:
@@ -530,7 +542,7 @@ next_arg:
        }
        assert(fmt == '\0');
        if (c+1 < string + size) {
-               warningf(pos, "format string contains '\\0'");
+               warningf(WARN_FORMAT, pos, "format string contains '\\0'");
        }
        return num_args;
 }
@@ -563,10 +575,8 @@ static void check_printf_format(call_argument_t const *arg,
        for (; arg != NULL; arg = arg->next)
                ++num_args;
        if (num_args > (size_t)num_fmt) {
-               warningf(&fmt_expr->base.source_position,
-                        "%u argument%s but only %u format specifier%s",
-                        num_args, num_args != 1 ? "s" : "",
-                        num_fmt,  num_fmt  != 1 ? "s" : "");
+               position_t const *const pos = &fmt_expr->base.pos;
+               warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
        }
 }
 
@@ -585,38 +595,50 @@ static void check_scanf_format(const call_argument_t *arg,
        }
 
        const expression_t *fmt_expr = arg->expression;
-       if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+       if (fmt_expr->kind == EXPR_UNARY_CAST) {
                fmt_expr = fmt_expr->unary.value;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return;
 
-       const char *string = fmt_expr->literal.value.begin;
-       size_t      size   = fmt_expr->literal.value.size;
+       const char *string = fmt_expr->string_literal.value.begin;
+       size_t      size   = fmt_expr->string_literal.value.size;
        const char *c      = string;
 
        /* find the real args */
        for (; idx < spec->arg_idx && arg != NULL; ++idx)
                arg = arg->next;
 
-       const source_position_t *pos = &fmt_expr->base.source_position;
+       const position_t *pos = &fmt_expr->base.pos;
        unsigned num_fmt = 0;
        char     fmt;
        for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
                if (fmt != '%')
                        continue;
                fmt = *(++c);
-               if (fmt == '\0') {
-                       warningf(pos, "dangling '%%' in format string");
-                       break;
-               }
                if (fmt == '%')
                        continue;
 
                ++num_fmt;
 
+               bool suppress_assignment = false;
+               if (fmt == '*') {
+                       fmt = *++c;
+                       suppress_assignment = true;
+               }
+
+               size_t width = 0;
+               if ('0' <= fmt && fmt <= '9') {
+                       do {
+                               width = width * 10 + (fmt - '0');
+                               fmt   = *++c;
+                       } while ('0' <= fmt && fmt <= '9');
+                       if (width == 0) {
+                               warningf(WARN_FORMAT, pos, "field width is zero at format %u", num_fmt);
+                       }
+               }
+
                /* look for length modifiers */
                format_length_modifier_t fmt_mod = FMT_MOD_NONE;
                switch (fmt) {
@@ -679,7 +701,7 @@ static void check_scanf_format(const call_argument_t *arg,
                }
 
                if (fmt == '\0') {
-                       warningf(pos, "dangling % with conversion specififer in format string");
+                       warningf(WARN_FORMAT, pos, "dangling %% with conversion specififer in format string");
                        break;
                }
 
@@ -754,20 +776,32 @@ static void check_scanf_format(const call_argument_t *arg,
                                goto next_arg;
                        }
                        expected_type = type_wchar_t;
-                       break;
+                       goto check_c_width;
 
-               case 'c':
-                       expected_type = type_int;
+               case 'c': {
                        switch (fmt_mod) {
-                       case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
-                       case FMT_MOD_l:    expected_type = type_wint_t;  break;
+                       case FMT_MOD_NONE: expected_type = type_char;    break;
+                       case FMT_MOD_l:    expected_type = type_wchar_t; break;
                        case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
                        default:
                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                goto next_arg;
                        }
+
+check_c_width:
+                       if (width == 0)
+                               width = 1;
+                       if (!suppress_assignment && arg != NULL) {
+                               type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
+                               if (is_type_array(type)       &&
+                                   type->array.size_constant &&
+                                   width > type->array.size) {
+                                       warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters at format %u", type, (unsigned)width, num_fmt); /* width is size_t; '%u' presumably reads an unsigned int -- confirm against warningf */
+                               }
+                       }
                        break;
+               }
 
                case 'S':
                        if (fmt_mod != FMT_MOD_NONE) {
@@ -778,7 +812,7 @@ static void check_scanf_format(const call_argument_t *arg,
                        break;
 
                case 's':
-               case '[':
+               case '[': {
                        switch (fmt_mod) {
                                case FMT_MOD_NONE: expected_type = type_char;    break;
                                case FMT_MOD_l:    expected_type = type_wchar_t; break;
@@ -788,17 +822,33 @@ static void check_scanf_format(const call_argument_t *arg,
                                        warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                        goto next_arg;
                        }
+
+                       if (!suppress_assignment &&
+                           width != 0           &&
+                           arg   != NULL) {
+                               type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
+                               if (is_type_array(type)       &&
+                                   type->array.size_constant &&
+                                   width >= type->array.size) {
+                                       warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters and \\0 at format %u", type, (unsigned)width, num_fmt); /* width is size_t; '%u' presumably reads an unsigned int -- confirm against warningf */
+                               }
+                       }
                        break;
+               }
 
                case 'p':
                        if (fmt_mod != FMT_MOD_NONE) {
                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                goto next_arg;
                        }
-                       expected_type = type_void_ptr;
+                       expected_type = type_void;
                        break;
 
-               case 'n':
+               case 'n': {
+                       if (suppress_assignment) {
+                               warningf(WARN_FORMAT, pos, "conversion '%%n' cannot be suppressed with '*' at format %u", num_fmt); /* '%%' so the message prints a literal "%n" instead of being parsed as a directive */
+                       }
+
                        switch (fmt_mod) {
                        case FMT_MOD_NONE: expected_type = type_int;         break;
                        case FMT_MOD_hh:   expected_type = type_signed_char; break;
@@ -814,19 +864,23 @@ static void check_scanf_format(const call_argument_t *arg,
                                goto next_arg;
                        }
                        break;
+               }
 
                default:
-                       warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u",
-                                fmt, num_fmt);
-                       if (arg == NULL) {
-                               warningf(pos, "too few arguments for format string");
-                               return;
-                       }
+                       warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
+                       if (suppress_assignment)
+                               continue;
+                       if (arg == NULL)
+                               goto too_few_args;
                        goto next_arg;
                }
 
+               if (suppress_assignment)
+                       continue;
+
                if (arg == NULL) {
-                       warningf(pos, "too few arguments for format string");
+too_few_args:
+                       warningf(WARN_FORMAT, pos, "too few arguments for format string");
                        return;
                }
 
@@ -858,9 +912,9 @@ static void check_scanf_format(const call_argument_t *arg,
                        }
 error_arg_type:
                        if (is_type_valid(arg_skip)) {
-                               warningf(&arg->expression->base.source_position,
-                                       "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
-                                       arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
+                               position_t const *const apos = &arg->expression->base.pos;
+                               char       const *const mod  = get_length_modifier_name(fmt_mod);
+                               warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T*' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
                        }
                }
 next_arg:
@@ -868,7 +922,7 @@ next_arg:
        }
        assert(fmt == '\0');
        if (c+1 < string + size) {
-               warningf(pos, "format string contains '\\0'");
+               warningf(WARN_FORMAT, pos, "format string contains '\\0'");
        }
        if (arg != NULL) {
                unsigned num_args = num_fmt;
@@ -876,9 +930,7 @@ next_arg:
                        ++num_args;
                        arg = arg->next;
                }
-               warningf(pos, "%u argument%s but only %u format specifier%s",
-                        num_args, num_args != 1 ? "s" : "",
-                        num_fmt, num_fmt != 1 ? "s" : "");
+               warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : "");
        }
 }
 
@@ -933,7 +985,7 @@ static const format_spec_t builtin_table[] = {
 
 void check_format(const call_expression_t *const call)
 {
-       if (!warning.format)
+       if (!is_warn_on(WARN_FORMAT))
                return;
 
        const expression_t *const func_expr = call->function;
@@ -943,31 +995,27 @@ void check_format(const call_expression_t *const call)
        const entity_t        *const entity = func_expr->reference.entity;
        const call_argument_t *      arg    = call->arguments;
 
-       if (false) {
-               /* the declaration has a GNU format attribute, check it */
-       } else {
-               /*
-                * For some functions we always check the format, even if it was not
-                * specified. This allows to check format even in MS mode or without
-                * header included.
-                */
-               const char *const name = entity->base.symbol->string;
-               for (size_t i = 0; i < lengthof(builtin_table); ++i) {
-                       if (strcmp(name, builtin_table[i].name) == 0) {
-                               switch (builtin_table[i].fmt_kind) {
-                               case FORMAT_PRINTF:
-                                       check_printf_format(arg, &builtin_table[i]);
-                                       break;
-                               case FORMAT_SCANF:
-                                       check_scanf_format(arg, &builtin_table[i]);
-                                       break;
-                               case FORMAT_STRFTIME:
-                               case FORMAT_STRFMON:
-                                       /* TODO: implement other cases */
-                                       break;
-                               }
+       /*
+        * For some functions we always check the format, even if it was not
+        * specified. This allows to check format even in MS mode or without
+        * header included.
+        */
+       const char *const name = entity->base.symbol->string;
+       for (size_t i = 0; i < lengthof(builtin_table); ++i) {
+               if (streq(name, builtin_table[i].name)) {
+                       switch (builtin_table[i].fmt_kind) {
+                       case FORMAT_PRINTF:
+                               check_printf_format(arg, &builtin_table[i]);
+                               break;
+                       case FORMAT_SCANF:
+                               check_scanf_format(arg, &builtin_table[i]);
+                               break;
+                       case FORMAT_STRFTIME:
+                       case FORMAT_STRFMON:
+                               /* TODO: implement other cases */
                                break;
                        }
+                       break;
                }
        }
 }