ast2firm: Factorise code to convert a value to its storage type.
[cparser] / format_check.c
index 16eafd1..bbb47e3 100644 (file)
 
 #include <ctype.h>
 
+#include "adt/strutil.h"
 #include "adt/util.h"
 #include "format_check.h"
 #include "symbol_t.h"
 #include "ast_t.h"
 #include "entity_t.h"
 #include "diagnostic.h"
+#include "parser.h"
 #include "types.h"
 #include "type_t.h"
 #include "warning.h"
@@ -92,7 +94,7 @@ static const char* get_length_modifier_name(const format_length_modifier_t mod)
        return names[mod];
 }
 
-static void warn_invalid_length_modifier(const source_position_t *pos,
+static void warn_invalid_length_modifier(const position_t *pos,
                                          const format_length_modifier_t mod,
                                          const utf32 conversion)
 {
@@ -107,7 +109,7 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                                         const call_argument_t *arg,
                                         const format_spec_t *spec)
 {
-       while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+       while (fmt_expr->kind == EXPR_UNARY_CAST) {
                fmt_expr = fmt_expr->unary.value;
        }
 
@@ -125,15 +127,14 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                return nt > nf ? nt : nf;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return -1;
 
-       const char *string = fmt_expr->literal.value.begin;
-       size_t      size   = fmt_expr->literal.value.size;
+       const char *string = fmt_expr->string_literal.value.begin;
+       size_t      size   = fmt_expr->string_literal.value.size;
        const char *c      = string;
 
-       const source_position_t *pos = &fmt_expr->base.source_position;
+       const position_t *pos = &fmt_expr->base.pos;
        unsigned num_fmt  = 0;
        unsigned num_args = 0;
        char     fmt;
@@ -142,10 +143,6 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                        continue;
                fmt = *(++c);
 
-               if (fmt == '\0') {
-                       warningf(WARN_FORMAT, pos, "dangling %% in format string");
-                       break;
-               }
                if (fmt == '%')
                        continue;
 
@@ -228,6 +225,10 @@ break_fmt_flags:
 
                /* precision */
                if (fmt == '.') {
+                       if (fmt_flags & FMT_FLAG_ZERO) {
+                               warningf(WARN_FORMAT, pos, "'0' flag ignored with precision in conversion specification %u", num_fmt);
+                       }
+
                        ++num_args;
                        fmt = *(++c);
                        if (fmt == '*') {
@@ -316,6 +317,10 @@ break_fmt_flags:
                                break;
                }
 
+               if (fmt == '\0') {
+                       warningf(WARN_FORMAT, pos, "dangling %% in format string");
+                       break;
+               }
 
                type_t            *expected_type;
                type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
@@ -324,17 +329,17 @@ break_fmt_flags:
                        case 'd':
                        case 'i':
                                switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_int;       break;
-                                       case FMT_MOD_hh:   expected_type = type_int;       break; /* TODO promoted signed char */
-                                       case FMT_MOD_h:    expected_type = type_int;       break; /* TODO promoted short */
-                                       case FMT_MOD_l:    expected_type = type_long;      break;
-                                       case FMT_MOD_ll:   expected_type = type_long_long; break;
-                                       case FMT_MOD_j:    expected_type = type_intmax_t;  break;
-                                       case FMT_MOD_z:    expected_type = type_ssize_t;   break;
-                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t; break;
-                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t; break;
-                                       case FMT_MOD_I32:  expected_type = type_int32;     break;
-                                       case FMT_MOD_I64:  expected_type = type_int64;     break;
+                                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                                       case FMT_MOD_h:    expected_type = type_short;       break;
+                                       case FMT_MOD_l:    expected_type = type_long;        break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I32:  expected_type = type_int32;       break;
+                                       case FMT_MOD_I64:  expected_type = type_int64;       break;
 
                                        default:
                                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
@@ -354,8 +359,8 @@ break_fmt_flags:
 eval_fmt_mod_unsigned:
                                switch (fmt_mod) {
                                        case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
-                                       case FMT_MOD_hh:   expected_type = type_int;                break; /* TODO promoted unsigned char */
-                                       case FMT_MOD_h:    expected_type = type_int;                break; /* TODO promoted unsigned short */
+                                       case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
+                                       case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
                                        case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
                                        case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
                                        case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
@@ -468,8 +473,7 @@ eval_fmt_mod_unsigned:
                        default:
                                warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
                                if (arg == NULL) {
-                                       warningf(WARN_FORMAT, pos, "too few arguments for format string");
-                                       return -1;
+                                       goto too_few_args;
                                }
                                goto next_arg;
                }
@@ -490,6 +494,7 @@ eval_fmt_mod_unsigned:
                }
 
                if (arg == NULL) {
+too_few_args:
                        warningf(WARN_FORMAT, pos, "too few arguments for format string");
                        return -1;
                }
@@ -516,11 +521,20 @@ eval_fmt_mod_unsigned:
                                }
                        } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
                                goto next_arg;
+                       } else if (arg->expression->kind == EXPR_UNARY_CAST) {
+                               expression_t const *const expr        = arg->expression->unary.value;
+                               type_t             *const unprom_type = skip_typeref(expr->base.type);
+                               if (get_unqualified_type(unprom_type) == expected_type_skip) {
+                                       goto next_arg;
+                               }
+                               if (expected_type_skip == type_unsigned_int && !is_type_signed(unprom_type)) {
+                                       goto next_arg;
+                               }
                        }
                        if (is_type_valid(arg_skip)) {
-                               source_position_t const *const apos = &arg->expression->base.source_position;
-                               char              const *const mod  = get_length_modifier_name(fmt_mod);
-                               warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
+                               position_t const *const apos = &arg->expression->base.pos;
+                               char       const *const mod  = get_length_modifier_name(fmt_mod);
+                               warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
                        }
                }
 next_arg:
@@ -561,7 +575,7 @@ static void check_printf_format(call_argument_t const *arg,
        for (; arg != NULL; arg = arg->next)
                ++num_args;
        if (num_args > (size_t)num_fmt) {
-               source_position_t const *const pos = &fmt_expr->base.source_position;
+               position_t const *const pos = &fmt_expr->base.pos;
                warningf(WARN_FORMAT, pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt,  num_fmt  != 1 ? "s" : "");
        }
 }
@@ -581,38 +595,50 @@ static void check_scanf_format(const call_argument_t *arg,
        }
 
        const expression_t *fmt_expr = arg->expression;
-       if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+       if (fmt_expr->kind == EXPR_UNARY_CAST) {
                fmt_expr = fmt_expr->unary.value;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return;
 
-       const char *string = fmt_expr->literal.value.begin;
-       size_t      size   = fmt_expr->literal.value.size;
+       const char *string = fmt_expr->string_literal.value.begin;
+       size_t      size   = fmt_expr->string_literal.value.size;
        const char *c      = string;
 
        /* find the real args */
        for (; idx < spec->arg_idx && arg != NULL; ++idx)
                arg = arg->next;
 
-       const source_position_t *pos = &fmt_expr->base.source_position;
+       const position_t *pos = &fmt_expr->base.pos;
        unsigned num_fmt = 0;
        char     fmt;
        for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
                if (fmt != '%')
                        continue;
                fmt = *(++c);
-               if (fmt == '\0') {
-                       warningf(WARN_FORMAT, pos, "dangling '%%' in format string");
-                       break;
-               }
                if (fmt == '%')
                        continue;
 
                ++num_fmt;
 
+               bool suppress_assignment = false;
+               if (fmt == '*') {
+                       fmt = *++c;
+                       suppress_assignment = true;
+               }
+
+               size_t width = 0;
+               if ('0' <= fmt && fmt <= '9') {
+                       do {
+                               width = width * 10 + (fmt - '0');
+                               fmt   = *++c;
+                       } while ('0' <= fmt && fmt <= '9');
+                       if (width == 0) {
+                               warningf(WARN_FORMAT, pos, "field width is zero at format %u", num_fmt);
+                       }
+               }
+
                /* look for length modifiers */
                format_length_modifier_t fmt_mod = FMT_MOD_NONE;
                switch (fmt) {
@@ -750,20 +776,32 @@ static void check_scanf_format(const call_argument_t *arg,
                                goto next_arg;
                        }
                        expected_type = type_wchar_t;
-                       break;
+                       goto check_c_width;
 
-               case 'c':
-                       expected_type = type_int;
+               case 'c': {
                        switch (fmt_mod) {
-                       case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
-                       case FMT_MOD_l:    expected_type = type_wint_t;  break;
+                       case FMT_MOD_NONE: expected_type = type_char;    break;
+                       case FMT_MOD_l:    expected_type = type_wchar_t; break;
                        case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
                        default:
                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                goto next_arg;
                        }
+
+check_c_width:
+                       if (width == 0)
+                               width = 1;
+                       if (!suppress_assignment && arg != NULL) {
+                               type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
+                               if (is_type_array(type)       &&
+                                   type->array.size_constant &&
+                                   width > type->array.size) {
+                                       warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters at format %u", type, width, num_fmt);
+                               }
+                       }
                        break;
+               }
 
                case 'S':
                        if (fmt_mod != FMT_MOD_NONE) {
@@ -774,7 +812,7 @@ static void check_scanf_format(const call_argument_t *arg,
                        break;
 
                case 's':
-               case '[':
+               case '[': {
                        switch (fmt_mod) {
                                case FMT_MOD_NONE: expected_type = type_char;    break;
                                case FMT_MOD_l:    expected_type = type_wchar_t; break;
@@ -784,17 +822,33 @@ static void check_scanf_format(const call_argument_t *arg,
                                        warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                        goto next_arg;
                        }
+
+                       if (!suppress_assignment &&
+                           width != 0           &&
+                           arg   != NULL) {
+                               type_t *const type = skip_typeref(revert_automatic_type_conversion(arg->expression));
+                               if (is_type_array(type)       &&
+                                   type->array.size_constant &&
+                                   width >= type->array.size) {
+                                       warningf(WARN_FORMAT, pos, "target buffer '%T' is too small for %u characters and \\0 at format %u", type, width, num_fmt);
+                               }
+                       }
                        break;
+               }
 
                case 'p':
                        if (fmt_mod != FMT_MOD_NONE) {
                                warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                goto next_arg;
                        }
-                       expected_type = type_void_ptr;
+                       expected_type = type_void;
                        break;
 
-               case 'n':
+               case 'n': {
+                       if (suppress_assignment) {
+                               warningf(WARN_FORMAT, pos, "conversion '%n' cannot be suppressed with '*' at format %u", num_fmt);
+                       }
+
                        switch (fmt_mod) {
                        case FMT_MOD_NONE: expected_type = type_int;         break;
                        case FMT_MOD_hh:   expected_type = type_signed_char; break;
@@ -810,17 +864,22 @@ static void check_scanf_format(const call_argument_t *arg,
                                goto next_arg;
                        }
                        break;
+               }
 
                default:
                        warningf(WARN_FORMAT, pos, "encountered unknown conversion specifier '%%%c' at format %u", fmt, num_fmt);
-                       if (arg == NULL) {
-                               warningf(WARN_FORMAT, pos, "too few arguments for format string");
-                               return;
-                       }
+                       if (suppress_assignment)
+                               continue;
+                       if (arg == NULL)
+                               goto too_few_args;
                        goto next_arg;
                }
 
+               if (suppress_assignment)
+                       continue;
+
                if (arg == NULL) {
+too_few_args:
                        warningf(WARN_FORMAT, pos, "too few arguments for format string");
                        return;
                }
@@ -853,9 +912,9 @@ static void check_scanf_format(const call_argument_t *arg,
                        }
 error_arg_type:
                        if (is_type_valid(arg_skip)) {
-                               source_position_t const *const apos = &arg->expression->base.source_position;
-                               char              const *const mod  = get_length_modifier_name(fmt_mod);
-                               warningf(WARN_FORMAT, apos, "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", arg_type, mod, (char)fmt, num_fmt);
+                               position_t const *const apos = &arg->expression->base.pos;
+                               char       const *const mod  = get_length_modifier_name(fmt_mod);
+                               warningf(WARN_FORMAT, apos, "conversion '%%%s%c' at position %u specifies type '%T*' but the argument has type '%T'", mod, (char)fmt, num_fmt, expected_type, arg_type);
                        }
                }
 next_arg:
@@ -943,7 +1002,7 @@ void check_format(const call_expression_t *const call)
         */
        const char *const name = entity->base.symbol->string;
        for (size_t i = 0; i < lengthof(builtin_table); ++i) {
-               if (strcmp(name, builtin_table[i].name) == 0) {
+               if (streq(name, builtin_table[i].name)) {
                        switch (builtin_table[i].fmt_kind) {
                        case FORMAT_PRINTF:
                                check_printf_format(arg, &builtin_table[i]);