X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=format_check.c;h=59902485ed6924aa50f0ed0049a37395488c7c9b;hb=cf3649b6e421b31035a3692a1d379c9ad4965c01;hp=0a31c2706ad4bf330f2b9575e18cd60e7492c6ce;hpb=a4f1ed0c99f2485820d192ccc047878913a6a16d;p=cparser diff --git a/format_check.c b/format_check.c index 0a31c27..5990248 100644 --- a/format_check.c +++ b/format_check.c @@ -23,6 +23,7 @@ #include "format_check.h" #include "symbol_t.h" #include "ast_t.h" +#include "entity_t.h" #include "diagnostic.h" #include "types.h" #include "type_t.h" @@ -59,6 +60,13 @@ typedef enum format_length_modifier_t { FMT_MOD_I64 } format_length_modifier_t; +typedef struct format_spec_t { + const char *name; /**< name of the function */ + format_kind_t fmt_kind; /**< kind */ + unsigned fmt_idx; /**< index of the format string */ + unsigned arg_idx; /**< index of the first argument */ +} format_spec_t; + static const char* get_length_modifier_name(const format_length_modifier_t mod) { static const char* const names[] = { @@ -145,15 +153,15 @@ static bool atend(vchar_t *self) { /** * Check printf-style format. */ -static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt, - unsigned idx_param) +static void check_printf_format(const call_argument_t *arg, const format_spec_t *spec) { - unsigned num_fmt = 0; - /* find format arg */ unsigned idx = 0; - for (; idx < idx_fmt; ++idx) + for (; idx < spec->fmt_idx; ++idx) { + if (arg == NULL) + return; arg = arg->next; + } const expression_t *fmt_expr = arg->expression; if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) { @@ -177,11 +185,12 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt, return; } /* find the real args */ - for(; idx < idx_param; ++idx) + for(; idx < spec->arg_idx && arg != NULL; ++idx) arg = arg->next; const source_position_t *pos = &fmt_expr->base.source_position; - unsigned fmt = vchar.first(&vchar); + unsigned fmt = vchar.first(&vchar); + unsigned num_fmt = 0; for (; fmt != '\0'; fmt = vchar.next(&vchar)) { if (fmt != '%') continue; @@ -224,14 +233,14 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt, case ' ': if (fmt_flags & FMT_FLAG_PLUS) { - warningf(pos, "' ' is overridden by prior '+' in conversion specification"); + warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt); } flag = FMT_FLAG_SPACE; break; case '+': if (fmt_flags & FMT_FLAG_SPACE) { - warningf(pos, "'+' overrides prior ' ' in conversion specification"); + warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt); } flag = FMT_FLAG_PLUS; break; @@ -239,7 +248,7 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt, default: goto break_fmt_flags; } if (fmt_flags & flag) { - warningf(pos, "repeated flag '%c' in conversion specification", (char)fmt); + warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt); } fmt_flags |= flag; fmt = vchar.next(&vchar); @@ -250,12 +259,12 @@ break_fmt_flags: if (fmt == '*') { fmt = vchar.next(&vchar); if (arg == NULL) { - warningf(pos, "missing argument for '*' field width in conversion specification"); + warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt); return; } const type_t *const arg_type = arg->expression->base.type; if (arg_type != type_int) { - warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type); + warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type); } arg = arg->next; } else { @@ -271,12 +280,12 @@ break_fmt_flags: if (fmt == '*') { fmt = vchar.next(&vchar); if (arg == NULL) { - warningf(pos, "missing argument for '*' precision in conversion specification"); + warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt); return; } const type_t *const arg_type = arg->expression->base.type; if (arg_type != type_int) { - warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type); + warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type); } arg = arg->next; } else { @@ -390,7 +399,6 @@ break_fmt_flags: case 'x': allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO; goto eval_fmt_mod_unsigned; - break; case 'u': allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO; @@ -509,23 +517,27 @@ eval_fmt_mod_unsigned: break; default: - warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)fmt); + warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt); + if (arg == NULL) { + warningf(pos, "too few arguments for format string"); + return; + } goto next_arg; } format_flags_t wrong_flags = fmt_flags & ~allowed_flags; if (wrong_flags != 0) { - char wrong[8]; - int idx = 0; - if (wrong_flags & FMT_FLAG_HASH) wrong[idx++] = '#'; - if (wrong_flags & FMT_FLAG_ZERO) wrong[idx++] = '0'; - if (wrong_flags & FMT_FLAG_MINUS) wrong[idx++] = '-'; - if (wrong_flags & FMT_FLAG_SPACE) wrong[idx++] = ' '; - if (wrong_flags & FMT_FLAG_PLUS) wrong[idx++] = '+'; - if (wrong_flags & FMT_FLAG_TICK) wrong[idx++] = '\''; - wrong[idx] = '\0'; - - warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c", wrong, fmt); + char wrong[8]; + char *p = wrong; + if (wrong_flags & FMT_FLAG_HASH) *p++ = '#'; + if (wrong_flags & FMT_FLAG_ZERO) *p++ = '0'; + if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-'; + if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' '; + if (wrong_flags & FMT_FLAG_PLUS) *p++ = '+'; + if (wrong_flags & FMT_FLAG_TICK) *p++ = '\''; + *p = '\0'; + + warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt); } if (arg == NULL) { @@ -537,6 +549,13 @@ eval_fmt_mod_unsigned: type_t *const arg_type = arg->expression->base.type; type_t *const arg_skip = skip_typeref(arg_type); type_t *const expected_type_skip = skip_typeref(expected_type); + + if (fmt == 'p') { + /* allow any pointer type for %p, not just void */ + if (is_type_pointer(arg_skip)) + goto next_arg; + } + if (is_type_pointer(expected_type_skip)) { if (is_type_pointer(arg_skip)) { type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to); @@ -553,15 +572,337 @@ eval_fmt_mod_unsigned: } if (is_type_valid(arg_skip)) { warningf(pos, - "argument type '%T' does not match conversion specifier '%%%s%c'", - arg_type, get_length_modifier_name(fmt_mod), (char)fmt); + "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", + arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt); + } + } +next_arg: + arg = arg->next; + } + if (!atend(&vchar)) { + warningf(pos, "format string contains '\\0'"); + } + if (arg != NULL) { + unsigned num_args = num_fmt; + while (arg != NULL) { + ++num_args; + arg = arg->next; + } + warningf(pos, "%u argument%s but only %u format specifier%s", + num_args, num_args != 1 ? "s" : "", + num_fmt, num_fmt != 1 ? "s" : ""); + } +} + +/** + * Check scanf-style format. + */ +static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec) +{ + /* find format arg */ + unsigned idx = 0; + for (; idx < spec->fmt_idx; ++idx) { + if (arg == NULL) + return; + arg = arg->next; + } + + const expression_t *fmt_expr = arg->expression; + if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) { + fmt_expr = fmt_expr->unary.value; + } + + vchar_t vchar; + if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) { + vchar.string = &fmt_expr->wide_string.value; + vchar.size = fmt_expr->wide_string.value.size; + vchar.first = wstring_first; + vchar.next = wstring_next; + vchar.is_digit = wstring_isdigit; + } else if (fmt_expr->kind == EXPR_STRING_LITERAL) { + vchar.string = &fmt_expr->string.value; + vchar.size = fmt_expr->string.value.size; + vchar.first = string_first; + vchar.next = string_next; + vchar.is_digit = string_isdigit; + } else { + return; + } + /* find the real args */ + for (; idx < spec->arg_idx && arg != NULL; ++idx) + arg = arg->next; + + const source_position_t *pos = &fmt_expr->base.source_position; + unsigned fmt = vchar.first(&vchar); + unsigned num_fmt = 0; + for (; fmt != '\0'; fmt = vchar.next(&vchar)) { + if (fmt != '%') + continue; + fmt = vchar.next(&vchar); + + if (fmt == '%') + continue; + + ++num_fmt; + + /* length modifier */ + format_length_modifier_t fmt_mod; + switch (fmt) { + case 'h': + fmt = vchar.next(&vchar); + if (fmt == 'h') { + fmt = vchar.next(&vchar); + fmt_mod = FMT_MOD_hh; + } else { + fmt_mod = FMT_MOD_h; + } + break; + + case 'l': + fmt = vchar.next(&vchar); + if (fmt == 'l') { + fmt = vchar.next(&vchar); + fmt_mod = FMT_MOD_ll; + } else { + fmt_mod = FMT_MOD_l; + } + break; + + case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L; break; + case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j; break; + case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t; break; + case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z; break; + /* microsoft mode */ + case 'w': + if (c_mode & _MS) { + fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w; + } else { + fmt_mod = FMT_MOD_NONE; + } + break; + case 'I': + if (c_mode & _MS) { + fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I; + if (fmt == '3') { + fmt = vchar.next(&vchar); + if (fmt == '2') { + fmt = vchar.next(&vchar); + fmt_mod = FMT_MOD_I32; + } else { + /* rewind */ + --vchar.position; + } + } else if (fmt == '6') { + fmt = vchar.next(&vchar); + if (fmt == '4') { + fmt = vchar.next(&vchar); + fmt_mod = FMT_MOD_I64; + } else { + /* rewind */ + --vchar.position; + } + } + } else { + fmt_mod = FMT_MOD_NONE; + } + break; + default: + fmt_mod = FMT_MOD_NONE; + break; + } + + if (fmt == '\0') { + warningf(pos, "dangling %% in format string"); + break; + } + + type_t *expected_type; + switch (fmt) { + case 'd': + case 'i': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; + case FMT_MOD_hh: expected_type = type_signed_char; break; + case FMT_MOD_h: expected_type = type_short; break; + case FMT_MOD_l: expected_type = type_long; break; + case FMT_MOD_ll: expected_type = type_long_long; break; + case FMT_MOD_j: expected_type = type_intmax_t; break; + case FMT_MOD_z: expected_type = type_ssize_t; break; + case FMT_MOD_t: expected_type = type_ptrdiff_t; break; + case FMT_MOD_I: expected_type = type_ptrdiff_t; break; + case FMT_MOD_I32: expected_type = type_int32; break; + case FMT_MOD_I64: expected_type = type_int64; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + case 'o': + case 'X': + case 'x': + goto eval_fmt_mod_unsigned; + + case 'u': +eval_fmt_mod_unsigned: + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_unsigned_int; break; + case FMT_MOD_hh: expected_type = type_unsigned_char; break; + case FMT_MOD_h: expected_type = type_unsigned_short; break; + case FMT_MOD_l: expected_type = type_unsigned_long; break; + case FMT_MOD_ll: expected_type = type_unsigned_long_long; break; + case FMT_MOD_j: expected_type = type_uintmax_t; break; + case FMT_MOD_z: expected_type = type_size_t; break; + case FMT_MOD_t: expected_type = type_uptrdiff_t; break; + case FMT_MOD_I: expected_type = type_size_t; break; + case FMT_MOD_I32: expected_type = type_unsigned_int32; break; + case FMT_MOD_I64: expected_type = type_unsigned_int64; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + case 'A': + case 'a': + case 'E': + case 'e': + case 'F': + case 'f': + case 'G': + case 'g': + switch (fmt_mod) { + case FMT_MOD_l: /* l modifier is ignored */ + case FMT_MOD_NONE: expected_type = type_double; break; + case FMT_MOD_L: expected_type = type_long_double; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + case 'C': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_wchar_t; + break; + + case 'c': + expected_type = type_int; + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; /* TODO promoted char */ + case FMT_MOD_l: expected_type = type_wint_t; break; + case FMT_MOD_w: expected_type = type_wchar_t; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + case 'S': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_wchar_t; + break; + + case 's': + case '[': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_char; break; + case FMT_MOD_l: expected_type = type_wchar_t; break; + case FMT_MOD_w: expected_type = type_wchar_t; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + case 'p': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_void_ptr; + break; + + case 'n': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; + case FMT_MOD_hh: expected_type = type_signed_char; break; + case FMT_MOD_h: expected_type = type_short; break; + case FMT_MOD_l: expected_type = type_long; break; + case FMT_MOD_ll: expected_type = type_long_long; break; + case FMT_MOD_j: expected_type = type_intmax_t; break; + case FMT_MOD_z: expected_type = type_ssize_t; break; + case FMT_MOD_t: expected_type = type_ptrdiff_t; break; + + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; + + default: + warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt); + if (arg == NULL) { + warningf(pos, "too few arguments for format string"); + return; + } + goto next_arg; + } + + if (arg == NULL) { + warningf(pos, "too few arguments for format string"); + return; + } + + { /* create a scope here to prevent warning about the jump to next_arg */ + type_t *const arg_type = arg->expression->base.type; + type_t *const arg_skip = skip_typeref(arg_type); + type_t *const expected_type_skip = skip_typeref(expected_type); + + if (! is_type_pointer(arg_skip)) + goto error_arg_type; + type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to); + + if (fmt == 'p') { + /* allow any pointer type for %p, not just void */ + if (is_type_pointer(ptr_skip)) + goto next_arg; + } + + /* do NOT allow const or restrict, all other should be ok */ + if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE)) + goto error_arg_type; + type_t *const unqual_ptr = get_unqualified_type(ptr_skip); + if (unqual_ptr == expected_type_skip) { + goto next_arg; + } else if (expected_type_skip == type_char) { + /* char matches with unsigned char AND signed char */ + if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char) + goto next_arg; + } +error_arg_type: + if (is_type_valid(arg_skip)) { + warningf(pos, + "argument type '%T' does not match conversion specifier '%%%s%c' at position %u", + arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt); } } next_arg: arg = arg->next; } if (!atend(&vchar)) { - warningf(pos, "format string contains NUL"); + warningf(pos, "format string contains '\\0'"); } if (arg != NULL) { unsigned num_args = num_fmt; @@ -569,18 +910,13 @@ next_arg: ++num_args; arg = arg->next; } - warningf(pos, "%u argument%s but only %u format string%s", + warningf(pos, "%u argument%s but only %u format specifier%s", num_args, num_args != 1 ? "s" : "", num_fmt, num_fmt != 1 ? "s" : ""); } } -static const struct { - const char *name; - format_kind_t fmt_kind; - unsigned fmt_idx; - unsigned arg_idx; -} builtin_table[] = { +static const format_spec_t builtin_table[] = { { "printf", FORMAT_PRINTF, 0, 1 }, { "wprintf", FORMAT_PRINTF, 0, 1 }, { "sprintf", FORMAT_PRINTF, 1, 2 }, @@ -638,23 +974,30 @@ void check_format(const call_expression_t *const call) if (func_expr->kind != EXPR_REFERENCE) return; - const declaration_t *const decl = func_expr->reference.declaration; - const call_argument_t * arg = call->arguments; + const entity_t *const entity = func_expr->reference.entity; + const call_argument_t * arg = call->arguments; - if(false) { + if (false) { /* the declaration has a GNU format attribute, check it */ } else { /* * For some functions we always check the format, even if it was not specified. * This allows to check format even in MS mode or without header included. */ - const char *const name = decl->symbol->string; - for(size_t i = 0; i < sizeof(builtin_table) / sizeof(builtin_table[0]); ++i) { - if(strcmp(name, builtin_table[i].name) == 0) { - if(builtin_table[i].fmt_kind == FORMAT_PRINTF) { - check_format_arguments(arg, - builtin_table[i].fmt_idx, - builtin_table[i].arg_idx); + const char *const name = entity->base.symbol->string; + for (size_t i = 0; i < sizeof(builtin_table) / sizeof(builtin_table[0]); ++i) { + if (strcmp(name, builtin_table[i].name) == 0) { + switch (builtin_table[i].fmt_kind) { + case FORMAT_PRINTF: + check_printf_format(arg, &builtin_table[i]); + break; + case FORMAT_SCANF: + check_scanf_format(arg, &builtin_table[i]); + break; + case FORMAT_STRFTIME: + case FORMAT_STRFMON: + /* TODO: implement other cases */ + break; } break; }