Evaluate the argument of va_end for its side effects.
[cparser] / format_check.c
index 95ec544..5990248 100644 (file)
@@ -23,6 +23,7 @@
 #include "format_check.h"
 #include "symbol_t.h"
 #include "ast_t.h"
+#include "entity_t.h"
 #include "diagnostic.h"
 #include "types.h"
 #include "type_t.h"
@@ -59,6 +60,13 @@ typedef enum format_length_modifier_t {
        FMT_MOD_I64
 } format_length_modifier_t;
 
+typedef struct format_spec_t {  /* one entry per function whose format string is checked */
+       const char    *name;     /**< name of the function, compared with strcmp() against the called symbol */
+       format_kind_t  fmt_kind; /**< format family (FORMAT_PRINTF, FORMAT_SCANF, ...) selecting the checker */
+       unsigned       fmt_idx;  /**< 0-based position of the format string in the call's argument list */
+       unsigned       arg_idx;  /**< 0-based position of the first argument matched against the format */
+} format_spec_t;
+
 static const char* get_length_modifier_name(const format_length_modifier_t mod)
 {
        static const char* const names[] = {
@@ -145,19 +153,17 @@ static bool atend(vchar_t *self) {
 /**
  * Check printf-style format.
  */
-static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt,
-               unsigned idx_param)
+static void check_printf_format(const call_argument_t *arg, const format_spec_t *spec)
 {
-       const call_argument_t *fmt_arg;
-       unsigned num_fmt = 0;
-
        /* find format arg */
        unsigned idx = 0;
-       for (; idx < idx_fmt; ++idx)
+       for (; idx < spec->fmt_idx; ++idx) {
+               if (arg == NULL)
+                       return;
                arg = arg->next;
-       fmt_arg = arg;
+       }
 
-       const expression_t *fmt_expr = fmt_arg->expression;
+       const expression_t *fmt_expr = arg->expression;
        if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
                fmt_expr = fmt_expr->unary.value;
        }
@@ -179,11 +185,12 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt,
                return;
        }
        /* find the real args */
-       for(; idx < idx_param; ++idx)
+       for(; idx < spec->arg_idx && arg != NULL; ++idx)
                arg = arg->next;
 
        const source_position_t *pos = &fmt_expr->base.source_position;
-       unsigned fmt = vchar.first(&vchar);
+       unsigned fmt     = vchar.first(&vchar);
+       unsigned num_fmt = 0;
        for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
                if (fmt != '%')
                        continue;
@@ -226,14 +233,14 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt,
 
                                        case ' ':
                                                if (fmt_flags & FMT_FLAG_PLUS) {
-                                                       warningf(pos, "' ' is overridden by prior '+' in conversion specification");
+                                                       warningf(pos, "' ' is overridden by prior '+' in conversion specification %u", num_fmt);
                                                }
                                                flag = FMT_FLAG_SPACE;
                                                break;
 
                                        case '+':
                                                if (fmt_flags & FMT_FLAG_SPACE) {
-                                                       warningf(pos, "'+' overrides prior ' ' in conversion specification");
+                                                       warningf(pos, "'+' overrides prior ' ' in conversion specification %u", num_fmt);
                                                }
                                                flag = FMT_FLAG_PLUS;
                                                break;
@@ -241,7 +248,7 @@ static void check_format_arguments(const call_argument_t *arg, unsigned idx_fmt,
                                        default: goto break_fmt_flags;
                                }
                                if (fmt_flags & flag) {
-                                       warningf(pos, "repeated flag '%c' in conversion specification", (char)fmt);
+                                       warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
                                }
                                fmt_flags |= flag;
                                fmt = vchar.next(&vchar);
@@ -252,12 +259,12 @@ break_fmt_flags:
                        if (fmt == '*') {
                                fmt = vchar.next(&vchar);
                                if (arg == NULL) {
-                                       warningf(pos, "missing argument for '*' field width in conversion specification");
+                                       warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
                                        return;
                                }
                                const type_t *const arg_type = arg->expression->base.type;
                                if (arg_type != type_int) {
-                                       warningf(pos, "argument for '*' field width in conversion specification is not an 'int', but an '%T'", arg_type);
+                                       warningf(pos, "argument for '*' field width in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
                                }
                                arg = arg->next;
                        } else {
@@ -273,12 +280,12 @@ break_fmt_flags:
                        if (fmt == '*') {
                                fmt = vchar.next(&vchar);
                                if (arg == NULL) {
-                                       warningf(pos, "missing argument for '*' precision in conversion specification");
+                                       warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
                                        return;
                                }
                                const type_t *const arg_type = arg->expression->base.type;
                                if (arg_type != type_int) {
-                                       warningf(pos, "argument for '*' precision in conversion specification is not an 'int', but an '%T'", arg_type);
+                                       warningf(pos, "argument for '*' precision in conversion specification %u is not an 'int', but an '%T'", num_fmt, arg_type);
                                }
                                arg = arg->next;
                        } else {
@@ -392,7 +399,6 @@ break_fmt_flags:
                        case 'x':
                                allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_HASH | FMT_FLAG_ZERO;
                                goto eval_fmt_mod_unsigned;
-                               break;
 
                        case 'u':
                                allowed_flags = FMT_FLAG_MINUS | FMT_FLAG_ZERO;
@@ -511,23 +517,27 @@ eval_fmt_mod_unsigned:
                                break;
 
                        default:
-                               warningf(pos, "encountered unknown conversion specifier '%%%C'", (wint_t)fmt);
+                               warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
+                               if (arg == NULL) {
+                                       warningf(pos, "too few arguments for format string");
+                                       return;
+                               }
                                goto next_arg;
                }
 
                format_flags_t wrong_flags = fmt_flags & ~allowed_flags;
                if (wrong_flags != 0) {
-                       char wrong[8];
-                       int idx = 0;
-                       if (wrong_flags & FMT_FLAG_HASH)  wrong[idx++] = '#';
-                       if (wrong_flags & FMT_FLAG_ZERO)  wrong[idx++] = '0';
-                       if (wrong_flags & FMT_FLAG_MINUS) wrong[idx++] = '-';
-                       if (wrong_flags & FMT_FLAG_SPACE) wrong[idx++] = ' ';
-                       if (wrong_flags & FMT_FLAG_PLUS)  wrong[idx++] = '+';
-                       if (wrong_flags & FMT_FLAG_TICK)  wrong[idx++] = '\'';
-                       wrong[idx] = '\0';
-
-                       warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c", wrong, fmt);
+                       char  wrong[8];
+                       char *p = wrong;
+                       if (wrong_flags & FMT_FLAG_HASH)  *p++ = '#';
+                       if (wrong_flags & FMT_FLAG_ZERO)  *p++ = '0';
+                       if (wrong_flags & FMT_FLAG_MINUS) *p++ = '-';
+                       if (wrong_flags & FMT_FLAG_SPACE) *p++ = ' ';
+                       if (wrong_flags & FMT_FLAG_PLUS)  *p++ = '+';
+                       if (wrong_flags & FMT_FLAG_TICK)  *p++ = '\'';
+                       *p = '\0';
+
+                       warningf(pos, "invalid format flags \"%s\" in conversion specification %%%c at position %u", wrong, fmt, num_fmt);
                }
 
                if (arg == NULL) {
@@ -539,6 +549,13 @@ eval_fmt_mod_unsigned:
                        type_t *const arg_type           = arg->expression->base.type;
                        type_t *const arg_skip           = skip_typeref(arg_type);
                        type_t *const expected_type_skip = skip_typeref(expected_type);
+
+                       if (fmt == 'p') {
+                               /* allow any pointer type for %p, not just void */
+                               if (is_type_pointer(arg_skip))
+                                       goto next_arg;
+                       }
+
                        if (is_type_pointer(expected_type_skip)) {
                                if (is_type_pointer(arg_skip)) {
                                        type_t *const exp_to = skip_typeref(expected_type_skip->pointer.points_to);
@@ -555,15 +572,337 @@ eval_fmt_mod_unsigned:
                        }
                        if (is_type_valid(arg_skip)) {
                                warningf(pos,
-                                       "argument type '%T' does not match conversion specifier '%%%s%c'",
-                                       arg_type, get_length_modifier_name(fmt_mod), (char)fmt);
+                                       "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
+                                       arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
+                       }
+               }
+next_arg:
+               arg = arg->next;
+       }
+       if (!atend(&vchar)) {
+               warningf(pos, "format string contains '\\0'");
+       }
+       if (arg != NULL) {
+               unsigned num_args = num_fmt;
+               while (arg != NULL) {
+                       ++num_args;
+                       arg = arg->next;
+               }
+               warningf(pos, "%u argument%s but only %u format specifier%s",
+                       num_args, num_args != 1 ? "s" : "",
+                       num_fmt, num_fmt != 1 ? "s" : "");
+       }
+}
+
+/**
+ * Check scanf-style format.
+ */
+static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
+{
+       /* find format arg */
+       unsigned idx = 0;
+       for (; idx < spec->fmt_idx; ++idx) {
+               if (arg == NULL)
+                       return;
+               arg = arg->next;
+       }
+
+       const expression_t *fmt_expr = arg->expression;
+       if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+               fmt_expr = fmt_expr->unary.value;
+       }
+
+       vchar_t vchar;
+       if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
+               vchar.string   = &fmt_expr->wide_string.value;
+               vchar.size     = fmt_expr->wide_string.value.size;
+               vchar.first    = wstring_first;
+               vchar.next     = wstring_next;
+               vchar.is_digit = wstring_isdigit;
+       } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
+               vchar.string   = &fmt_expr->string.value;
+               vchar.size     = fmt_expr->string.value.size;
+               vchar.first    = string_first;
+               vchar.next     = string_next;
+               vchar.is_digit = string_isdigit;
+       } else {
+               return;
+       }
+       /* find the real args */
+       for (; idx < spec->arg_idx && arg != NULL; ++idx)
+               arg = arg->next;
+
+       const source_position_t *pos = &fmt_expr->base.source_position;
+       unsigned fmt     = vchar.first(&vchar);
+       unsigned num_fmt = 0;
+       for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+               if (fmt != '%')
+                       continue;
+               fmt = vchar.next(&vchar);
+
+               if (fmt == '%')
+                       continue;
+
+               ++num_fmt;
+
+               /* length modifier */
+               format_length_modifier_t fmt_mod;
+               switch (fmt) {
+                       case 'h':
+                               fmt = vchar.next(&vchar);
+                               if (fmt == 'h') {
+                                       fmt = vchar.next(&vchar);
+                                       fmt_mod = FMT_MOD_hh;
+                               } else {
+                                       fmt_mod = FMT_MOD_h;
+                               }
+                               break;
+
+                       case 'l':
+                               fmt = vchar.next(&vchar);
+                               if (fmt == 'l') {
+                                       fmt = vchar.next(&vchar);
+                                       fmt_mod = FMT_MOD_ll;
+                               } else {
+                                       fmt_mod = FMT_MOD_l;
+                               }
+                               break;
+
+                       case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
+                       case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
+                       case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
+                       case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
+                       /* microsoft mode */
+                       case 'w':
+                               if (c_mode & _MS) {
+                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
+                               } else {
+                                       fmt_mod = FMT_MOD_NONE;
+                               }
+                               break;
+                       case 'I':
+                               if (c_mode & _MS) {
+                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
+                                       if (fmt == '3') {
+                                               fmt = vchar.next(&vchar);
+                                               if (fmt == '2') {
+                                                       fmt = vchar.next(&vchar);
+                                                       fmt_mod = FMT_MOD_I32;
+                                               } else {
+                                                       /* rewind */
+                                                       --vchar.position;
+                                               }
+                                       } else if (fmt == '6') {
+                                               fmt = vchar.next(&vchar);
+                                               if (fmt == '4') {
+                                                       fmt = vchar.next(&vchar);
+                                                       fmt_mod = FMT_MOD_I64;
+                                               } else {
+                                                       /* rewind */
+                                                       --vchar.position;
+                                               }
+                                       }
+                               } else {
+                                       fmt_mod = FMT_MOD_NONE;
+                               }
+                               break;
+                       default:
+                               fmt_mod = FMT_MOD_NONE;
+                               break;
+               }
+
+               if (fmt == '\0') {
+                       warningf(pos, "dangling %% in format string");
+                       break;
+               }
+
+               type_t            *expected_type;
+               switch (fmt) {
+                       case 'd':
+                       case 'i':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                                       case FMT_MOD_h:    expected_type = type_short;       break;
+                                       case FMT_MOD_l:    expected_type = type_long;        break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
+                                       case FMT_MOD_I32:  expected_type = type_int32;       break;
+                                       case FMT_MOD_I64:  expected_type = type_int64;       break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       case 'o':
+                       case 'X':
+                       case 'x':
+                               goto eval_fmt_mod_unsigned;
+
+                       case 'u':
+eval_fmt_mod_unsigned:
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
+                                       case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
+                                       case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
+                                       case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
+                                       case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
+                                       case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
+                                       case FMT_MOD_z:    expected_type = type_size_t;             break;
+                                       case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
+                                       case FMT_MOD_I:    expected_type = type_size_t;             break;
+                                       case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
+                                       case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       case 'A':
+                       case 'a':
+                       case 'E':
+                       case 'e':
+                       case 'F':
+                       case 'f':
+                       case 'G':
+                       case 'g':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_l:    /* l modifier is ignored */
+                                       case FMT_MOD_NONE: expected_type = type_double;      break;
+                                       case FMT_MOD_L:    expected_type = type_long_double; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       case 'C':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                       goto next_arg;
+                               }
+                               expected_type = type_wchar_t;
+                               break;
+
+                       case 'c':
+                               expected_type = type_int;
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
+                                       case FMT_MOD_l:    expected_type = type_wint_t;  break;
+                                       case FMT_MOD_w:    expected_type = type_wchar_t; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       case 'S':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                       goto next_arg;
+                               }
+                               expected_type = type_wchar_t;
+                               break;
+
+                       case 's':
+                       case '[':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_char;    break;
+                                       case FMT_MOD_l:    expected_type = type_wchar_t; break;
+                                       case FMT_MOD_w:    expected_type = type_wchar_t; break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       case 'p':
+                               if (fmt_mod != FMT_MOD_NONE) {
+                                       warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                       goto next_arg;
+                               }
+                               expected_type = type_void_ptr;
+                               break;
+
+                       case 'n':
+                               switch (fmt_mod) {
+                                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                                       case FMT_MOD_h:    expected_type = type_short;       break;
+                                       case FMT_MOD_l:    expected_type = type_long;        break;
+                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+
+                                       default:
+                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                                               goto next_arg;
+                               }
+                               break;
+
+                       default:
+                               warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
+                               if (arg == NULL) {
+                                       warningf(pos, "too few arguments for format string");
+                                       return;
+                               }
+                               goto next_arg;
+               }
+
+               if (arg == NULL) {
+                       warningf(pos, "too few arguments for format string");
+                       return;
+               }
+
+               {       /* create a scope here to prevent warning about the jump to next_arg */
+                       type_t *const arg_type           = arg->expression->base.type;
+                       type_t *const arg_skip           = skip_typeref(arg_type);
+                       type_t *const expected_type_skip = skip_typeref(expected_type);
+
+                       if (! is_type_pointer(arg_skip))
+                               goto error_arg_type;
+                       type_t *const ptr_skip = skip_typeref(arg_skip->pointer.points_to);
+
+                       if (fmt == 'p') {
+                               /* allow any pointer type for %p, not just void */
+                               if (is_type_pointer(ptr_skip))
+                                       goto next_arg;
+                       }
+
+                       /* do NOT allow const or volatile, all other should be ok */
+                       if (ptr_skip->base.qualifiers & (TYPE_QUALIFIER_CONST | TYPE_QUALIFIER_VOLATILE))
+                               goto error_arg_type;
+                       type_t *const unqual_ptr = get_unqualified_type(ptr_skip);
+                       if (unqual_ptr == expected_type_skip) {
+                               goto next_arg;
+                       } else if (expected_type_skip == type_char) {
+                               /* char matches with unsigned char AND signed char */
+                               if (unqual_ptr == type_signed_char || unqual_ptr == type_unsigned_char)
+                                       goto next_arg;
+                       }
+error_arg_type:
+                       if (is_type_valid(arg_skip)) {
+                               warningf(pos,
+                                       "argument type '%T' does not match conversion specifier '%%%s%c' at position %u",
+                                       arg_type, get_length_modifier_name(fmt_mod), (char)fmt, num_fmt);
                        }
                }
 next_arg:
                arg = arg->next;
        }
        if (!atend(&vchar)) {
-               warningf(pos, "format string contains NUL");
+               warningf(pos, "format string contains '\\0'");
        }
        if (arg != NULL) {
                unsigned num_args = num_fmt;
@@ -571,18 +910,13 @@ next_arg:
                        ++num_args;
                        arg = arg->next;
                }
-               warningf(pos, "%u argument%s but only %u format string%s",
+               warningf(pos, "%u argument%s but only %u format specifier%s",
                        num_args, num_args != 1 ? "s" : "",
                        num_fmt, num_fmt != 1 ? "s" : "");
        }
 }
 
-static const struct {
-       const char    *name;
-       format_kind_t  fmt_kind;
-       unsigned       fmt_idx;
-       unsigned       arg_idx;
-} builtin_table[] = {
+static const format_spec_t builtin_table[] = {
        { "printf",        FORMAT_PRINTF,   0, 1 },
        { "wprintf",       FORMAT_PRINTF,   0, 1 },
        { "sprintf",       FORMAT_PRINTF,   1, 2 },
@@ -640,23 +974,30 @@ void check_format(const call_expression_t *const call)
        if (func_expr->kind != EXPR_REFERENCE)
                return;
 
-       const declaration_t   *const decl = func_expr->reference.declaration;
-       const call_argument_t *      arg  = call->arguments;
+       const entity_t        *const entity = func_expr->reference.entity;
+       const call_argument_t *      arg    = call->arguments;
 
-       if(false) {
+       if (false) {
                /* the declaration has a GNU format attribute, check it */
        } else {
                /*
                 * For some functions we always check the format, even if it was not specified.
                 * This allows to check format even in MS mode or without header included.
                 */
-               const char            *const name = decl->symbol->string;
-               for(size_t i = 0; i < sizeof(builtin_table) / sizeof(builtin_table[0]); ++i) {
-                       if(strcmp(name, builtin_table[i].name) == 0) {
-                               if(builtin_table[i].fmt_kind == FORMAT_PRINTF) {
-                                       check_format_arguments(arg,
-                                                              builtin_table[i].fmt_idx,
-                                                              builtin_table[i].arg_idx);
+               const char *const name = entity->base.symbol->string;
+               for (size_t i = 0; i < sizeof(builtin_table) / sizeof(builtin_table[0]); ++i) {
+                       if (strcmp(name, builtin_table[i].name) == 0) {
+                               switch (builtin_table[i].fmt_kind) {
+                               case FORMAT_PRINTF:
+                                       check_printf_format(arg, &builtin_table[i]);
+                                       break;
+                               case FORMAT_SCANF:
+                                       check_scanf_format(arg, &builtin_table[i]);
+                                       break;
+                               case FORMAT_STRFTIME:
+                               case FORMAT_STRFMON:
+                                       /* TODO: implement other cases */
+                                       break;
                                }
                                break;
                        }