fix cases where compoundlits are constant/get an entity
[cparser] / preprocessor.c
index 6e3daf3..74b8277 100644 (file)
@@ -136,6 +136,11 @@ static symbol_t *symbol_percentcolon;
 static symbol_t *symbol_percentcolonpercentcolon;
 static symbol_t *symbol_percentgreater;
 
+static symbol_t *symbol_L;
+static symbol_t *symbol_U;
+static symbol_t *symbol_u;
+static symbol_t *symbol_u8;
+
 static void init_symbols(void)
 {
        symbol_colongreater             = symbol_table_insert(":>");
@@ -144,6 +149,11 @@ static void init_symbols(void)
        symbol_percentcolon             = symbol_table_insert("%:");
        symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
        symbol_percentgreater           = symbol_table_insert("%>");
+
+       symbol_L  = symbol_table_insert("L");
+       symbol_U  = symbol_table_insert("U");
+       symbol_u  = symbol_table_insert("u");
+       symbol_u8 = symbol_table_insert("u8");
 }
 
 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
@@ -684,6 +694,18 @@ string_t make_string(char const *const string)
        return sym_make_string(STRING_ENCODING_CHAR);
 }
 
+static utf32 get_string_encoding_limit(string_encoding_t const enc)
+{
+       switch (enc) {
+       case STRING_ENCODING_CHAR:   return 0xFF;
+       case STRING_ENCODING_CHAR16: return 0xFFFF;
+       case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
+       case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
+       case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
+       }
+       panic("invalid string encoding");
+}
+
 static void parse_string(utf32 const delimiter, token_kind_t const kind,
                          string_encoding_t const enc,
                          char const *const context)
@@ -692,15 +714,16 @@ static void parse_string(utf32 const delimiter, token_kind_t const kind,
 
        eat(delimiter);
 
+       utf32 const limit = get_string_encoding_limit(enc);
        while (true) {
                switch (input.c) {
                case '\\': {
                        if (resolve_escape_sequences) {
                                utf32 const tc = parse_escape_sequence();
+                               if (tc > limit) {
+                                       warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
+                               }
                                if (enc == STRING_ENCODING_CHAR) {
-                                       if (tc >= 0x100) {
-                                               warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
-                                       }
                                        obstack_1grow(&symbol_obstack, tc);
                                } else {
                                        obstack_grow_utf8(&symbol_obstack, tc);
@@ -1133,6 +1156,17 @@ static inline void eat_token(token_kind_t const kind)
        next_input_token();
 }
 
+static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
+{
+       if (sym == symbol_L) return STRING_ENCODING_WIDE;
+       if (c_mode & _C11) {
+               if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
+               if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
+               if (sym == symbol_u8) return STRING_ENCODING_UTF8;
+       }
+       return STRING_ENCODING_CHAR;
+}
+
 static void parse_symbol(void)
 {
        assert(obstack_object_size(&symbol_obstack) == 0);
@@ -1190,19 +1224,26 @@ end_symbol:
        obstack_1grow(&symbol_obstack, '\0');
        char *string = obstack_finish(&symbol_obstack);
 
-       /* might be a wide string or character constant ( L"string"/L'c' ) */
-       if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
-               obstack_free(&symbol_obstack, string);
-               parse_string_literal(STRING_ENCODING_WIDE);
-               return;
-       } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
-               obstack_free(&symbol_obstack, string);
-               parse_character_constant(STRING_ENCODING_WIDE);
-               return;
-       }
-
        symbol_t *symbol = symbol_table_insert(string);
 
+       /* Might be a prefixed string or character constant: L/U/u/u8"string". */
+       if (input.c == '"') {
+               string_encoding_t const enc = identify_encoding_prefix(symbol);
+               if (enc != STRING_ENCODING_CHAR) {
+                       parse_string_literal(enc);
+                       return;
+               }
+       } else if (input.c == '\'') {
+               string_encoding_t const enc = identify_encoding_prefix(symbol);
+               if (enc != STRING_ENCODING_CHAR) {
+                       if (enc == STRING_ENCODING_UTF8) {
+                               errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
+                       }
+                       parse_character_constant(enc);
+                       return;
+               }
+       }
+
        pp_token.kind        = symbol->ID;
        pp_token.base.symbol = symbol;
 
@@ -1491,8 +1532,7 @@ digraph_percentcolon:
        default:
 dollar_sign:
                if (error_on_unknown_chars) {
-                       errorf(&pp_token.base.source_position,
-                              "unknown character '%lc' found\n", input.c);
+                       errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
                        next_char();
                        goto restart;
                } else {