Improve error recovery in parse_local_label_declaration().

[cparser] / lexer.c
diff --git a/lexer.c b/lexer.c

index e29ec4a..d9bd2c4 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -19,6 +19,7 @@
   */
  #include <config.h>
  
+#include "adt/strutil.h"
  #include "input.h"
  #include "diagnostic.h"
  #include "lexer.h"
@@ -30,7 +31,6 @@
  #include "adt/util.h"
  #include "types.h"
  #include "type_t.h"
-#include "target_architecture.h"
  #include "parser.h"
  #include "warning.h"
  #include "lang_features.h"
@@ -57,7 +57,6 @@ static source_position_t  lexer_pos;
  token_t                   lexer_token;
  static symbol_t          *symbol_L;
  static strset_t           stringset;
-static char              *encoding;
  bool                      allow_dollar_in_symbol = true;
  
  /**
@@ -277,8 +276,8 @@ end_symbol:
         char     *string = obstack_finish(&symbol_obstack);
         symbol_t *symbol = symbol_table_insert(string);
  
-       lexer_token.kind              = symbol->ID;
-       lexer_token.identifier.symbol = symbol;
+       lexer_token.kind        = symbol->ID;
+       lexer_token.base.symbol = symbol;
  
         if (symbol->string != string) {
                 obstack_free(&symbol_obstack, string);
@@ -324,12 +323,29 @@ finish_suffix:
         }
  
         obstack_1grow(&symbol_obstack, '\0');
-       size_t    size   = obstack_object_size(&symbol_obstack);
-       char     *string = obstack_finish(&symbol_obstack);
+       size_t size   = obstack_object_size(&symbol_obstack) - 1;
+       char  *string = obstack_finish(&symbol_obstack);
  
         lexer_token.number.suffix = identify_string(string, size);
  }
  
+static void parse_exponent(void)
+{
+       if (c == '-' || c == '+') {
+               obstack_1grow(&symbol_obstack, (char)c);
+               next_char();
+       }
+
+       if (isdigit(c)) {
+               do {
+                       obstack_1grow(&symbol_obstack, (char)c);
+                       next_char();
+               } while (isdigit(c));
+       } else {
+               errorf(&lexer_token.base.source_position, "exponent has no digits");
+       }
+}
+
  /**
   * Parses a hex number including hex floats and set the
   * lexer_token.
@@ -339,7 +355,6 @@ static void parse_number_hex(void)
         bool is_float   = false;
         bool has_digits = false;
  
-       assert(obstack_object_size(&symbol_obstack) == 0);
         while (isxdigit(c)) {
                 has_digits = true;
                 obstack_1grow(&symbol_obstack, (char) c);
@@ -361,16 +376,7 @@ static void parse_number_hex(void)
                 is_float = true;
                 obstack_1grow(&symbol_obstack, (char) c);
                 next_char();
-
-               if (c == '-' || c == '+') {
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
-
-               while (isxdigit(c)) {
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
+               parse_exponent();
         } else if (is_float) {
                 errorf(&lexer_token.base.source_position,
                        "hexadecimal floatingpoint constant requires an exponent");
@@ -381,12 +387,35 @@ static void parse_number_hex(void)
         char   *string = obstack_finish(&symbol_obstack);
         lexer_token.number.number = identify_string(string, size);
  
-       lexer_token.kind    =
-               is_float ? T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL;
+       lexer_token.kind = is_float ? T_FLOATINGPOINT : T_INTEGER;
  
         if (!has_digits) {
-               errorf(&lexer_token.base.source_position,
-                      "invalid number literal '0x%S'", &lexer_token.number.number);
+               errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
+               lexer_token.number.number.begin = "0";
+               lexer_token.number.number.size  = 1;
+       }
+
+       parse_number_suffix();
+}
+
+static void parse_number_bin(void)
+{
+       bool has_digits = false;
+
+       while (c == '0' || c == '1') {
+               has_digits = true;
+               obstack_1grow(&symbol_obstack, (char)c);
+               next_char();
+       }
+       obstack_1grow(&symbol_obstack, '\0');
+
+       size_t  const size   = obstack_object_size(&symbol_obstack) - 1;
+       char   *const string = obstack_finish(&symbol_obstack);
+       lexer_token.number.number = identify_string(string, size);
+       lexer_token.kind          = T_INTEGER;
+
+       if (!has_digits) {
+               errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
                 lexer_token.number.number.begin = "0";
                 lexer_token.number.number.size  = 1;
         }
@@ -414,15 +443,21 @@ static void parse_number(void)
  
         assert(obstack_object_size(&symbol_obstack) == 0);
         if (c == '0') {
+               obstack_1grow(&symbol_obstack, (char)c);
                 next_char();
                 if (c == 'x' || c == 'X') {
+                       obstack_1grow(&symbol_obstack, (char)c);
                         next_char();
                         parse_number_hex();
                         return;
-               } else {
-                       has_digits = true;
+               } else if (c == 'b' || c == 'B') {
+                       /* GCC extension: binary constant 0[bB][01]+.  */
+                       obstack_1grow(&symbol_obstack, (char)c);
+                       next_char();
+                       parse_number_bin();
+                       return;
                 }
-               obstack_1grow(&symbol_obstack, '0');
+               has_digits = true;
         }
  
         while (isdigit(c)) {
@@ -446,16 +481,7 @@ static void parse_number(void)
                 is_float = true;
                 obstack_1grow(&symbol_obstack, 'e');
                 next_char();
-
-               if (c == '-' || c == '+') {
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
-
-               while (isdigit(c)) {
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
+               parse_exponent();
         }
  
         obstack_1grow(&symbol_obstack, '\0');
@@ -463,21 +489,19 @@ static void parse_number(void)
         char   *string = obstack_finish(&symbol_obstack);
         lexer_token.number.number = identify_string(string, size);
  
-       /* is it an octal number? */
         if (is_float) {
                 lexer_token.kind = T_FLOATINGPOINT;
-       } else if (string[0] == '0') {
-               lexer_token.kind = T_INTEGER_OCTAL;
-
-               /* check for invalid octal digits */
-               for (size_t i= 0; i < size; ++i) {
-                       char t = string[i];
-                       if (t >= '8')
-                               errorf(&lexer_token.base.source_position,
-                                      "invalid digit '%c' in octal number", t);
-               }
         } else {
                 lexer_token.kind = T_INTEGER;
+
+               if (string[0] == '0') {
+                       /* check for invalid octal digits */
+                       for (size_t i= 0; i < size; ++i) {
+                               char t = string[i];
+                               if (t >= '8')
+                                       errorf(&lexer_token.base.source_position, "invalid digit '%c' in octal number", t);
+                       }
+               }
         }
  
         if (!has_digits) {
@@ -632,26 +656,6 @@ string_t make_string(const char *string)
         return identify_string(space, len);
  }
  
-static void grow_symbol(utf32 const tc)
-{
-       struct obstack *const o  = &symbol_obstack;
-       if (tc < 0x80U) {
-               obstack_1grow(o, tc);
-       } else if (tc < 0x800) {
-               obstack_1grow(o, 0xC0 | (tc >> 6));
-               obstack_1grow(o, 0x80 | (tc & 0x3F));
-       } else if (tc < 0x10000) {
-               obstack_1grow(o, 0xE0 | ( tc >> 12));
-               obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
-               obstack_1grow(o, 0x80 | ( tc        & 0x3F));
-       } else {
-               obstack_1grow(o, 0xF0 | ( tc >> 18));
-               obstack_1grow(o, 0x80 | ((tc >> 12) & 0x3F));
-               obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
-               obstack_1grow(o, 0x80 | ( tc        & 0x3F));
-       }
-}
-
  /**
   * Parse a string literal and set lexer_token.
   */
@@ -670,18 +674,16 @@ static void parse_string_literal(void)
                         break;
                 }
  
-               case EOF: {
+               case EOF:
                         errorf(&lexer_token.base.source_position, "string has no end");
-                       lexer_token.kind = T_ERROR;
-                       return;
-               }
+                       goto end_of_string;
  
                 case '"':
                         next_char();
                         goto end_of_string;
  
                 default:
-                       grow_symbol(c);
+                       obstack_grow_symbol(&symbol_obstack, c);
                         next_char();
                         break;
                 }
@@ -711,7 +713,7 @@ static void parse_wide_character_constant(void)
                 switch (c) {
                 case '\\': {
                         const utf32 tc = parse_escape_sequence();
-                       grow_symbol(tc);
+                       obstack_grow_symbol(&symbol_obstack, tc);
                         break;
                 }
  
@@ -724,15 +726,12 @@ static void parse_wide_character_constant(void)
                         next_char();
                         goto end_of_wide_char_constant;
  
-               case EOF: {
-                       errorf(&lexer_token.base.source_position,
-                              "EOF while parsing character constant");
-                       lexer_token.kind = T_ERROR;
-                       return;
-               }
+               case EOF:
+                       errorf(&lexer_token.base.source_position, "EOF while parsing character constant");
+                       goto end_of_wide_char_constant;
  
                 default:
-                       grow_symbol(c);
+                       obstack_grow_symbol(&symbol_obstack, c);
                         next_char();
                         break;
                 }
@@ -788,15 +787,12 @@ static void parse_character_constant(void)
                         next_char();
                         goto end_of_char_constant;
  
-               case EOF: {
-                       errorf(&lexer_token.base.source_position,
-                              "EOF while parsing character constant");
-                       lexer_token.kind = T_ERROR;
-                       return;
-               }
+               case EOF:
+                       errorf(&lexer_token.base.source_position, "EOF while parsing character constant");
+                       goto end_of_char_constant;
  
                 default:
-                       grow_symbol(c);
+                       obstack_grow_symbol(&symbol_obstack, c);
                         next_char();
                         break;
  
@@ -904,37 +900,6 @@ static void eat_until_newline(void)
         }
  }
  
-/**
- * Handle the define directive.
- */
-static void define_directive(void)
-{
-       lexer_next_preprocessing_token();
-       if (lexer_token.kind != T_IDENTIFIER) {
-               parse_error("expected identifier after #define\n");
-               eat_until_newline();
-       }
-}
-
-/**
- * Handle the ifdef directive.
- */
-static void ifdef_directive(int is_ifndef)
-{
-       (void) is_ifndef;
-       lexer_next_preprocessing_token();
-       //expect_identifier();
-       //extect_newline();
-}
-
-/**
- * Handle the endif directive.
- */
-static void endif_directive(void)
-{
-       //expect_newline();
-}
-
  /**
   * Parse the line directive.
   */
@@ -949,7 +914,24 @@ static void parse_line_directive(void)
         }
         if (pp_token.kind == T_STRING_LITERAL) {
                 lexer_pos.input_name = pp_token.string.string.begin;
+               lexer_pos.is_system_header = false;
                 next_pp_token();
+
+               /* attempt to parse numeric flags as output by the gcc preprocessor */
+               while (pp_token.kind == T_INTEGER) {
+                       /* flags:
+                        * 1 - indicates start of a new file
+                        * 2 - indicates return from a file
+                        * 3 - indicates system header
+                        * 4 - indicates implicit extern "C" in C++ mode
+                        *
+                        * currently we're only interested in "3"
+                        */
+                       if (streq(pp_token.number.number.begin, "3")) {
+                               lexer_pos.is_system_header = true;
+                       }
+                       next_pp_token();
+               }
         }
  
         eat_until_newline();
@@ -980,8 +962,6 @@ typedef enum stdc_pragma_value_kind_t {
   */
  static void parse_pragma(void)
  {
-       bool unknown_pragma = true;
-
         next_pp_token();
         if (pp_token.kind != T_IDENTIFIER) {
                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
@@ -990,55 +970,34 @@ static void parse_pragma(void)
                 return;
         }
  
-       symbol_t *symbol = pp_token.identifier.symbol;
-       if (symbol->pp_ID == TP_STDC) {
-               stdc_pragma_kind_t kind = STDC_UNKNOWN;
+       stdc_pragma_kind_t kind = STDC_UNKNOWN;
+       if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
                 /* a STDC pragma */
-               if (c_mode & _C99) {
-                       next_pp_token();
+               next_pp_token();
  
-                       switch (pp_token.identifier.symbol->pp_ID) {
-                       case TP_FP_CONTRACT:
-                               kind = STDC_FP_CONTRACT;
-                               break;
-                       case TP_FENV_ACCESS:
-                               kind = STDC_FENV_ACCESS;
-                               break;
-                       case TP_CX_LIMITED_RANGE:
-                               kind = STDC_CX_LIMITED_RANGE;
-                               break;
-                       default:
-                               break;
+               switch (pp_token.base.symbol->pp_ID) {
+               case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
+               case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
+               case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
+               default:                  break;
+               }
+               if (kind != STDC_UNKNOWN) {
+                       next_pp_token();
+                       stdc_pragma_value_kind_t value;
+                       switch (pp_token.base.symbol->pp_ID) {
+                       case TP_ON:      value = STDC_VALUE_ON;      break;
+                       case TP_OFF:     value = STDC_VALUE_OFF;     break;
+                       case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
+                       default:         value = STDC_VALUE_UNKNOWN; break;
                         }
-                       if (kind != STDC_UNKNOWN) {
-                               stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN;
-                               next_pp_token();
-                               switch (pp_token.identifier.symbol->pp_ID) {
-                               case TP_ON:
-                                       value = STDC_VALUE_ON;
-                                       break;
-                               case TP_OFF:
-                                       value = STDC_VALUE_OFF;
-                                       break;
-                               case TP_DEFAULT:
-                                       value = STDC_VALUE_DEFAULT;
-                                       break;
-                               default:
-                                       break;
-                               }
-                               if (value != STDC_VALUE_UNKNOWN) {
-                                       unknown_pragma = false;
-                               } else {
-                                       errorf(&pp_token.base.source_position,
-                                              "bad STDC pragma argument");
-                               }
+                       if (value == STDC_VALUE_UNKNOWN) {
+                               kind = STDC_UNKNOWN;
+                               errorf(&pp_token.base.source_position, "bad STDC pragma argument");
                         }
                 }
-       } else {
-               unknown_pragma = true;
         }
         eat_until_newline();
-       if (unknown_pragma) {
+       if (kind == STDC_UNKNOWN) {
                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
                          "encountered unknown #pragma");
         }
@@ -1050,39 +1009,18 @@ static void parse_pragma(void)
  static void parse_preprocessor_identifier(void)
  {
         assert(pp_token.kind == T_IDENTIFIER);
-       symbol_t *symbol = pp_token.identifier.symbol;
-
-       switch (symbol->pp_ID) {
-       case TP_include:
-               printf("include - enable header name parsing!\n");
-               break;
-       case TP_define:
-               define_directive();
-               break;
-       case TP_ifdef:
-               ifdef_directive(0);
-               break;
-       case TP_ifndef:
-               ifdef_directive(1);
-               break;
-       case TP_endif:
-               endif_directive();
-               break;
+       switch (pp_token.base.symbol->pp_ID) {
         case TP_line:
                 next_pp_token();
                 parse_line_directive();
                 break;
-       case TP_if:
-       case TP_else:
-       case TP_elif:
-       case TP_undef:
-       case TP_error:
-               /* TODO; output the rest of the line */
-               parse_error("#error directive: ");
-               break;
         case TP_pragma:
                 parse_pragma();
                 break;
+       case TP_error:
+               /* TODO: output the rest of the line */
+               parse_error("#error directive");
+               break;
         }
  }
  
@@ -1147,6 +1085,7 @@ void lexer_next_preprocessing_token(void)
  {
         while (true) {
                 lexer_token.base.source_position = lexer_pos;
+               lexer_token.base.symbol          = NULL;
  
                 switch (c) {
                 case ' ':
@@ -1162,7 +1101,7 @@ void lexer_next_preprocessing_token(void)
                 SYMBOL_CHARS
                         parse_symbol();
                         /* might be a wide string ( L"string" ) */
-                       if (lexer_token.identifier.symbol == symbol_L) {
+                       if (lexer_token.base.symbol == symbol_L) {
                                 switch (c) {
                                         case '"':  parse_wide_string_literal();     break;
                                         case '\'': parse_wide_character_constant(); break;
@@ -1317,8 +1256,7 @@ void lexer_next_preprocessing_token(void)
  dollar_sign:
                         errorf(&lexer_pos, "unknown character '%c' found", c);
                         next_char();
-                       lexer_token.kind = T_ERROR;
-                       return;
+                       break;
                 }
         }
  }
@@ -1352,26 +1290,14 @@ static void input_error(unsigned delta_lines, unsigned delta_cols,
         errorf(&lexer_pos, "%s", message);
  }
  
-void select_input_encoding(char const* new_encoding)
-{
-       if (encoding != NULL)
-               xfree(encoding);
-       encoding = xstrdup(new_encoding);
-}
-
-void lexer_open_stream(FILE *stream, const char *input_name)
+void lexer_switch_input(input_t *new_input, const char *input_name)
  {
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
-
         lexer_pos.lineno     = 0;
         lexer_pos.colno      = 0;
         lexer_pos.input_name = input_name;
  
         set_input_error_callback(input_error);
-       input  = input_from_stream(stream, encoding);
+       input  = new_input;
         bufpos = NULL;
         bufend = NULL;
  
@@ -1382,10 +1308,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
  
  void exit_lexer(void)
  {
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
         strset_destroy(&stringset);
  }
  
@@ -1393,6 +1315,6 @@ static __attribute__((unused))
  void dbg_pos(const source_position_t source_position)
  {
         fprintf(stdout, "%s:%u:%u\n", source_position.input_name,
-               source_position.lineno, source_position.colno);
+               source_position.lineno, (unsigned)source_position.colno);
         fflush(stdout);
  }