Initial import of c parser
[cparser] / parser.c
1 #include <config.h>
2
3 #include <assert.h>
4
5 #include "lexer_t.h"
6 #include "token_t.h"
7 #include "type_t.h"
8 #include "ast_t.h"
9 #include "adt/bitfiddle.h"
10
/* Debugging aid: while this is defined, next_token() echoes every token it
 * reads to stderr. */
#define PRINT_TOKENS

/* Lexer state handed to lexer_next_token(); also the source of the position
 * information used in error messages. */
static lexer_t lexer;
/* The current token; overwritten by every call to next_token(). */
static token_t token;
15
16 static inline
17 void next_token()
18 {
19         lexer_next_token(&lexer, &token);
20
21 #ifdef PRINT_TOKENS
22         print_token(stderr, &token);
23         fprintf(stderr, "\n");
24 #endif
25 }
26
27 static inline
28 void eat(token_type_t type)
29 {
30         assert(token.type == type);
31         next_token();
32 }
33
34 void parser_print_error_prefix()
35 {
36     fputs(lexer.source_position.input_name, stderr);
37     fputc(':', stderr);
38     fprintf(stderr, "%d", lexer.source_position.linenr);
39     fputs(": error: ", stderr);
40 }
41
/**
 * Reports a parse error on stderr: the position prefix produced by
 * parser_print_error_prefix(), then "parse error: <message>" and a newline.
 *
 * @param message  human-readable description of the problem
 */
static void parse_error(const char *message)
{
        parser_print_error_prefix();
        fprintf(stderr, "parse error: %s\n", message);
}
48
/**
 * expect(expected): require the current token to have type `expected`.
 *
 * On a match the token is consumed with next_token(). On a mismatch a parse
 * error is reported and the *enclosing function* returns NULL, so the macro
 * may only be used inside functions that return a pointer.
 *
 * The expansion is wrapped in do { } while(0) so that it forms one statement;
 * without the wrapper, next_token() would run unconditionally when the macro
 * is used as the unbraced body of an `if`. The terminating semicolon is
 * deliberately part of the macro, because existing call sites invoke it both
 * with and without a trailing `;`.
 *
 * TODO: name the expected token in the message and resynchronise (e.g. skip
 * to the next ';') instead of only bailing out.
 */
#define expect(expected)                                   \
    do {                                                   \
        if(UNLIKELY(token.type != (expected))) {           \
            parse_error("unexpected token");               \
            return NULL;                                   \
        }                                                  \
        next_token();                                      \
    } while(0);
56
/**
 * Bit flags for the basic type-specifier keywords. While reading a
 * declaration, parse_declaration_specifiers() ORs one flag per keyword into a
 * mask and afterwards maps the complete mask to an atomic type.
 *
 * SPECIFIER_LONG_LONG is set in addition to SPECIFIER_LONG when a second
 * `long` is seen.
 */
typedef enum {
        SPECIFIER_SIGNED    = 0x0001,
        SPECIFIER_UNSIGNED  = 0x0002,
        SPECIFIER_LONG      = 0x0004,
        SPECIFIER_INT       = 0x0008,
        SPECIFIER_DOUBLE    = 0x0010,
        SPECIFIER_CHAR      = 0x0020,
        SPECIFIER_SHORT     = 0x0040,
        SPECIFIER_LONG_LONG = 0x0080,
        SPECIFIER_FLOAT     = 0x0100,
        SPECIFIER_BOOL      = 0x0200,
        SPECIFIER_VOID      = 0x0400,
#ifdef PROVIDE_COMPLEX
        SPECIFIER_COMPLEX   = 0x0800,
#endif
#ifdef PROVIDE_IMAGINARY
        SPECIFIER_IMAGINARY = 0x1000,
#endif
} specifiers_t;
76
/**
 * Bit flags collected in declaration_specifiers_t.type_qualifiers.
 *
 * TYPE_QUALIFIER_INLINE records the `inline` keyword, which this parser
 * handles together with the real type qualifiers.
 */
typedef enum {
        TYPE_QUALIFIER_CONST    = 0x1,
        TYPE_QUALIFIER_RESTRICT = 0x2,
        TYPE_QUALIFIER_VOLATILE = 0x4,
        TYPE_QUALIFIER_INLINE   = 0x8,
} type_qualifier_t;
83
/**
 * Storage class named in a declaration. STORAGE_CLASS_NONE is 0, so a
 * zero-filled declaration_specifiers_t starts out without a storage class.
 */
typedef enum {
        STORAGE_CLASS_NONE     = 0,
        STORAGE_CLASS_TYPEDEF  = 1,
        STORAGE_CLASS_EXTERN   = 2,
        STORAGE_CLASS_STATIC   = 3,
        STORAGE_CLASS_AUTO     = 4,
        STORAGE_CLASS_REGISTER = 5
} storage_class_t;
92
93 typedef struct declaration_specifiers_t  declaration_specifiers_t;
94 struct declaration_specifiers_t {
95         storage_class_t  storage_class;
96         int              type_qualifiers;
97 };
98
99 static
100 void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
101 {
102         type_type_t        type_type       = TYPE_INVALID;
103         atomic_type_type_t atomic_type     = ATOMIC_TYPE_INVALID;
104         unsigned           type_specifiers = 0;
105
106         while(1) {
107                 switch(token.type) {
108
109                 /* storage class */
110 #define MATCH_STORAGE_CLASS(token, class)                                \
111                 case token:                                                      \
112                         if(specifiers->storage_class != STORAGE_CLASS_NONE) {        \
113                                 parse_error("multiple storage classes in declaration "   \
114                                             "specifiers");                               \
115                         }                                                            \
116                         specifiers->storage_class = class;                           \
117                         next_token();                                                \
118                         break;
119
120                 MATCH_STORAGE_CLASS(T_typedef,  STORAGE_CLASS_TYPEDEF)
121                 MATCH_STORAGE_CLASS(T_extern,   STORAGE_CLASS_EXTERN)
122                 MATCH_STORAGE_CLASS(T_static,   STORAGE_CLASS_STATIC)
123                 MATCH_STORAGE_CLASS(T_auto,     STORAGE_CLASS_AUTO)
124                 MATCH_STORAGE_CLASS(T_register, STORAGE_CLASS_REGISTER)
125
126                 /* type qualifiers */
127 #define MATCH_TYPE_QUALIFIER(token, qualifier)                          \
128                 case token:                                                     \
129                         specifiers->type_qualifiers |= qualifier;                   \
130                         next_token();                                               \
131                         break;
132
133                 MATCH_TYPE_QUALIFIER(T_const,    TYPE_QUALIFIER_CONST);
134                 MATCH_TYPE_QUALIFIER(T_restrict, TYPE_QUALIFIER_RESTRICT);
135                 MATCH_TYPE_QUALIFIER(T_volatile, TYPE_QUALIFIER_VOLATILE);
136                 MATCH_TYPE_QUALIFIER(T_inline,   TYPE_QUALIFIER_INLINE);
137
138                 /* type specifiers */
139 #define MATCH_SPECIFIER(token, specifier, name)                         \
140                 case token:                                                     \
141                         next_token();                                               \
142                         if(type_specifiers & specifier) {                           \
143                                 parse_error("multiple " name " type specifiers given"); \
144                         } else {                                                    \
145                                 type_specifiers |= specifier;                           \
146                         }                                                           \
147                         break;
148
149                 MATCH_SPECIFIER(T_void,       SPECIFIER_VOID,      "void")
150                 MATCH_SPECIFIER(T_char,       SPECIFIER_CHAR,      "char")
151                 MATCH_SPECIFIER(T_short,      SPECIFIER_SHORT,     "short")
152                 MATCH_SPECIFIER(T_int,        SPECIFIER_INT,       "int")
153                 MATCH_SPECIFIER(T_float,      SPECIFIER_FLOAT,     "float")
154                 MATCH_SPECIFIER(T_double,     SPECIFIER_DOUBLE,    "double")
155                 MATCH_SPECIFIER(T_signed,     SPECIFIER_SIGNED,    "signed")
156                 MATCH_SPECIFIER(T_unsigned,   SPECIFIER_UNSIGNED,  "unsigned")
157                 MATCH_SPECIFIER(T__Bool,      SPECIFIER_BOOL,      "_Bool")
158 #ifdef PROVIDE_COMPLEX
159                 MATCH_SPECIFIER(T__Complex,   SPECIFIER_COMPLEX,   "_Complex")
160 #endif
161 #ifdef PROVIDE_IMAGINARY
162                 MATCH_SPECIFIER(T__Imaginary, SPECIFIER_IMAGINARY, "_Imaginary")
163 #endif
164                 case T_long:
165                         next_token();
166                         if(type_specifiers & SPECIFIER_LONG_LONG) {
167                                 parse_error("too many long type specifiers given");
168                         } else if(type_specifiers & SPECIFIER_LONG) {
169                                 type_specifiers |= SPECIFIER_LONG_LONG;
170                         } else {
171                                 type_specifiers |= SPECIFIER_LONG;
172                         }
173                         break;
174
175                 /* struct or union specifier */
176                 /* enum specifier */
177                 /* typedef name */
178
179                 /* function specifier */
180                 default:
181                         return;;
182                 }
183         }
184
185         if(type_type == TYPE_INVALID) {
186                 /* match valid basic types */
187                 switch(type_specifiers) {
188                 case SPECIFIER_VOID:
189                         atomic_type = ATOMIC_TYPE_VOID;
190                         break;
191                 case SPECIFIER_CHAR:
192                         atomic_type = ATOMIC_TYPE_CHAR;
193                         break;
194                 case SPECIFIER_SIGNED | SPECIFIER_CHAR:
195                         atomic_type = ATOMIC_TYPE_SCHAR;
196                         break;
197                 case SPECIFIER_UNSIGNED | SPECIFIER_CHAR:
198                         atomic_type = ATOMIC_TYPE_UCHAR;
199                         break;
200                 case SPECIFIER_SHORT:
201                 case SPECIFIER_SIGNED | SPECIFIER_SHORT:
202                 case SPECIFIER_SHORT | SPECIFIER_INT:
203                 case SPECIFIER_SIGNED | SPECIFIER_SHORT | SPECIFIER_INT:
204                         atomic_type = ATOMIC_TYPE_SHORT;
205                         break;
206                 case SPECIFIER_UNSIGNED | SPECIFIER_SHORT:
207                 case SPECIFIER_UNSIGNED | SPECIFIER_SHORT | SPECIFIER_INT:
208                         atomic_type = ATOMIC_TYPE_USHORT;
209                         break;
210                 case SPECIFIER_INT:
211                 case SPECIFIER_SIGNED:
212                 case SPECIFIER_SIGNED | SPECIFIER_INT:
213                         atomic_type = ATOMIC_TYPE_INT;
214                         break;
215                 case SPECIFIER_UNSIGNED:
216                 case SPECIFIER_UNSIGNED | SPECIFIER_INT:
217                         atomic_type = ATOMIC_TYPE_UINT;
218                         break;
219                 case SPECIFIER_LONG:
220                 case SPECIFIER_SIGNED | SPECIFIER_LONG:
221                 case SPECIFIER_LONG | SPECIFIER_INT:
222                 case SPECIFIER_SIGNED | SPECIFIER_LONG | SPECIFIER_INT:
223                         atomic_type = ATOMIC_TYPE_LONG;
224                         break;
225                 case SPECIFIER_UNSIGNED | SPECIFIER_LONG:
226                 case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_INT:
227                         atomic_type = ATOMIC_TYPE_ULONG;
228                         break;
229                 case SPECIFIER_LONG_LONG:
230                 case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG:
231                 case SPECIFIER_LONG_LONG | SPECIFIER_INT:
232                 case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT:
233                         atomic_type = ATOMIC_TYPE_LONGLONG;
234                         break;
235                 case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG:
236                 case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT:
237                         atomic_type = ATOMIC_TYPE_ULONGLONG;
238                         break;
239                 case SPECIFIER_FLOAT:
240                         atomic_type = ATOMIC_TYPE_FLOAT;
241                         break;
242                 case SPECIFIER_DOUBLE:
243                         atomic_type = ATOMIC_TYPE_DOUBLE;
244                         break;
245                 case SPECIFIER_LONG | SPECIFIER_DOUBLE:
246                         atomic_type = ATOMIC_TYPE_LONG_DOUBLE;
247                         break;
248                 case SPECIFIER_BOOL:
249                         atomic_type = ATOMIC_TYPE_BOOL;
250                         break;
251         #ifdef PROVIDE_COMPLEX
252                 case SPECIFIER_FLOAT | SPECIFIER_COMPLEX:
253                         atomic_type = ATOMIC_TYPE_FLOAT_COMPLEX;
254                         break;
255                 case SPECIFIER_DOUBLE | SPECIFIER_COMPLEX:
256                         atomic_type = ATOMIC_TYPE_DOUBLE_COMPLEX;
257                         break;
258                 case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_COMPLEX:
259                         atomic_type = ATOMIC_TYPE_LONG_DOUBLE_COMPLEX;
260                         break;
261         #endif
262         #ifdef PROVIDE_IMAGINARY
263                 case SPECIFIER_FLOAT | SPECIFIER_IMAGINARY:
264                         atomic_type = ATOMIC_TYPE_FLOAT_IMAGINARY;
265                         break;
266                 case SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY:
267                         atomic_type = ATOMIC_TYPE_DOUBLE_IMAGINARY;
268                         break;
269                 case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY:
270                         atomic_type = ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY;
271                         break;
272         #endif
273                 default:
274                         /* invalid specifier combination, give an error message */
275                         if(type_specifiers == 0) {
276                                 parse_error("no type specifiers given in declaration");
277                         } else if((type_specifiers & SPECIFIER_SIGNED) &&
278                                   (type_specifiers & SPECIFIER_UNSIGNED)) {
279                                 parse_error("signed and unsigned specifiers gives");
280                         } else if(type_specifiers & (SPECIFIER_SIGNED | SPECIFIER_UNSIGNED)) {
281                                 parse_error("only integer types can be signed or unsigned");
282                         } else {
283                                 parse_error("multiple datatypes in declaration");
284                         }
285                 }
286         } else {
287                 if(type_specifiers != 0) {
288                         parse_error("multiple datatypes in declaration");
289                 }
290         }
291 }
292
293 typedef struct declarator_t declarator_t;
294 struct declarator_t {
295         /* pointer stuff... */
296         symbol_t     *symbol;
297
298         declarator_t *next;
299 };
300
301 declarator_t *parse_declarator()
302 {
303         while(token.type == '*') {
304                 /* pointer */
305                 next_token();
306                 //parse_type_qualifiers();
307         }
308
309         declarator_t *declarator;
310
311         switch(token.type) {
312         case T_IDENTIFIER:
313                 declarator = allocate_ast(sizeof(declarator[0]));
314                 memset(declarator, 0, sizeof(declarator[0]));
315                 declarator->symbol = token.v.symbol;
316                 return declarator;
317         case '(':
318                 next_token();
319                 declarator = parse_declarator();
320                 expect(')')
321                 return declarator;
322         default:
323                 parse_error("problem while parsing declarator");
324         }
325
326         if(token.type == '(') {
327                 next_token();
328
329                 /* parse parameter-type-list or identifier-list */
330
331                 expect(')');
332         } else if(token.type == '[') {
333                 next_token();
334
335                 /* multiple type qualifiers, and static */
336
337                 /* assignment_expression or '*' or nothing */
338
339                 expect(']');
340         }
341
342         return declarator;
343 }
344
345 declarator_t *parse_init_declarator()
346 {
347         declarator_t *declarator = parse_declarator();
348         if(token.type == '=') {
349                 next_token();
350                 //parse_initialize();
351         }
352
353         return declarator;
354 }
355
356 typedef struct declaration_t declaration_t;
357 struct declaration_t {
358         declaration_specifiers_t  specifiers;
359         declaration_t            *declarators;
360 };
361
362 void parse_declaration()
363 {
364         declaration_specifiers_t specifiers;
365         memset(&specifiers, 0, sizeof(specifiers));
366         parse_declaration_specifiers(&specifiers);
367 }
368
#if 0
/* Disabled sketch of the parser entry point. As written it ignores both of
 * its parameters and calls parse_namespace(), of which no definition is
 * visible in this file. */
namespace_t *parse(FILE *in, const char *input_name)
{
        namespace_t *namespace = parse_namespace();

        return namespace;
}
#endif