Initial import of c parser
authorMatthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 22:03:29 +0000 (22:03 +0000)
committerMatthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 22:03:29 +0000 (22:03 +0000)
[r18314]

66 files changed:
Makefile [new file with mode: 0644]
TODO [new file with mode: 0644]
adt/align.h [new file with mode: 0644]
adt/array.h [new file with mode: 0644]
adt/bitfiddle.h [new file with mode: 0644]
adt/error.h [new file with mode: 0644]
adt/fourcc.h [new file with mode: 0644]
adt/hash_string.h [new file with mode: 0644]
adt/hashset.c [new file with mode: 0644]
adt/hashset.h [new file with mode: 0644]
adt/obst.h [new file with mode: 0644]
adt/pset.c [new file with mode: 0644]
adt/pset.h [new file with mode: 0644]
adt/strset.c [new file with mode: 0644]
adt/strset.h [new file with mode: 0644]
adt/util.h [new file with mode: 0644]
adt/xmalloc.c [new file with mode: 0644]
adt/xmalloc.h [new file with mode: 0644]
ast.c [new file with mode: 0644]
ast.h [new file with mode: 0644]
ast_t.h [new file with mode: 0644]
config.h [new file with mode: 0644]
lexer.c [new file with mode: 0644]
lexer.h [new file with mode: 0644]
lexer_t.h [new file with mode: 0644]
lextest/legalc/comment.c [new file with mode: 0644]
lextest/legalc/operators.c [new file with mode: 0644]
lextest/legalc/splicetest.c [new file with mode: 0644]
lextest/legalc/string.c [new file with mode: 0644]
lextest/legalc/test1.c [new file with mode: 0644]
lextest/legalc/test2.c [new file with mode: 0644]
lextest/legalc/trigraphs.c [new file with mode: 0644]
lextest/tokenstreams/operators [new file with mode: 0644]
lextest/tokenstreams/stringtrigraphs [new file with mode: 0644]
lextest/tokenstreams/t [new file with mode: 0644]
lextest/tokenstreams/t2 [new file with mode: 0644]
lextest/tokenstreams/t3 [new file with mode: 0644]
lextest/warnings/h1.h [new file with mode: 0644]
lextest/warnings/h10.h [new file with mode: 0644]
lextest/warnings/h11.h [new file with mode: 0644]
lextest/warnings/h12.h [new file with mode: 0644]
lextest/warnings/h13.h [new file with mode: 0644]
lextest/warnings/h14.h [new file with mode: 0644]
lextest/warnings/h15.h [new file with mode: 0644]
lextest/warnings/h16.h [new file with mode: 0644]
lextest/warnings/h2.h [new file with mode: 0644]
lextest/warnings/h3.h [new file with mode: 0644]
lextest/warnings/h4.h [new file with mode: 0644]
lextest/warnings/h5.h [new file with mode: 0644]
lextest/warnings/h6.h [new file with mode: 0644]
lextest/warnings/h7.h [new file with mode: 0644]
lextest/warnings/h8.h [new file with mode: 0644]
lextest/warnings/h9.h [new file with mode: 0644]
main.c [new file with mode: 0644]
parser.c [new file with mode: 0644]
preprocessor_tokens.inc [new file with mode: 0644]
symbol.h [new file with mode: 0644]
symbol_table.c [new file with mode: 0644]
symbol_table.h [new file with mode: 0644]
symbol_table_t.h [new file with mode: 0644]
token.c [new file with mode: 0644]
token_t.h [new file with mode: 0644]
tokens.inc [new file with mode: 0644]
type.c [new file with mode: 0644]
type.h [new file with mode: 0644]
type_t.h [new file with mode: 0644]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..a3961a0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,58 @@
+GOAL = cparser
+
+#FIRM_CFLAGS = `pkg-config --cflags libfirm`
+#FIRM_LIBS = `pkg-config --libs libfirm`
+FIRM_CFLAGS = -I$(HOME)/projects/firm/libfirm/include -I$(HOME)/projects/firm/libcore
+FIRM_LIBS = -L$(HOME)/projects/firm/build/i686-pc-linux-gnu/debug -lfirm -llpp -lcore -lm
+
+CFLAGS += -Wall -W -Werror -O0 -g3 -std=c99
+CFLAGS += -DHAVE_CONFIG_H
+CFLAGS += -I .
+CFLAGS += $(FIRM_CFLAGS)
+
+LFLAGS = $(FIRM_LIBS) -llpp -ldl --export-dynamic
+
+SOURCES := \
+       adt/hashset.c \
+       adt/pset.c \
+       adt/strset.c \
+       adt/xmalloc.c \
+       ast.c \
+       lexer.c \
+       main.c \
+       parser.c \
+       symbol_table.c \
+       token.c \
+       type.c
+
+OBJECTS = $(SOURCES:%.c=build/%.o)
+
+Q = @
+
+.PHONY : all clean dirs
+
+all: $(GOAL)
+
+ifeq ($(findstring $(MAKECMDGOALS), clean depend),)
+-include .depend
+endif
+
+.depend: $(SOURCES)
+       @echo "===> DEPEND"
+       @rm -f $@ && touch $@ && makedepend -p "$@ build/" -Y -f $@ -- $(CFLAGS) -- $(SOURCES) 2> /dev/null && rm $@.bak
+
+$(GOAL): build/adt $(OBJECTS)
+       @echo "===> LD $@"
+       $(Q)$(CC) -rdynamic $(OBJECTS) $(LFLAGS) -o $(GOAL)
+
+build/adt:
+       @echo "===> MKDIR $@"
+       $(Q)mkdir -p $@
+
+build/%.o: %.c
+       @echo '===> CC $<'
+       $(Q)$(CC) $(CFLAGS) -c $< -o $@
+
+clean:
+       @echo '===> CLEAN'
+       $(Q)rm -rf build $(GOAL) .depend
diff --git a/TODO b/TODO
new file mode 100644 (file)
index 0000000..1dd8883
--- /dev/null
+++ b/TODO
@@ -0,0 +1,5 @@
+Lexer:
+- proper support of preprocessor
+- parse float numbers
+- octal&hex escape sequences
+- handling of newline is probably not correct for all strange OSes
diff --git a/adt/align.h b/adt/align.h
new file mode 100644 (file)
index 0000000..01e877f
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Project:     libFIRM
+ * File name:   ir/adt/align.h
+ * Purpose:     macros for alignment.
+ * Author:      Markus Armbruster
+ * Modified by:
+ * Created:     1999 by getting from fiasco
+ * CVS-ID:      $Id$
+ * Copyright:   (c) 1995, 1996 Markus Armbruster
+ * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
+#ifndef _ALIGN_H
+#define _ALIGN_H
+
+#include <stddef.h>
+
+/**
+ * @file align.h
+ */
+
+/** A size handled efficiently by malloc(), at least 1K.  */
+#define PREF_MALLOC_SIZE 2048
+
+
+/** A wrapper around GNU C's __attribute__ */
+
+/* According to the documentation, the attributes we are interested in
+   work with 2.5, but we encountered trouble before 2.7.  */
+#if defined (__GNUC__) && __GNUC__ >= 2 && __GNUC_MINOR__ >= 7
+# define HAVE_ATTRIBUTE 1
+# define ATTRIBUTE(attrs) __attribute__ (attrs)
+#else
+# define ATTRIBUTE(attrs)
+#endif
+
+
+/* Alignment */
+
+/** A type that has most constrained alignment.  */
+typedef union {
+  long double d;
+  void *p;
+  long l;
+} aligned_type ATTRIBUTE ((aligned));
+
+/** Inquiring about the alignment of a type.  */
+#ifdef __GNUC__
+# define ALIGNOF(type) __alignof__ (type)
+#else
+# define ALIGNOF(type) offsetof (struct { char c; type d; }, d)
+#endif
+
+/** Maximal alignment required for any type.  */
+#define MAX_ALIGN ALIGNOF (aligned_type)
+
+#endif /* _ALIGN_H */
diff --git a/adt/array.h b/adt/array.h
new file mode 100644 (file)
index 0000000..3fc8c86
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ * Project:     libFIRM
+ * File name:   ir/adt/array.h
+ * Purpose:     Declarations for Array.
+ * Author:      Markus Armbruster
+ * Modified by:
+ * Created:     1999 by getting from fiasco
+ * CVS-ID:      $Id$
+ * Copyright:   (c) 1995, 1996 Markus Armbruster
+ * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
+
+/**
+ * @file array.h   Dynamic and flexible arrays for C.
+ */
+
+#ifndef _ARRAY_H
+#define _ARRAY_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <obstack.h>
+
+#include "fourcc.h"
+#include "align.h"
+#include "xmalloc.h"
+
+#define ARR_D_MAGIC    FOURCC('A','R','R','D')
+#define ARR_A_MAGIC    FOURCC('A','R','R','A')
+#define ARR_F_MAGIC    FOURCC('A','R','R','F')
+
+/**
+ * Creates a flexible array.
+ *
+ * @param type     The element type of the new array.
+ * @param nelts    a size_t expression evaluating to the number of elements
+ *
+ * This macro creates a flexible array of a given type at runtime.
+ * The size of the array can be changed later.
+ *
+ * @return A pointer to the flexible array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define NEW_ARR_F(type, nelts)                                         \
+  ((type *)_new_arr_f ((nelts), sizeof(type) * (nelts)))
+
+/**
+ * Creates a new flxible array with the same number of elements as a
+ * given one.
+ *
+ * @param type     The element type of the new array.
+ * @param arr      An array from which the number of elements will be taken
+ *
+ * This macro creates a flexible array of a given type at runtime.
+ * The size of the array can be changed later.
+ *
+ * @return A pointer to the flexible array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define CLONE_ARR_F(arr)                       \
+  NEW_ARR_F (__typeof__(arr[0]), ARR_LEN ((arr)))
+
+/**
+ * Duplicates an array and returns the new flexible one.
+ *
+ * @param type     The element type of the new array.
+ * @param arr      An array from which the elements will be duplicated
+ *
+ * This macro creates a flexible array of a given type at runtime.
+ * The size of the array can be changed later.
+ *
+ * @return A pointer to the flexible array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define DUP_ARR_F(arr)                                                 \
+  memcpy (CLONE_ARR_F (__typeof__(arr[0]), (arr)), (arr), sizeof(__typeof__(arr[0])) * ARR_LEN((arr)))
+
+/**
+ * Delete a flexible array.
+ *
+ * @param arr    The flexible array.
+ */
+#define DEL_ARR_F(arr) (_del_arr_f ((arr)))
+
+/**
+ * Creates a dynamic array on an obstack.
+ *
+ * @param type     The element type of the new array.
+ * @param obstack  A struct obstack * were the data will be allocated
+ * @param nelts    A size_t expression evaluating to the number of elements
+ *
+ * This macro creates a dynamic array of a given type at runtime.
+ * The size of the array cannot be changed later.
+ *
+ * @return A pointer to the dynamic array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define NEW_ARR_D(type, obstack, nelts)                                        \
+  (  nelts                                                             \
+   ? (type *)_new_arr_d ((obstack), (nelts), sizeof(type) * (nelts))   \
+   : (type *)arr_mt_descr.v.elts)
+
+/**
+ * Creates a new dynamic array with the same number of elements as a
+ * given one.
+ *
+ * @param type     The element type of the new array.
+ * @param obstack  An struct obstack * were the data will be allocated
+ * @param arr      An array from which the number of elements will be taken
+ *
+ * This macro creates a dynamic array of a given type at runtime.
+ * The size of the array cannot be changed later.
+ *
+ * @return A pointer to the dynamic array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define CLONE_ARR_D(obstack, arr)              \
+  NEW_ARR_D (__typeof__(arr[0]), (obstack), ARR_LEN ((arr)))
+
+/**
+ * Duplicates an array and returns the new dynamic one.
+ *
+ * @param type     The element type of the new array.
+ * @param obstack  An struct obstack * were the data will be allocated
+ * @param arr      An array from which the elements will be duplicated
+ *
+ * This macro creates a dynamic array of a given type at runtime.
+ * The size of the array cannot be changed later.
+ *
+ * @return A pointer to the dynamic array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define DUP_ARR_D(obstack, arr)                                                        \
+  memcpy (CLONE_ARR_D (__typeof__(arr[0]), (obstack), (arr)), (arr), sizeof(__typeof__(arr[0])) * ARR_LEN ((arr)))
+
+/**
+ * Create an automatic array which will be deleted at return from function.
+ * Beware, the data will be allocated on the function stack!
+ *
+ * @param type     The element type of the new array.
+ * @param var      A lvalue of type (type *) which will hold the new array.
+ * @param n        number of elements in this array.
+ *
+ * This macro creates a dynamic array on the functions stack of a given type at runtime.
+ * The size of the array cannot be changed later.
+ */
+#define NEW_ARR_A(type, var, n)                                                                        \
+  do {                                                                                         \
+    int _nelts = (n);                                                                          \
+    assert (_nelts >= 0);                                                                      \
+    (var) = (void *)((_arr_descr *)alloca (_ARR_ELTS_OFFS + sizeof(type) * _nelts))->v.elts;   \
+    _ARR_SET_DBGINF (_ARR_DESCR ((var)), ARR_A_MAGIC, sizeof (type));                          \
+    (void)(_ARR_DESCR ((var))->nelts = _nelts);                                                        \
+  } while (0)
+
+/**
+ * Creates a new automatic array with the same number of elements as a
+ * given one.
+ *
+ * @param var      A lvalue of type (type *) which will hold the new array.
+ * @param arr      An array from which the elements will be duplicated
+ *
+ * This macro creates a dynamic array of a given type at runtime.
+ * The size of the array cannot be changed later.
+ *
+ * @return A pointer to the dynamic array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define CLONE_ARR_A(var, arr)          \
+  NEW_ARR_A (__typeof__(arr[0]), (var), ARR_LEN ((arr)))
+
+/**
+ * Duplicates an array and returns a new automatic one.
+ *
+ * @param type     The element type of the new array.
+ * @param var      A lvalue of type (type *) which will hold the new array.
+ * @param arr      An array from with the number of elements will be taken
+ *
+ * This macro creates a dynamic array of a given type at runtime.
+ * The size of the array cannot be changed later.
+ *
+ * @return A pointer to the dynamic array (can be used as a pointer to the
+ *         first element of this array).
+ */
+#define DUP_ARR_A(var, arr)                                    \
+  do { CLONE_ARR_A(__typeof__(arr[0]), (var), (arr));                                  \
+       memcpy ((var), (arr), sizeof (__typeof__(arr[0])) * ARR_LEN ((arr))); } \
+  while (0)
+
+/**
+ * Declare an initialized (zero'ed) array of fixed size.
+ * This macro should be used at file scope only.
+ *
+ * @param type     The element type of the new array.
+ * @param var      A lvalue of type (type *) which will hold the new array.
+ * @param _nelts   Number of elements in this new array.
+ */
+#define DECL_ARR_S(type, var, _nelts)                                  \
+  ARR_STRUCT(type, (_nelts) ? (_nelts) : 1) _##var;                    \
+  type *var = (_ARR_SET_DBGINF (&_##var, ARR_A_MAGIC, sizeof (type)),  \
+              _##var.nelts = _nelts,                                   \
+              _##var.v.elts)
+
+/**
+ * Returns the length of an array
+ *
+ * @param arr  a flexible, dynamic, automatic or static array.
+ */
+#define ARR_LEN(arr) (ARR_VRFY ((arr)), _ARR_DESCR((arr))->nelts)
+
+/**
+ * Resize a flexible array, allocate more data if needed but do NOT
+ * reduce.
+ *
+ * @param arr      The array, which must be an lvalue.
+ * @param n        The new size of the array.
+ *
+ * @remark  This macro may change arr, so update all references!
+ */
+#define ARR_RESIZE(arr, n)                                     \
+  ((arr) = _arr_resize ((arr), (n), sizeof(__typeof__(arr[0]))))
+
+/**
+ * Resize a flexible array, always reallocate data.
+ *
+ * @param arr      The array, which must be an lvalue.
+ * @param n        The new size of the array.
+ *
+ * @remark  This macro may change arr, so update all references!
+ */
+#define ARR_SETLEN(arr, n)                                     \
+  ((arr) = _arr_setlen ((arr), (n), sizeof(__typeof__(arr[0])) * (n)))
+
+/** Set a length smaller than the current length of the array.  Do not
+ *  resize. len must be <= ARR_LEN(arr). */
+#define ARR_SHRINKLEN(arr,len)                                          \
+   (ARR_VRFY ((arr)), assert(_ARR_DESCR((arr))->nelts >= len),             \
+    _ARR_DESCR((arr))->nelts = len)
+
+/**
+ * Resize a flexible array by growing it by delta elements.
+ *
+ * @param arr      The array, which must be an lvalue.
+ * @param delta    The delta number of elements.
+ *
+ * @remark  This macro may change arr, so update all references!
+ */
+#define ARR_EXTEND(arr, delta)                 \
+  ARR_RESIZE ((arr), ARR_LEN ((arr)) + (delta))
+
+/**
+ * Resize a flexible array to hold n elements only if it is currently shorter
+ * than n.
+ *
+ * @param arr      The array, which must be an lvalue.
+ * @param n        The new size of the array.
+ *
+ * @remark  This macro may change arr, so update all references!
+ */
+#define ARR_EXTO(arr, n)                                               \
+  ((n) >= ARR_LEN ((arr)) ? ARR_RESIZE ((arr), (n)+1) : (arr))
+
+/**
+ * Append one element to a flexible array.
+ *
+ * @param arr      The array, which must be an lvalue.
+ * @param elt      The new element, must be of type (type).
+ */
+#define ARR_APP1(arr, elt)                                     \
+  (ARR_EXTEND ((arr), 1), (arr)[ARR_LEN ((arr))-1] = (elt))
+
+
+#ifdef NDEBUG
+# define ARR_VRFY(arr) ((void)0)
+# define ARR_IDX_VRFY(arr, idx) ((void)0)
+#else
+# define ARR_VRFY(arr)                                                                 \
+    assert (   (   (_ARR_DESCR((arr))->magic == ARR_D_MAGIC)                           \
+               || (_ARR_DESCR((arr))->magic == ARR_A_MAGIC)                            \
+               || (_ARR_DESCR((arr))->magic == ARR_F_MAGIC))                           \
+           && (   (_ARR_DESCR((arr))->magic != ARR_F_MAGIC)                            \
+               || (_ARR_DESCR((arr))->u.allocated >= _ARR_DESCR((arr))->nelts))        \
+           && (_ARR_DESCR((arr))->nelts >= 0))
+# define ARR_IDX_VRFY(arr, idx)                                \
+    assert ((0 <= (idx)) && ((idx) < ARR_LEN ((arr))))
+#endif
+
+
+/* Private !!!
+   Don't try this at home, kids, we're trained professionals ;->
+   ... or at the IPD, either. */
+#ifdef NDEBUG
+# define _ARR_DBGINF_DECL
+# define _ARR_SET_DBGINF(descr, co, es)
+#else
+# define _ARR_DBGINF_DECL int magic; size_t eltsize;
+# define _ARR_SET_DBGINF(descr, co, es)                                        \
+    ( (descr)->magic = (co), (descr)->eltsize = (es) )
+#endif
+
+/**
+ * Construct an array header.
+ */
+#define ARR_STRUCT(type, _nelts)                                               \
+  struct {                                                                     \
+    _ARR_DBGINF_DECL                                                           \
+    union {                                                                    \
+      struct obstack *obstack; /* dynamic: allocated on this obstack */        \
+      int allocated;                   /* flexible: #slots allocated */        \
+    } u;                                                                       \
+    int nelts;                                                                 \
+    union {                                                                    \
+      type elts[(_nelts)];                                                     \
+      aligned_type align[1];                                                   \
+    } v;                                                                       \
+  }
+
+/**
+ * The array descriptor header type.
+ */
+typedef ARR_STRUCT (aligned_type, 1) _arr_descr;
+
+extern _arr_descr arr_mt_descr;
+
+void *_new_arr_f (int, size_t);
+void _del_arr_f (void *);
+void *_new_arr_d (struct obstack *obstack, int nelts, size_t elts_size);
+void *_arr_resize (void *, int, size_t);
+void *_arr_setlen (void *, int, size_t);
+
+#define _ARR_ELTS_OFFS offsetof (_arr_descr, v.elts)
+#define _ARR_DESCR(elts) ((_arr_descr *)(void *)((char *)(elts) - _ARR_ELTS_OFFS))
+
+#endif
diff --git a/adt/bitfiddle.h b/adt/bitfiddle.h
new file mode 100644 (file)
index 0000000..624a2d6
--- /dev/null
@@ -0,0 +1,194 @@
+/**
+ * @file
+ * @date    28.9.2004
+ * @brief   Functions from hackers delight.
+ * @author  Sebastian Hack, Matthias Braun
+ * @version $Id$
+ */
+#ifndef _FIRM_BITFIDDLE_H_
+#define _FIRM_BITFIDDLE_H_
+
+#include <limits.h>
+#include "util.h"
+
+/* some functions here assume ints are 32 bit wide */
+#define HACKDEL_WORDSIZE 32
+COMPILETIME_ASSERT(sizeof(unsigned) == 4, unsignedsize)
+COMPILETIME_ASSERT(UINT_MAX == 4294967295U, uintmax)
+
+/**
+ * Add saturated.
+ * @param x Summand 1.
+ * @param y Summand 2.
+ * @return x + y or INT_MAX/INT_MIN if an overflow occurred and x,y was positive/negative.
+ *
+ * @note See hacker's delight, page 27.
+ */
+static inline __attribute__((const))
+int add_saturated(int x, int y)
+{
+       int sum      = x + y;
+       /*
+               An overflow occurs, if the sign of the both summands is equal
+               and the one of the sum is different from the summand's one.
+               The sign bit is 1, if an overflow occurred, 0 otherwise.
+               int overflow = ~(x ^ y) & (sum ^ x);
+       */
+       int overflow = (x ^ sum) & (y ^ sum);
+
+       /*
+               The infinity to use.
+               Make a mask of the sign bit of x and y (they are the same if an
+               overflow occurred).
+               INT_MIN == ~INT_MAX, so if the sign was negative, INT_MAX becomes
+               INT_MIN.
+       */
+       int inf = (x >> (sizeof(x) * 8 - 1)) ^ INT_MAX;
+
+       return overflow < 0 ? inf : sum;
+}
+
+/**
+ * Compute the count of set bits in a 32-bit word.
+ * @param x A 32-bit word.
+ * @return The number of bits set in x.
+ */
+static inline __attribute__((const))
+unsigned popcnt(unsigned x) {
+       x -= ((x >> 1) & 0x55555555);
+       x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+       x = (x + (x >> 4)) & 0x0f0f0f0f;
+       x += x >> 8;
+       x += x >> 16;
+       return x & 0x3f;
+}
+
+/**
+ * Compute the number of leading zeros in a word.
+ * @param x The word.
+ * @return The number of leading (from the most significant bit) zeros.
+ */
+static inline __attribute__((const))
+unsigned nlz(unsigned x) {
+#ifdef USE_X86_ASSEMBLY
+       unsigned res;
+       if(x == 0)
+               return 32;
+
+       __asm__("bsrl %1,%0"
+                       : "=r" (res)
+                       : "r" (x));
+       return 31 - res;
+#else
+       x |= x >> 1;
+       x |= x >> 2;
+       x |= x >> 4;
+       x |= x >> 8;
+       x |= x >> 16;
+       return popcnt(~x);
+#endif
+}
+
+/**
+ * Compute the number of trailing zeros in a word.
+ * @param x The word.
+ * @return The number of trailing zeros.
+ */
+static inline __attribute__((const))
+unsigned ntz(unsigned x) {
+#ifdef USE_X86_ASSEMBLY
+       unsigned res;
+       if(x == 0)
+               return 32;
+
+       __asm__("bsfl %1,%0"
+                       : "=r" (res)
+                       : "r" (x));
+       return  res;
+#else
+       return HACKDEL_WORDSIZE - nlz(~x & (x - 1));
+#endif
+}
+
+/**
+ * Compute the greatest power of 2 smaller or equal to a value.
+ * This is also known as the binary logarithm.
+ * @param x The value.
+ * @return The power of two.
+ */
+#define log2_floor(x) (HACKDEL_WORDSIZE - 1 - nlz(x))
+
+/**
+ * Compute the smallest power of 2 greater or equal to a value.
+ * This is also known as the binary logarithm.
+ * @param x The value.
+ * @return The power of two.
+ */
+#define log2_ceil(x) (HACKDEL_WORDSIZE - nlz((x) - 1))
+
+/**
+ * Round up to the next multiple of a power of two.
+ * @param x A value.
+ * @param pot A power of two.
+ * @return x rounded up to the next multiple of pot.
+ */
+#define round_up2(x,pot) (((x) + ((pot) - 1)) & (~((pot) - 1)))
+
+/**
+ * Returns the biggest power of 2 that is equal or smaller than @p x
+ * (see hackers delight power-of-2 boundaries, page 48)
+ */
+static inline __attribute__((const))
+unsigned floor_po2(unsigned x)
+{
+#ifdef USE_X86_ASSEMBLY // in this case nlz is fast
+       if(x == 0)
+               return 0;
+       // note that x != 0 here, so nlz(x) < 32!
+       return 0x80000000U >> nlz(x);
+#else
+       x |= x >> 1;
+       x |= x >> 2;
+       x |= x >> 4;
+       x |= x >> 8;
+       x |= x >> 16;
+       return x - (x >> 1);
+#endif
+}
+
+/**
+ * Returns the smallest power of 2 that is equal or greater than x
+ * @remark x has to be <= 0x8000000 of course
+ * @note see hackers delight power-of-2 boundaries, page 48
+ */
+static inline __attribute__((const))
+unsigned ceil_po2(unsigned x)
+{
+       if(x == 0)
+               return 0;
+       assert(x < (1U << 31));
+
+#ifdef USE_X86_ASSEMBLY // in this case nlz is fast
+       // note that x != 0 here!
+       return 0x80000000U >> (nlz(x-1) - 1);
+#else
+       x = x - 1;
+       x |= x >> 1;
+       x |= x >> 2;
+       x |= x >> 4;
+       x |= x >> 8;
+       x |= x >> 16;
+       return x + 1;
+#endif
+}
+
+/**
+ * Tests whether @p x is a power of 2
+ */
+static inline __attribute__((const))
+int is_po2(unsigned x)
+{
+       return (x & (x-1)) == 0;
+}
+
+#endif
diff --git a/adt/error.h b/adt/error.h
new file mode 100644 (file)
index 0000000..9b971db
--- /dev/null
@@ -0,0 +1,8 @@
+// placeholder file...
+#include <stdio.h>
+
+void abort(void);
+
+static inline __attribute__((noreturn))
+void panic(const char *msg)
+{ fprintf(stderr, "Panic: %s\n", msg); abort(); }
diff --git a/adt/fourcc.h b/adt/fourcc.h
new file mode 100644 (file)
index 0000000..8b5a7cb
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Project:     libFIRM
+ * File name:   ir/adt/fourcc.h
+ * Purpose:     define the famous infame FOURCC macro.
+ * Author:
+ * Modified by:
+ * Created:     02.01.2004
+ * CVS-ID:      $Id$
+ * Copyright:   (C) 2004 University of Karlsruhe
+ * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
+#ifndef _FOURCC_H
+#define _FOURCC_H
+
+/** define a readable fourcc code */
+#define FOURCC(a,b,c,d)         ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24))
+
+#endif /* _FOURCC_H */
diff --git a/adt/hash_string.h b/adt/hash_string.h
new file mode 100644 (file)
index 0000000..f2557a2
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef _FIRM_HASH_STRING_H_
+#define _FIRM_HASH_STRING_H_
+
+#define _FIRM_FNV_OFFSET_BASIS 2166136261U
+#define _FIRM_FNV_FNV_PRIME 16777619U
+
+static inline __attribute__((pure))
+unsigned hash_string(const char* str)
+{
+       const unsigned char *p;
+       unsigned hash = _FIRM_FNV_OFFSET_BASIS;
+
+       for(p = (const unsigned char*) str; *p != 0; ++p) {
+               hash *= _FIRM_FNV_FNV_PRIME;
+               hash ^= *p;
+       }
+
+       return hash;
+}
+
+static inline __attribute__((pure))
+unsigned hash_string_size(const char* str, size_t size)
+{
+       size_t i;
+       unsigned hash = _FIRM_FNV_OFFSET_BASIS;
+
+       for(i = 0; i < size; ++i) {
+               hash *= _FIRM_FNV_FNV_PRIME;
+               hash ^= str[i];
+       }
+
+       return hash;
+}
+
+#endif
diff --git a/adt/hashset.c b/adt/hashset.c
new file mode 100644 (file)
index 0000000..c2ac584
--- /dev/null
@@ -0,0 +1,596 @@
+/**
+ * @file
+ * @date    17.03.2007
+ * @brief   Geberic hashset implementation
+ * @author  Matthias Braun, inspiration from densehash from google sparsehash
+ *          package
+ * @version $Id$
+ *
+ *
+ * You have to specialize this file by defining:
+ *
+ * <ul>
+ *  <li><b>HashSet</b>         The name of the hashset type</li>
+ *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
+ *  <li><b>ValueType</b>       The type of the stored data values</li>
+ *  <li><b>NullValue</b>       A special value representing no values</li>
+ *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
+ *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
+ * </ul>
+ *
+ * Note that by default it is assumed that the data values themselfes are used
+ * as keys. However you can change that with additional defines:
+ *
+ * <ul>
+ *  <li><b>KeyType</b>         The type of the keys identifying data values.
+ *                             Defining this implies, that a data value contains
+ *                             more than just the key.</li>
+ *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
+ *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
+ *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
+ *                             them on demand from the datavalues. (usefull if
+ *                             calculating the hash-values takes less time than
+ *                             a memory access)</li>
+ * </ul>
+ *
+ * You can further fine tune your hashset by defining the following:
+ *
+ * <ul>
+ *  <li><b>JUMP(num_probes)</b> The probing method</li>
+ *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
+ *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
+ *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
+ *                                      the Null value</li>
+ *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
+ * </ul>
+ */
+#ifdef HashSet
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bitfiddle.h"
+#include "util.h"
+
+/* quadratic probing */
+#ifndef JUMP
+#define JUMP(num_probes)      (num_probes)
+#endif
+
+#ifndef Hash
+#define ID_HASH
+#define Hash(this,value)      ((unsigned)(value))
+#endif
+
+#ifdef DO_REHASH
+#define HashSetEntry                   ValueType
+#define EntrySetHash(entry,new_hash)
+#define EntryGetHash(this,entry)       Hash(this,entry)
+#define EntryGetValue(entry)           (entry)
+#else
+#define EntryGetHash(this,entry)       (entry).hash
+#define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
+#define EntryGetValue(entry)           (entry).data
+#endif
+
+#ifndef Alloc
+#include "xmalloc.h"
+#define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
+#define Free(ptr)      free(ptr)
+#endif
+
+#ifdef ID_HASH
+#define InsertReturnValue               int
+#define GetInsertReturnValue(entry,new) (new)
+#else
+#define InsertReturnValue               ValueType
+#define GetInsertReturnValue(entry,new) EntryGetValue(entry)
+#endif
+
+#ifndef KeyType
+#define KeyType                  ValueType
+#define GetKey(value)            (value)
+#define InitData(this,value,key) (value) = (key)
+#endif
+
+#ifndef ConstKeyType
+#define ConstKeyType             const KeyType
+#endif
+
+#ifndef EntrySetEmpty
+#define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
+#endif
+#ifndef EntrySetDeleted
+#define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
+#endif
+#ifndef EntryIsEmpty
+#define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
+#endif
+#ifndef EntryIsDeleted
+#define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
+#endif
+#ifndef SetRangeEmpty
+#define SetRangeEmpty(ptr,size)                \
+{                                              \
+       size_t _i;                                 \
+       size_t _size = (size);                     \
+       HashSetEntry *entries = (ptr);             \
+       for(_i = 0; _i < _size; ++_i) {            \
+               HashSetEntry *entry = & entries[_i];   \
+               EntrySetEmpty(*entry);                 \
+       }                                          \
+}
+#endif
+
+#ifndef HT_OCCUPANCY_FLT
+/** how full before we double size */
+#define HT_OCCUPANCY_FLT  0.5f
+#endif
+
+#ifndef HT_EMPTY_FLT
+/** how empty before we half size */
+#define HT_EMPTY_FLT      (0.4f * (HT_OCCUPANCY_FLT))
+#endif
+
+#ifndef HT_MIN_BUCKETS
+/** default smallest bucket size */
+#define HT_MIN_BUCKETS    32
+#endif
+
+#define ILLEGAL_POS       ((size_t)-1)
+
+#ifndef hashset_init
+#error You have to redefine hashset_init
+#endif
+#ifndef hashset_init_size
+#error You have to redefine hashset_init_size
+#endif
+#ifndef hashset_destroy
+#error You have to redefine hashset_destroy
+#endif
+#ifndef hashset_insert
+#error You have to redefine hashset_insert
+#endif
+#ifndef hashset_remove
+#error You have to redefine hashset_remove
+#endif
+#ifndef hashset_find
+#error You have to redefine hashset_find
+#endif
+#ifndef hashset_size
+#error You have to redefine hashset_size
+#endif
+#ifndef hashset_iterator_init
+#error You have to redefine hashset_iterator_init
+#endif
+#ifndef hashset_iterator_next
+#error You have to redefine hashset_iterator_next
+#endif
+#ifndef hashset_remove_iterator
+#error You have to redefine hashset_remove_iterator
+#endif
+
+/**
+ * Returns the number of elements in the hashset
+ */
+size_t hashset_size(const HashSet *this)
+{
+       return this->num_elements - this->num_deleted;
+}
+
+/**
+ * Inserts an element into a hashset without growing the set (you have to make
+ * sure there's enough room for that.
+ * @note also see comments for hashset_insert()
+ * @internal
+ */
+static inline
+InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
+{
+       size_t num_probes = 0;
+       size_t num_buckets = this->num_buckets;
+       size_t hashmask = num_buckets - 1;
+       unsigned hash = Hash(this, key);
+       size_t bucknum = hash & hashmask;
+       size_t insert_pos = ILLEGAL_POS;
+
+       assert((num_buckets & (num_buckets - 1)) == 0);
+
+       while(1) {
+               HashSetEntry *entry = & this->entries[bucknum];
+
+               if(EntryIsEmpty(*entry)) {
+                       size_t p;
+                       HashSetEntry *nentry;
+
+                       if(insert_pos != ILLEGAL_POS) {
+                               p = insert_pos;
+                       } else {
+                               p = bucknum;
+                       }
+
+                       nentry = &this->entries[p];
+                       InitData(this, EntryGetValue(*nentry), key);
+                       EntrySetHash(*nentry, hash);
+                       this->num_elements++;
+                       return GetInsertReturnValue(*nentry, 1);
+               }
+               if(EntryIsDeleted(*entry)) {
+                       if(insert_pos == ILLEGAL_POS)
+                               insert_pos = bucknum;
+               } else if(EntryGetHash(this, *entry) == hash) {
+                       if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
+                               // Value already in the set, return it
+                               return GetInsertReturnValue(*entry, 0);
+                       }
+               }
+
+               ++num_probes;
+               bucknum = (bucknum + JUMP(num_probes)) & hashmask;
+               assert(num_probes < num_buckets);
+       }
+}
+
+/**
+ * Inserts an element into a hashset under the assumption that the hashset
+ * contains no deleted entries and the element doesn't exist in the hashset yet.
+ * @internal
+ */
+static
+void insert_new(HashSet *this, unsigned hash, ValueType value)
+{
+       size_t num_probes = 0;
+       size_t num_buckets = this->num_buckets;
+       size_t hashmask = num_buckets - 1;
+       size_t bucknum = hash & hashmask;
+       size_t insert_pos = ILLEGAL_POS;
+
+       assert(value != NullValue);
+
+       while(1) {
+               HashSetEntry *entry = & this->entries[bucknum];
+
+               if(EntryIsEmpty(*entry)) {
+                       size_t p;
+                       HashSetEntry *nentry;
+
+                       if(insert_pos != ILLEGAL_POS) {
+                               p = insert_pos;
+                       } else {
+                               p = bucknum;
+                       }
+                       nentry = &this->entries[p];
+
+                       EntryGetValue(*nentry) = value;
+                       EntrySetHash(*nentry, hash);
+                       this->num_elements++;
+                       return;
+               }
+               assert(!EntryIsDeleted(*entry));
+
+               ++num_probes;
+               bucknum = (bucknum + JUMP(num_probes)) & hashmask;
+               assert(num_probes < num_buckets);
+       }
+}
+
+/**
+ * calculate shrink and enlarge limits
+ * @internal
+ */
+static inline
+void reset_thresholds(HashSet *this)
+{
+       this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
+       this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
+       this->consider_shrink = 0;
+}
+
+/**
+ * Resize the hashset
+ * @internal
+ */
+static inline
+void resize(HashSet *this, size_t new_size)
+{
+       size_t num_buckets = this->num_buckets;
+       size_t i;
+       HashSetEntry *old_entries = this->entries;
+       HashSetEntry *new_entries;
+
+       /* allocate a new array with double size */
+       new_entries = Alloc(new_size);
+       SetRangeEmpty(new_entries, new_size);
+
+       /* use the new array */
+       this->entries = new_entries;
+       this->num_buckets = new_size;
+       this->num_elements = 0;
+       this->num_deleted = 0;
+#ifndef NDEBUG
+       this->entries_version++;
+#endif
+       reset_thresholds(this);
+
+       /* reinsert all elements */
+       for(i = 0; i < num_buckets; ++i) {
+               HashSetEntry *entry = & old_entries[i];
+               if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
+                       continue;
+
+               insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
+       }
+
+       /* now we can free the old array */
+       Free(old_entries);
+}
+
+/**
+ * grow the hashset if adding 1 more elements would make it too crowded
+ * @internal
+ */
+static inline
+void maybe_grow(HashSet *this)
+{
+       size_t resize_to;
+
+       if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
+               return;
+
+       /* double table size */
+       resize_to = this->num_buckets * 2;
+       resize(this, resize_to);
+}
+
+/**
+ * shrink the hashset if it is only sparsely filled
+ * @internal
+ */
+static inline
+void maybe_shrink(HashSet *this)
+{
+       size_t size;
+       size_t resize_to;
+
+       if(!this->consider_shrink)
+               return;
+
+       this->consider_shrink = 0;
+       size = hashset_size(this);
+       if(LIKELY(size > this->shrink_threshold))
+               return;
+
+       resize_to = ceil_po2(size);
+
+       if(resize_to < 4)
+               resize_to = 4;
+
+       resize(this, resize_to);
+}
+
+/**
+ * Insert an element into the hashset. If no element with key key exists yet,
+ * then a new one is created and initialized with the InitData function.
+ * Otherwise the exisiting element is returned (for hashs where key is equal to
+ * value, nothing is returned.)
+ *
+ * @param this   the hashset
+ * @param key    the key that identifies the data
+ * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
+ */
+InsertReturnValue hashset_insert(HashSet *this, KeyType key)
+{
+#ifndef NDEBUG
+       this->entries_version++;
+#endif
+
+       maybe_shrink(this);
+       maybe_grow(this);
+       return insert_nogrow(this, key);
+}
+
+/**
+ * Searchs for an element with key @p key.
+ *
+ * @param this      the hashset
+ * @param key       the key to search for
+ * @returns         the found value or NullValue if nothing was found
+ */
+ValueType hashset_find(const HashSet *this, ConstKeyType key)
+{
+       size_t num_probes = 0;
+       size_t num_buckets = this->num_buckets;
+       size_t hashmask = num_buckets - 1;
+       unsigned hash = Hash(this, key);
+       size_t bucknum = hash & hashmask;
+
+       while(1) {
+               HashSetEntry *entry = & this->entries[bucknum];
+
+               if(EntryIsEmpty(*entry)) {
+                       return NullValue;
+               }
+               if(EntryIsDeleted(*entry)) {
+                       // value is deleted
+               } else if(EntryGetHash(this, *entry) == hash) {
+                       if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
+                               // found the value
+                               return EntryGetValue(*entry);
+                       }
+               }
+
+               ++num_probes;
+               bucknum = (bucknum + JUMP(num_probes)) & hashmask;
+               assert(num_probes < num_buckets);
+       }
+}
+
+/**
+ * Removes an element from a hashset. Does nothing if the set doesn't contain
+ * the element.
+ *
+ * @param this    the hashset
+ * @param key     key that identifies the data to remove
+ */
+void hashset_remove(HashSet *this, ConstKeyType key)
+{
+       size_t num_probes = 0;
+       size_t num_buckets = this->num_buckets;
+       size_t hashmask = num_buckets - 1;
+       unsigned hash = Hash(this, key);
+       size_t bucknum = hash & hashmask;
+
+#ifndef NDEBUG
+       this->entries_version++;
+#endif
+
+       while(1) {
+               HashSetEntry *entry = & this->entries[bucknum];
+
+               if(EntryIsEmpty(*entry)) {
+                       return;
+               }
+               if(EntryIsDeleted(*entry)) {
+                       // entry is deleted
+               } else if(EntryGetHash(this, *entry) == hash) {
+                       if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
+                               EntrySetDeleted(*entry);
+                               this->num_deleted++;
+                               this->consider_shrink = 1;
+                               return;
+                       }
+               }
+
+               ++num_probes;
+               bucknum = (bucknum + JUMP(num_probes)) & hashmask;
+               assert(num_probes < num_buckets);
+       }
+}
+
+/**
+ * Initializes hashset with a specific size
+ * @internal
+ */
+static inline
+void init_size(HashSet *this, size_t initial_size)
+{
+       if(initial_size < 4)
+               initial_size = 4;
+
+       this->entries = Alloc(initial_size);
+       SetRangeEmpty(this->entries, initial_size);
+       this->num_buckets = initial_size;
+       this->consider_shrink = 0;
+       this->num_elements = 0;
+       this->num_deleted = 0;
+#ifndef NDEBUG
+       this->entries_version = 0;
+#endif
+
+       reset_thresholds(this);
+}
+
+/**
+ * Initialializes a hashset with the default size. The memory for the set has to
+ * already allocated.
+ */
+void hashset_init(HashSet *this)
+{
+       init_size(this, HT_MIN_BUCKETS);
+}
+
+/**
+ * Destroys a hashset, freeing all used memory (except the memory for the
+ * HashSet struct itself).
+ */
+void hashset_destroy(HashSet *this)
+{
+       Free(this->entries);
+#ifndef NDEBUG
+       this->entries = NULL;
+#endif
+}
+
+/**
+ * Initializes a hashset expecting expected_element size
+ */
+void hashset_init_size(HashSet *this, size_t expected_elements)
+{
+       size_t needed_size;
+       size_t po2size;
+
+       if(expected_elements >= UINT_MAX/2) {
+               abort();
+       }
+
+       needed_size = expected_elements * (1.0 / HT_OCCUPANCY_FLT);
+       po2size = ceil_po2(needed_size);
+       init_size(this, po2size);
+}
+
+/**
+ * Initializes a hashset iterator. The memory for the allocator has to be
+ * already allocated.
+ * @note it is not allowed to remove or insert elements while iterating
+ */
+void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
+{
+       this->current_bucket = hashset->entries - 1;
+       this->end = hashset->entries + hashset->num_buckets;
+#ifndef NDEBUG
+       this->set = hashset;
+       this->entries_version = hashset->entries_version;
+#endif
+}
+
+/**
+ * Returns the next value in the iterator or NULL if no value is left
+ * in the hashset.
+ * @note it is not allowed to remove or insert elements while iterating
+ */
+ValueType hashset_iterator_next(HashSetIterator *this)
+{
+       HashSetEntry *current_bucket = this->current_bucket;
+       HashSetEntry *end = this->end;
+
+       if(current_bucket >= end)
+               return NullValue;
+
+       /* using hashset_insert or hashset_remove is not allowed while iterating */
+       assert(this->entries_version == this->set->entries_version);
+
+       do {
+               current_bucket++;
+       } while(current_bucket < end &&
+                       (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
+
+       if(current_bucket >= end)
+               return NullValue;
+
+       this->current_bucket = current_bucket;
+       return EntryGetValue(*current_bucket);
+}
+
+/**
+ * Removes the element the iterator points to. Removing an element a second time
+ * has no result.
+ */
+void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
+{
+       HashSetEntry *entry = iter->current_bucket;
+
+       /* iterator_next needs to have been called at least once */
+       assert(entry >= this->entries);
+       /* needs to be on a valid element */
+       assert(entry < this->entries + this->num_buckets);
+
+       if(EntryIsDeleted(*entry))
+               return;
+
+       EntrySetDeleted(*entry);
+       this->num_deleted++;
+       this->consider_shrink = 1;
+}
+
+#endif
diff --git a/adt/hashset.h b/adt/hashset.h
new file mode 100644 (file)
index 0000000..bcd90c5
--- /dev/null
@@ -0,0 +1,58 @@
+/**
+ * @file
+ * @date    16.03.2007
+ * @brief   Generic hashset functions
+ * @author  Matthias Braun
+ * @version $Id$
+ */
+
+/* You have to specialize this header by defining HashSet, HashSetIterator and
+ * ValueType */
+#ifdef HashSet
+
+#include <stdlib.h>
+
+#ifdef DO_REHASH
+#define HashSetEntry ValueType
+#else
+typedef struct HashSetEntry {
+       ValueType data;
+       unsigned hash;
+} HashSetEntry;
+#endif
+
+#ifndef NO_TYPEDEFS
+typedef struct HashSet         HashSet;
+typedef struct HashSetIterator HashSetIterator;
+#endif
+
+struct HashSet {
+       HashSetEntry *entries;
+       size_t num_buckets;
+       size_t enlarge_threshold;
+       size_t shrink_threshold;
+       size_t num_elements;
+       size_t num_deleted;
+       int consider_shrink;
+#ifndef NDEBUG
+       unsigned entries_version;
+#endif
+#ifdef ADDITIONAL_DATA
+       ADDITIONAL_DATA;
+#endif
+};
+
+struct HashSetIterator {
+       HashSetEntry *current_bucket;
+       HashSetEntry *end;
+#ifndef NDEBUG
+       const HashSet *set;
+       unsigned entries_version;
+#endif
+};
+
+#ifdef DO_REHASH
+#undef HashSetEntry
+#endif
+
+#endif
diff --git a/adt/obst.h b/adt/obst.h
new file mode 100644 (file)
index 0000000..c829a63
--- /dev/null
@@ -0,0 +1,15 @@
+/**
+ * @file
+ * @date    17.03.2007
+ * @brief   Makes using obstack.h easier by including obstack_chunk_alloc and
+ *          obstack_chunk_free
+ * @author  Martin Trapp, Christian Schaefer
+ * @version $Id$
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <obstack.h>
+#include "xmalloc.h"
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free  free
diff --git a/adt/pset.c b/adt/pset.c
new file mode 100644 (file)
index 0000000..96fb187
--- /dev/null
@@ -0,0 +1,36 @@
+/* collides with libfirm */
+#if 0
+
+#include <config.h>
+
+#include "pset.h"
+
+/** probing method: quadratic probing */
+#define DO_REHASH
+#define HashSet                    pset_t
+#define HashSetIterator            pset_iterator_t
+#define ValueType                  void*
+#define NullValue                  NULL
+#define DeletedValue               ((void*)-1)
+#define KeysEqual(this,key1,key2)  1
+#define SetRangeEmpty(ptr,size)    memset(ptr, 0, (size) * sizeof(HashSetEntry))
+
+#define hashset_init            pset_init
+#define hashset_init_size       pset_init_size
+#define hashset_destroy         pset_destroy
+#define hashset_insert          pset_insert
+#define hashset_remove          pset_remove
+#define hashset_find            pset_find
+#define hashset_size            pset_size
+#define hashset_iterator_init   pset_iterator_init
+#define hashset_iterator_next   pset_iterator_next
+#define hashset_remove_iterator pset_remove_iterator
+
+#include "hashset.c"
+
+int pset_contains(const pset_t *pset, const ValueType val)
+{
+       return pset_find(pset, val) != NullValue;
+}
+
+#endif
diff --git a/adt/pset.h b/adt/pset.h
new file mode 100644 (file)
index 0000000..8ae8b35
--- /dev/null
@@ -0,0 +1,111 @@
+/**
+ * @file
+ * @date    17.03.2007
+ * @brief   A hashset that contains pointers
+ * @author  Matthias Braun
+ * @version $Id$
+ */
+#ifndef _FIRM_PSET_H_
+#define _FIRM_PSET_H_
+
+/* collides with libfirm... */
+#if 0
+
+#define HashSet          pset_t
+#define HashSetIterator  pset_iterator_t
+#define ValueType        void*
+#define DO_REHASH
+#include "hashset.h"
+#undef DO_REHASH
+#undef HashSet
+#undef HashSetIterator
+#undef ValueType
+
+/**
+ * Initializes a pset
+ *
+ * @param pset   Pointer to allocated space for the pset
+ */
+void pset_init(pset_t *pset);
+
+/**
+ * Initializes a pset
+ *
+ * @param pset                Pointer to allocated space for the pset
+ * @param expected_elements   Number of elements expected in the pset (rougly)
+ */
+void pset_init_size(pset_t *pset, size_t expected_elements);
+
+/**
+ * Destroys a pset and frees the memory allocated for hashtable. The memory of
+ * the pset itself is not freed.
+ *
+ * @param pset   Pointer to the pset
+ */
+void pset_destroy(pset_t *pset);
+
+/**
+ * Inserts an element into a pset.
+ *
+ * @param pset   Pointer to the pset
+ * @param ptr    Pointer to insert into the pset
+ * @returns      1 if the pointer was inserted, 0 if it was already there
+ */
+int pset_insert(pset_t *pset, void *ptr);
+
+/**
+ * Removes an element from a pset. Does nothing if the pset doesn't contain the
+ * element.
+ *
+ * @param pset   Pointer to the pset
+ * @param ptr    Pointer to remove from the pset
+ */
+void pset_remove(pset_t *pset, const void *ptr);
+
+/**
+ * Tests whether a pset contains a pointer
+ *
+ * @param pset   Pointer to the pset
+ * @param ptr    The pointer to test
+ * @returns      1 @p pset contains the @p ptr, 0 otherwise
+ */
+int pset_contains(const pset_t *pset, const void *ptr);
+
+/**
+ * Returns the number of pointers contained in the pset
+ *
+ * @param pset   Pointer to the pset
+ * @returns      Number of pointers contained in the pset
+ */
+size_t pset_size(const pset_t *pset);
+
+/**
+ * Initializes a pset iterator. Sets the iterator before the first element in
+ * the pset.
+ *
+ * @param iterator   Pointer to already allocated iterator memory
+ * @param pset       Pointer to the pset
+ */
+void pset_iterator_init(pset_iterator_t *iterator, const pset_t *pset);
+
+/**
+ * Advances the iterator and returns the current element or NULL if all elements
+ * in the pset have been processed.
+ * @attention It is not allowed to use pset_insert or pset_remove while
+ *            iterating over a pset; pset_remove_iter is allowed.
+ *
+ * @param iterator  Pointer to the pset iterator.
+ * @returns         Next element in the pset or NULL
+ */
+void* pset_iterator_next(pset_iterator_t *iterator);
+
+/**
+ * Removes the element that the iterator currently points to from the hashset.
+ *
+ * @param pset      Pointer to the pset
+ * @param iterator  Pointer to the iterator
+ */
+void pset_remove_iterator(pset_t *pset, const pset_iterator_t *iterator);
+#endif
+
+#endif
diff --git a/adt/strset.c b/adt/strset.c
new file mode 100644 (file)
index 0000000..5f0c528
--- /dev/null
@@ -0,0 +1,28 @@
+#include <config.h>
+
+#include "strset.h"
+#include "hash_string.h"
+
+#define HashSet                    strset_t
+#define HashSetIterator            strset_iterator_t
+#define HashSetEntry               strset_entry_t
+#define ValueType                  const char*
+#define ConstKeyType               const char*
+#define NullValue                  NULL
+#define DeletedValue               ((void*)-1)
+#define Hash(this, value)          hash_string(value)
+#define KeysEqual(this,key1,key2)  (strcmp(key1, key2) == 0)
+#define SetRangeEmpty(ptr,size)    memset(ptr, 0, (size) * sizeof(strset_entry_t))
+
+#define hashset_init            strset_init
+#define hashset_init_size       strset_init_size
+#define hashset_destroy         strset_destroy
+#define hashset_insert          strset_insert
+#define hashset_remove          strset_remove
+#define hashset_find            strset_find
+#define hashset_size            strset_size
+#define hashset_iterator_init   strset_iterator_init
+#define hashset_iterator_next   strset_iterator_next
+#define hashset_remove_iterator strset_remove_iterator
+
+#include "hashset.c"
diff --git a/adt/strset.h b/adt/strset.h
new file mode 100644 (file)
index 0000000..1753e78
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef _FIRM_STRSET_H_
+#define _FIRM_STRSET_H_
+
+#define HashSet          strset_t
+#define HashSetIterator  strset_iterator_t
+#define HashSetEntry     strset_entry_t
+#define ValueType        const char*
+#include "hashset.h"
+#undef ValueType
+#undef HashSetEntry
+#undef HashSetIterator
+#undef HashSet
+
+/**
+ * Initializes a strset
+ *
+ * @param strset   Pointer to allocated space for the strset
+ */
+void strset_init(strset_t *strset);
+
+/**
+ * Initializes a strset
+ *
+ * @param strset                Pointer to allocated space for the strset
+ * @param expected_elements   Number of elements expected in the strset (rougly)
+ */
+void strset_init_size(strset_t *strset, size_t expected_elements);
+
+/**
+ * Destroys a strset and frees the memory allocated for hashtable. The memory of
+ * the strset itself is not freed.
+ *
+ * @param strset   Pointer to the strset
+ */
+void strset_destroy(strset_t *strset);
+
+/**
+ * Inserts a string into a strset.
+ *
+ * @param strset   Pointer to the strset
+ * @param ptr      Pointer to insert into the strset
+ * @returns        @p ptr if the string was inserted into the set,
+ *                 otherwise a pointer to the string already in the set
+ */
+const char *strset_insert(strset_t *strset, const char *ptr);
+
+/**
+ * Removes an element from a strset. Does nothing if the strset doesn't contain the
+ * element.
+ *
+ * @param strset   Pointer to the strset
+ * @param ptr    Pointer to remove from the strset
+ */
+void strset_remove(strset_t *strset, const char *ptr);
+
+/**
+ * Tests whether a strset contains a pointer
+ *
+ * @param strset   Pointer to the strset
+ * @param ptr    The pointer to test
+ * @returns      1 @p strset contains the @p ptr, 0 otherwise
+ */
+const char* strset_find(const strset_t *strset, const char *ptr);
+
+/**
+ * Returns the number of pointers contained in the strset
+ *
+ * @param strset   Pointer to the strset
+ * @returns      Number of pointers contained in the strset
+ */
+size_t strset_size(const strset_t *strset);
+
+/**
+ * Initializes a strset iterator. Sets the iterator before the first element in
+ * the strset.
+ *
+ * @param iterator   Pointer to already allocated iterator memory
+ * @param strset       Pointer to the strset
+ */
+void strset_iterator_init(strset_iterator_t *iterator, const strset_t *strset);
+
+/**
+ * Advances the iterator and returns the current element or NULL if all elements
+ * in the strset have been processed.
+ * @attention It is not allowed to use strset_insert or strset_remove while
+ *            iterating over a strset.
+ *
+ * @param iterator  Pointer to the strset iterator.
+ * @returns         Next element in the strset or NULL
+ */
+const char *strset_iterator_next(strset_iterator_t *iterator);
+
+/**
+ * Removes the string from the set that the iterator currently points to
+ *
+ * @param strset    Pointer to the strset
+ * @param iter      Pointer to the iterator
+ */
+void strset_remove_iterator(strset_t *strset,
+                            const strset_iterator_t *iterator);
+
+#endif
diff --git a/adt/util.h b/adt/util.h
new file mode 100644 (file)
index 0000000..19a7bf8
--- /dev/null
@@ -0,0 +1,33 @@
+/**
+ * @file
+ * @date    16.03.2007
+ * @brief   Various utility functions that wrap compiler specific extensions
+ * @author  Matthias Braun
+ * @version $Id$
+ */
+#ifndef _FIRM_UTIL_H_
+#define _FIRM_UTIL_H_
+
+/**
+ * Asserts that the constant expression x is not zero at compiletime. name has
+ * to be a unique identifier.
+ *
+ * @note This uses the fact, that double case labels are not allowed.
+ */
+#define COMPILETIME_ASSERT(x, name)    \
+       static __attribute__((unused)) void compiletime_assert_##name (int h) { \
+               switch(h) { case 0:     case (x): ; } \
+       }
+
+/**
+ * Indicates to the compiler that the value of x is very likely 1
+ * @note Only use this in speed critical code and when you are sure x is often 1
+ */
+#define LIKELY(x)   __builtin_expect((x), 1)
+/**
+ * Indicates to the compiler that it's very likely that x is 0
+ * @note Only use this in speed critical code and when you are sure x is often 0
+ */
+#define UNLIKELY(x) __builtin_expect((x), 0)
+
+#endif
diff --git a/adt/xmalloc.c b/adt/xmalloc.c
new file mode 100644 (file)
index 0000000..08de608
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Project:     libFIRM
+ * File name:   ir/adt/xmalloc.c
+ * Purpose:     Xmalloc --- never failing wrappers for malloc() & friends.
+ * Author:      Markus Armbruster
+ * Modified by:
+ * Created:     1999 by getting from fiasco
+ * CVS-ID:      $Id$
+ * Copyright:   (c) 1995, 1996 Markus Armbruster
+ * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
+
+/* @@@ ToDo: replace this file with the one from liberty.
+   [reimplement xstrdup, ... ] */
+#include <config.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "xmalloc.h"
+#include "error.h"
+#include "util.h"
+
+static inline __attribute__((noreturn))
+void out_of_memory(void) {
+       panic("out of memory");
+}
+
+void *xmalloc(size_t size) {
+       void *res = malloc(size);
+
+       if (UNLIKELY(res == NULL))
+               out_of_memory();
+
+       return res;
+}
+
+void *xcalloc(size_t num, size_t size) {
+       void *res = calloc(num, size);
+
+       if (UNLIKELY(res == NULL))
+               out_of_memory();
+
+       return res;
+}
+
+void *xrealloc(void *ptr, size_t size) {
+       void *res = realloc (ptr, size);
+
+       if (UNLIKELY(res == NULL))
+               out_of_memory();
+
+       return res;
+}
+
+char *xstrdup(const char *str) {
+       size_t len = strlen(str) + 1;
+       char *res = xmalloc(len);
+       memcpy(res, str, len);
+
+       return res;
+}
diff --git a/adt/xmalloc.h b/adt/xmalloc.h
new file mode 100644 (file)
index 0000000..e6517f3
--- /dev/null
@@ -0,0 +1,28 @@
+/**
+ * @file
+ * @brief     More comfortable allocations.
+ * @author    Markus Armbruster
+ * @data      1999 by getting from fiasco
+ * @version   $Id$
+ * Copyright: (c) 1995, 1996 Markus Armbruster
+ * Licence:   This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
+#ifndef _XMALLOC_H_
+#define _XMALLOC_H_
+
+#include <stdlib.h>
+
+__attribute__((malloc))
+void *xmalloc(size_t size);
+
+__attribute__((malloc))
+void *xcalloc(size_t num, size_t size);
+
+void *xrealloc(void *ptr, size_t size);
+
+__attribute__((malloc))
+char *xstrdup(const char *str);
+
+#define xfree(ptr)      free(ptr)
+
+#endif
diff --git a/ast.c b/ast.c
new file mode 100644 (file)
index 0000000..b05df50
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,324 @@
+#include <config.h>
+
+#include "ast_t.h"
+#include "type_t.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "adt/error.h"
+
+struct obstack ast_obstack;
+
+static
+void print_const(FILE *out, const const_t *cnst)
+{
+       fprintf(out, "%d", cnst->value);
+}
+
+static
+void print_string_literal(FILE *out, const string_literal_t *string_literal)
+{
+       /* TODO escape " and non-printable chars */
+       fprintf(out, "\"%s\"", string_literal->value);
+}
+
+static
+void print_call_expression(FILE *out, const call_expression_t *call)
+{
+       print_expression(out, call->method);
+       fprintf(out, "(");
+       call_argument_t *argument = call->arguments;
+       int              first    = 1;
+       while(argument != NULL) {
+               if(!first) {
+                       fprintf(out, ", ");
+               } else {
+                       first = 0;
+               }
+               print_expression(out, argument->expression);
+
+               argument = argument->next;
+       }
+       fprintf(out, ")");
+}
+
+static
+void print_binary_expression(FILE *out, const binary_expression_t *binexpr)
+{
+       fprintf(out, "(");
+       print_expression(out, binexpr->left);
+       fprintf(out, " ");
+       switch(binexpr->type) {
+       case BINEXPR_INVALID:
+               fprintf(out, "INVOP");
+               break;
+       case BINEXPR_ASSIGN:
+               fprintf(out, "<-");
+               break;
+       case BINEXPR_ADD:
+               fprintf(out, "+");
+               break;
+       case BINEXPR_SUB:
+               fprintf(out, "-");
+               break;
+       case BINEXPR_NOTEQUAL:
+               fprintf(out, "/=");
+               break;
+       case BINEXPR_EQUAL:
+               fprintf(out, "=");
+               break;
+       case BINEXPR_LESS:
+               fprintf(out, "<");
+               break;
+       case BINEXPR_LESSEQUAL:
+               fprintf(out, "<=");
+               break;
+       case BINEXPR_GREATER:
+               fprintf(out, ">");
+               break;
+       case BINEXPR_GREATEREQUAL:
+               fprintf(out, ">=");
+               break;
+       default:
+               /* TODO: add missing ops */
+               fprintf(out, "op%d", binexpr->type);
+               break;
+       }
+       fprintf(out, " ");
+       print_expression(out, binexpr->right);
+       fprintf(out, ")");
+}
+
+void print_expression(FILE *out, const expression_t *expression)
+{
+       switch(expression->type) {
+       case EXPR_INVALID:
+               fprintf(out, "*invalid expression*");
+               break;
+       case EXPR_CONST:
+               print_const(out, (const const_t*) expression);
+               break;
+       case EXPR_STRING_LITERAL:
+               print_string_literal(out, (const string_literal_t*) expression);
+               break;
+       case EXPR_CALL:
+               print_call_expression(out, (const call_expression_t*) expression);
+               break;
+       case EXPR_BINARY:
+               print_binary_expression(out, (const binary_expression_t*) expression);
+               break;
+       case EXPR_REFERENCE:
+       case EXPR_UNARY:
+       case EXPR_SELECT:
+       case EXPR_ARRAY_ACCESS:
+       case EXPR_SIZEOF:
+               /* TODO */
+               fprintf(out, "some expression of type %d", expression->type);
+               break;
+       }
+}
+
+static
+void print_block_statement(FILE *out, int indent,
+                           const block_statement_t *block)
+{
+       statement_t *statement = block->first_statement;
+       while(statement != NULL) {
+               print_statement(out, indent + 1, statement);
+
+               statement = statement->next;
+       }
+}
+
+static
+void print_return_statement(FILE *out, const return_statement_t *statement)
+{
+       fprintf(out, "return ");
+       if(statement->return_value != NULL)
+               print_expression(out, statement->return_value);
+}
+
+static
+void print_expression_statement(FILE *out,
+                                const expression_statement_t *statement)
+{
+       print_expression(out, statement->expression);
+}
+
+static
+void print_goto_statement(FILE *out, const goto_statement_t *statement)
+{
+       fprintf(out, "goto ");
+       if(statement->label != NULL) {
+               fprintf(out, "%s", statement->label->symbol->string);
+       } else {
+               fprintf(out, "?%s", statement->label_symbol->string);
+       }
+}
+
+static
+void print_label_statement(FILE *out, const label_statement_t *statement)
+{
+       fprintf(out, ":%s", statement->symbol->string);
+}
+
+static
+void print_if_statement(FILE *out, int indent, const if_statement_t *statement)
+{
+       fprintf(out, "if ");
+       print_expression(out, statement->condition);
+       fprintf(out, ":\n");
+       if(statement->true_statement != NULL) {
+               print_statement(out, indent, statement->true_statement);
+       }
+
+       if(statement->false_statement != NULL) {
+               fprintf(out, "else:\n");
+               print_statement(out, indent, statement->false_statement);
+       }
+}
+
+static
+void print_variable_declaration_statement(FILE *out,
+                     const variable_declaration_statement_t *statement)
+{
+       fprintf(out, "var");
+       if(statement->type != NULL) {
+               fprintf(out, "<");
+               print_type(out, statement->type);
+               fprintf(out, ">");
+       }
+       fprintf(out, " %s", statement->symbol->string);
+}
+
+void print_statement(FILE *out, int indent, const statement_t *statement)
+{
+       for(int i = 0; i < indent; ++i)
+               fprintf(out, "\t");
+
+       switch(statement->type) {
+       case STATEMENT_BLOCK:
+               print_block_statement(out, indent,
+                                     (const block_statement_t*) statement);
+               break;
+       case STATEMENT_RETURN:
+               print_return_statement(out, (const return_statement_t*) statement);
+               break;
+       case STATEMENT_EXPRESSION:
+               print_expression_statement(out,
+                                          (const expression_statement_t*) statement);
+               break;
+       case STATEMENT_LABEL:
+               print_label_statement(out, (const label_statement_t*) statement);
+               break;
+       case STATEMENT_GOTO:
+               print_goto_statement(out, (const goto_statement_t*) statement);
+               break;
+       case STATEMENT_IF:
+               print_if_statement(out, indent, (const if_statement_t*) statement);
+               break;
+       case STATEMENT_VARIABLE_DECLARATION:
+               print_variable_declaration_statement(out,
+                       (const variable_declaration_statement_t*) statement);
+               break;
+       case STATEMENT_INVALID:
+       default:
+               fprintf(out, "*invalid statement*");
+               break;
+
+       }
+       fprintf(out, "\n");
+}
+
+static
+void print_method_parameters(FILE *out, const method_parameter_t *parameters,
+                             const method_type_t *method_type)
+{
+       fprintf(out, "(");
+
+       int                            first          = 1;
+       const method_parameter_t      *parameter      = parameters;
+       const method_parameter_type_t *parameter_type
+               = method_type->parameter_types;
+       while(parameter != NULL && parameter_type != NULL) {
+               if(!first) {
+                       fprintf(out, ", ");
+               } else {
+                       first = 0;
+               }
+
+               print_type(out, parameter_type->type);
+               fprintf(out, " %s", parameter->symbol->string);
+
+               parameter      = parameter->next;
+               parameter_type = parameter_type->next;
+       }
+       assert(parameter == NULL && parameter_type == NULL);
+
+       fprintf(out, ")");
+}
+
+static
+void print_method(FILE *out, const method_t *method)
+{
+       method_type_t *type = method->type;
+
+       fprintf(out, "func ");
+       print_type(out, type->result_type);
+       fprintf(out, " %s", method->symbol->string);
+
+       print_method_parameters(out, method->parameters, type);
+
+       if(method->statement != NULL) {
+               fprintf(out, ":\n");
+               print_statement(out, 0, method->statement);
+       } else {
+               fprintf(out, "\n");
+       }
+}
+
+static
+void print_namespace_entry(FILE *out, const namespace_entry_t *entry)
+{
+       switch(entry->type) {
+       case NAMESPACE_ENTRY_METHOD:
+               print_method(out, (const method_t*) entry);
+               break;
+       case NAMESPACE_ENTRY_VARIABLE:
+               /* TODO */
+               fprintf(out, "some namespace entry of type %d\n\n", entry->type);
+               break;
+       case NAMESPACE_ENTRY_INVALID:
+       default:
+               fprintf(out, "invalid namespace entry (%d)\n", entry->type);
+               break;
+       }
+}
+
+void print_ast(FILE *out, const namespace_t *namespace)
+{
+       namespace_entry_t *entry = namespace->entries;
+
+       while(entry != NULL) {
+               print_namespace_entry(out, entry);
+
+               entry = entry->next;
+       }
+}
+
+void init_ast_module(void)
+{
+       obstack_init(&ast_obstack);
+}
+
+void exit_ast_module(void)
+{
+       obstack_free(&ast_obstack, NULL);
+}
+
+void* (allocate_ast) (size_t size)
+{
+       return _allocate_ast(size);
+}
diff --git a/ast.h b/ast.h
new file mode 100644 (file)
index 0000000..30560cb
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,47 @@
+#ifndef AST_H
+#define AST_H
+
+#include <stdio.h>
+
+typedef struct expression_t               expression_t;
+typedef struct const_t                    const_t;
+typedef struct string_literal_t           string_literal_t;
+typedef struct cast_expression_t          cast_expression_t;
+typedef struct call_argument_t            call_argument_t;
+typedef struct type_argument_t            type_argument_t;
+typedef struct call_expression_t          call_expression_t;
+typedef struct binary_expression_t        binary_expression_t;
+typedef struct unary_expression_t         unary_expression_t;
+typedef struct select_expression_t        select_expression_t;
+typedef struct array_access_expression_t  array_access_expression_t;
+typedef struct sizeof_expression_t        sizeof_expression_t;
+typedef struct conditional_expression_t   conditional_expression_t;
+typedef struct expression_list_element_t  expression_list_element_t;
+typedef struct comma_expression_t         comma_expression_t;
+
+typedef struct statement_t                statement_t;
+typedef struct block_statement_t          block_statement_t;
+typedef struct return_statement_t         return_statement_t;
+typedef struct if_statement_t             if_statement_t;
+typedef struct variable_declaration_statement_t
+                                          variable_declaration_statement_t;
+typedef struct expression_statement_t     expression_statement_t;
+typedef struct goto_statement_t           goto_statement_t;
+typedef struct label_statement_t          label_statement_t;
+
+typedef enum   namespace_entry_type_t     namespace_entry_type_t;
+typedef struct namespace_entry_t          namespace_entry_t;
+typedef struct namespace_t                namespace_t;
+typedef struct method_parameter_t         method_parameter_t;
+typedef struct method_t                   method_t;
+typedef struct global_variable_t          global_variable_t;
+
+void  init_ast_module(void);
+void  exit_ast_module(void);
+
+void  print_expression(FILE *out, const expression_t *expression);
+void  print_statement(FILE *out, int indent, const statement_t *statement);
+void  print_ast(FILE *out, const namespace_t *namespace);
+void *allocate_ast(size_t size);
+
+#endif
diff --git a/ast_t.h b/ast_t.h
new file mode 100644 (file)
index 0000000..415bbc6
--- /dev/null
+++ b/ast_t.h
@@ -0,0 +1,267 @@
+#ifndef AST_T_H
+#define AST_T_H
+
+#include "ast.h"
+#include "symbol.h"
+#include "lexer_t.h"
+#include "type.h"
+#include "adt/obst.h"
+
+extern struct obstack ast_obstack;
+
+typedef enum {
+       EXPR_INVALID = 0,
+       EXPR_REFERENCE,
+       EXPR_CONST,
+       EXPR_STRING_LITERAL,
+       EXPR_CALL,
+       EXPR_UNARY,
+       EXPR_BINARY,
+       EXPR_SELECT,
+       EXPR_ARRAY_ACCESS,
+       EXPR_SIZEOF,
+} expresion_type_t;
+
+struct expression_t {
+       expresion_type_t   type;
+       type_t            *datatype;
+       source_position_t  source_position;
+};
+
+struct const_t {
+       expression_t  expression;
+       int           value;
+};
+
+struct string_literal_t {
+       expression_t  expression;
+       const char   *value;
+};
+
+struct reference_expression_t {
+       expression_t                      expression;
+       symbol_t                         *symbol;
+       union {
+               variable_declaration_statement_t *variable;
+               method_t                         *method;
+               global_variable_t                *global_variable;
+               method_parameter_t               *method_parameter;
+       } r;
+};
+
+struct call_argument_t {
+       expression_t    *expression;
+       call_argument_t *next;
+};
+
+struct call_expression_t {
+       expression_t     expression;
+       expression_t    *method;
+       call_argument_t *arguments;
+};
+
+typedef enum {
+       UNEXPR_INVALID = 0,
+       UNEXPR_NEGATE,
+       UNEXPR_BITWISE_NEGATE,
+       UNEXPR_NOT,
+       UNEXPR_DEREFERENCE,
+       UNEXPR_TAKE_ADDRESS,
+       UNEXPR_POSTFIX_INCREMENT,
+       UNEXPR_POSTFIX_DECREMENT,
+       UNEXPR_PREFIX_INCREMENT,
+       UNEXPR_PREFIX_DECREMENT,
+       UNEXPR_CAST
+} unary_expression_type_t;
+
+struct unary_expression_t {
+       expression_t             expression;
+       unary_expression_type_t  type;
+       expression_t            *value;
+};
+
+typedef enum {
+       BINEXPR_INVALID = 0,
+       BINEXPR_ADD,
+       BINEXPR_SUB,
+       BINEXPR_MUL,
+       BINEXPR_DIV,
+       BINEXPR_MOD,
+       BINEXPR_EQUAL,
+       BINEXPR_NOTEQUAL,
+       BINEXPR_LESS,
+       BINEXPR_LESSEQUAL,
+       BINEXPR_GREATER,
+       BINEXPR_GREATEREQUAL,
+       BINEXPR_BITWISE_AND,
+       BINEXPR_BITWISE_OR,
+       BINEXPR_BITWSIE_XOR,
+       BINEXPR_LOGICAL_AND,
+       BINEXPR_LOGICAL_OR,
+       BINEXPR_SHIFTLEFT,
+       BINEXPR_SHIFTRIGHT,
+       BINEXPR_ASSIGN,
+       BINEXPR_MUL_ASSIGN,
+       BINEXPR_DIV_ASSIGN,
+       BINEXPR_MOD_ASSIGN,
+       BINEXPR_ADD_ASSIGN,
+       BINEXPR_SUB_ASSIGN,
+       BINEXPR_SHIFTLEFT_ASSIGN,
+       BINEXPR_SHIFTRIGHT_ASSIGN,
+       BINEXPR_BITWISE_AND_ASSIGN,
+       BINEXPR_BITWISE_XOR_ASSIGN,
+       BINEXPR_BITWISE_OR_ASSIGN
+} binary_expression_type_t;
+
+struct binary_expression_t {
+       expression_t              expression;
+       binary_expression_type_t  type;
+       expression_t             *left;
+       expression_t             *right;
+};
+
+struct select_expression_t {
+       expression_t      expression;
+       expression_t     *compound;
+       symbol_t         *symbol;
+
+       compound_entry_t *compound_entry;
+};
+
+struct array_access_expression_t {
+       expression_t  expression;
+       expression_t *array_ref;
+       expression_t *index;
+};
+
+struct sizeof_expression_t {
+       expression_t  expression;
+       union {
+               type_t       *type;
+               expression_t *size_expression;
+       } v;
+};
+
+struct conditional_expression_t {
+       expression_t  expression;
+       expression_t *condition;
+       expression_t *true_expression;
+       expression_t *false_expression;
+};
+
+struct expression_list_element_t {
+       expression_t *expression;
+       expression_t *next;
+};
+
+struct comma_expression_t {
+       expression_t               expression;
+       expression_list_element_t *expressions;
+};
+
+typedef enum {
+       STATEMENT_INVALID,
+       STATEMENT_BLOCK,
+       STATEMENT_RETURN,
+       STATEMENT_VARIABLE_DECLARATION,
+       STATEMENT_IF,
+       STATEMENT_EXPRESSION,
+       STATEMENT_GOTO,
+       STATEMENT_LABEL
+} statement_type_t;
+
+struct statement_t {
+       statement_type_t   type;
+       statement_t       *next;
+       source_position_t  source_position;
+};
+
+struct return_statement_t {
+       statement_t   statement;
+       expression_t *return_value;
+};
+
+struct block_statement_t {
+       statement_t  statement;
+       statement_t *first_statement;
+};
+
+struct variable_declaration_statement_t {
+       statement_t  statement;
+       type_t      *type;
+       symbol_t    *symbol;
+
+       int          value_number; /**< filled in by semantic phase */
+       int          refs;
+};
+
+struct if_statement_t {
+       statement_t   statement;
+       expression_t *condition;
+       statement_t  *true_statement;
+       statement_t  *false_statement;
+};
+
+struct goto_statement_t {
+       statement_t        statement;
+       symbol_t          *label_symbol;
+       label_statement_t *label;
+};
+
+struct label_statement_t {
+       statement_t        statement;
+       symbol_t          *symbol;
+};
+
+struct expression_statement_t {
+       statement_t   statement;
+       expression_t *expression;
+};
+
+enum namespace_entry_type_t {
+       NAMESPACE_ENTRY_INVALID,
+       NAMESPACE_ENTRY_METHOD,
+       NAMESPACE_ENTRY_VARIABLE,
+};
+
+struct namespace_entry_t {
+       namespace_entry_type_t  type;
+       namespace_entry_t      *next;
+       source_position_t       source_position;
+};
+
+struct method_parameter_t {
+       method_parameter_t *next;
+       symbol_t           *symbol;
+       type_t             *type;
+       int                 num;
+};
+
+struct method_t {
+       namespace_entry_t   namespace_entry;
+       symbol_t           *symbol;
+       method_type_t      *type;
+       method_parameter_t *parameters;
+
+       statement_t        *statement;
+};
+
+struct global_variable_t {
+       namespace_entry_t  namespace_entry;
+       symbol_t          *symbol;
+       type_t            *type;
+};
+
+struct namespace_t {
+       namespace_entry_t *entries;
+};
+
+static inline
+void *_allocate_ast(size_t size)
+{
+       return obstack_alloc(&ast_obstack, size);
+}
+
+#define allocate_ast(size)                 _allocate_ast(size)
+
+#endif
diff --git a/config.h b/config.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lexer.c b/lexer.c
new file mode 100644 (file)
index 0000000..acd2ef6
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,816 @@
+#include <config.h>
+
+#include "lexer_t.h"
+#include "token_t.h"
+#include "symbol_table_t.h"
+#include "adt/error.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+
+//#define DEBUG_CHARS
+#define MAX_PUTBACK 1
+
+static
+void error_prefix_at(lexer_t *this, const char *input_name, unsigned linenr)
+{
+       (void) this;
+       fprintf(stderr, "%s:%d: Error: ", input_name, linenr);
+}
+
+static
+void error_prefix(lexer_t *this)
+{
+       error_prefix_at(this, this->source_position.input_name,
+                       this->source_position.linenr);
+}
+
+static
+void parse_error(lexer_t *this, const char *msg)
+{
+       error_prefix(this);
+       fprintf(stderr, "%s\n", msg);
+}
+
+static inline
+void next_char(lexer_t *this)
+{
+       this->bufpos++;
+       if(this->bufpos >= this->bufend) {
+               size_t s = fread(this->buf + MAX_PUTBACK, 1,
+                                sizeof(this->buf) - MAX_PUTBACK, this->input);
+               if(s == 0) {
+                       this->c = EOF;
+                       return;
+               }
+               this->bufpos = this->buf + MAX_PUTBACK;
+               this->bufend = this->buf + MAX_PUTBACK + s;
+       }
+       this->c = *(this->bufpos);
+#ifdef DEBUG_CHARS
+       printf("nchar '%c'\n", this->c);
+#endif
+}
+
+static inline
+void put_back(lexer_t *this, int c)
+{
+       char *p = (char*) this->bufpos - 1;
+       this->bufpos--;
+       assert(p >= this->buf);
+       *p = c;
+
+#ifdef DEBUG_CHARS
+       printf("putback '%c'\n", c);
+#endif
+}
+
+static
+int replace_trigraph(lexer_t *this)
+{
+#define MATCH_TRIGRAPH(ch,replacement)           \
+       case ch:                                     \
+               this->c = replacement;                   \
+               return 1;
+
+       switch(this->c) {
+       MATCH_TRIGRAPH('=', '#')
+       MATCH_TRIGRAPH('(', '[')
+       MATCH_TRIGRAPH('/', '\\')
+       MATCH_TRIGRAPH(')', ']')
+       MATCH_TRIGRAPH('\'', '^')
+       MATCH_TRIGRAPH('<', '{')
+       MATCH_TRIGRAPH('!', '|')
+       MATCH_TRIGRAPH('>', '}')
+       MATCH_TRIGRAPH('-', '~')
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+static
+void parse_symbol(lexer_t *this, token_t *token)
+{
+       symbol_t *symbol;
+       char     *string;
+
+       obstack_1grow(&symbol_obstack, this->c);
+       next_char(this);
+
+       while(1) {
+               switch(this->c) {
+               case '\\':
+                       next_char(this);
+                       if(this->c == '\n') {
+                               next_char(this);
+                               this->source_position.linenr++;
+                               break;
+                       }
+
+               case 'A' ... 'Z':
+               case 'a' ... 'z':
+               case '_':
+                       obstack_1grow(&symbol_obstack, this->c);
+                       next_char(this);
+                       break;
+
+               case '?':
+                       next_char(this);
+                       if(this->c != '?') {
+                               put_back(this, this->c);
+                               this->c = '?';
+                               goto end_symbol;
+                       }
+                       next_char(this);
+                       if(replace_trigraph(this))
+                               break;
+                       put_back(this, '?');
+                       put_back(this, this->c);
+                       this->c = '?';
+                       goto end_symbol;
+
+               default:
+                       goto end_symbol;
+               }
+       }
+end_symbol:
+       obstack_1grow(&symbol_obstack, '\0');
+
+       string = obstack_finish(&symbol_obstack);
+       symbol = symbol_table_insert(string);
+
+       if(symbol->ID > 0) {
+               token->type = symbol->ID;
+       } else {
+               token->type = T_IDENTIFIER;
+       }
+       token->v.symbol = symbol;
+
+       if(symbol->string != string) {
+               obstack_free(&symbol_obstack, string);
+       }
+}
+
+#if 0
+static
+preprocessor_token_type_t parse_pp_symbol(lexer_t *this)
+{
+       do {
+               obstack_1grow(&symbol_obstack, this->c);
+               next_char(this);
+       } while(is_ident_char(this->c));
+       obstack_1grow(&symbol_obstack, '\0');
+
+       char     *string = obstack_finish(&symbol_obstack);
+       symbol_t *symbol = preprocessor_symbol_table_find(string);
+       obstack_free(&symbol_obstack, string);
+
+       if(symbol == 0)
+               return TP_ERROR;
+
+       return symbol->ID;
+}
+#endif
+
+static
+void parse_number_hex(lexer_t *this, token_t *token)
+{
+       assert(this->c == 'x' || this->c == 'X');
+       next_char(this);
+
+       if (!isdigit(this->c) &&
+               !('A' <= this->c && this->c <= 'F') &&
+               !('a' <= this->c && this->c <= 'f')) {
+               parse_error(this, "premature end of hex number literal");
+               token->type = T_ERROR;
+               return;
+       }
+
+       int value = 0;
+       for(;;) {
+               if (isdigit(this->c)) {
+                       value = 16 * value + this->c - '0';
+               } else if ('A' <= this->c && this->c <= 'F') {
+                       value = 16 * value + this->c - 'A' + 10;
+               } else if ('a' <= this->c && this->c <= 'f') {
+                       value = 16 * value + this->c - 'a' + 10;
+               } else {
+                       token->type     = T_INTEGER;
+                       token->v.intvalue = value;
+                       return;
+               }
+               next_char(this);
+       }
+}
+
+static
+void parse_number_oct(lexer_t *this, token_t *token)
+{
+       assert(this->c == 'o' || this->c == 'O');
+       next_char(this);
+
+       int value = 0;
+       for(;;) {
+               if ('0' <= this->c && this->c <= '7') {
+                       value = 8 * value + this->c - '0';
+               } else {
+                       token->type     = T_INTEGER;
+                       token->v.intvalue = value;
+                       return;
+               }
+               next_char(this);
+       }
+}
+
+static
+void parse_number_dec(lexer_t *this, token_t *token, int first_char)
+{
+       int value = 0;
+       if(first_char > 0) {
+               assert(first_char >= '0' && first_char <= '9');
+               value = first_char - '0';
+       }
+
+       for(;;) {
+               if (isdigit(this->c)) {
+                       value = 10 * value + this->c - '0';
+               } else {
+                       token->type     = T_INTEGER;
+                       token->v.intvalue = value;
+                       return;
+               }
+               next_char(this);
+       }
+}
+
+static
+void parse_number(lexer_t *this, token_t *token)
+{
+       // TODO check for overflow
+       // TODO check for various invalid inputs sequences
+
+       if (this->c == '0') {
+               next_char(this);
+               switch (this->c) {
+                       case 'X':
+                       case 'x': parse_number_hex(this, token); break;
+                       case 'o':
+                       case 'O': parse_number_oct(this, token); break;
+                       default:  parse_number_dec(this, token, '0');
+               }
+       } else {
+               parse_number_dec(this, token, 0);
+       }
+}
+
+static
+int parse_escape_sequence(lexer_t *this)
+{
+       int c = this->c;
+       next_char(this);
+
+       switch(c) {
+       case '"': return '"';
+       case '\'': return'\'';
+       case '\\':
+               if(this->c == '\n') {
+                       this->source_position.linenr++;
+                       next_char(this);
+                       return parse_escape_sequence(this);
+               }
+               return '\\';
+       case 'a': return '\a';
+       case 'b': return '\b';
+       case 'f': return '\f';
+       case 'n': return '\n';
+       case 'r': return '\r';
+       case 't': return '\t';
+       case 'v': return '\v';
+       case 'x': /* TODO parse hex number ... */
+               parse_error(this, "hex escape sequences not implemented yet");
+               return EOF;
+       case 0 ... 8: /* TODO parse octal number ... */
+               parse_error(this, "octal escape sequences not implemented yet");
+               return EOF;
+       case '?':
+               if(this->c != '?') {
+                       return '?';
+               }
+               /* might be a trigraph */
+               next_char(this);
+               if(replace_trigraph(this)) {
+                       return parse_escape_sequence(this);
+               }
+               put_back(this, '?');
+               return '?';
+
+       case EOF:
+               parse_error(this, "reached end of file while parsing escape sequence");
+               return EOF;
+       default:
+               parse_error(this, "unknown escape sequence\n");
+               return EOF;
+       }
+}
+
+static
+void parse_string_literal(lexer_t *this, token_t *token)
+{
+       unsigned    start_linenr = this->source_position.linenr;
+       char       *string;
+       const char *result;
+
+       assert(this->c == '"');
+       next_char(this);
+
+       while(1) {
+               switch(this->c) {
+               case '?':
+                       next_char(this);
+                       if(this->c != '?') {
+                               obstack_1grow(&symbol_obstack, '?');
+                               break;
+                       }
+                       next_char(this);
+                       if(replace_trigraph(this))
+                               break;
+                       obstack_1grow(&symbol_obstack, '?');
+                       put_back(this, this->c);
+                       this->c = '?';
+                       break;
+
+               case '\\':
+                       next_char(this);
+                       if(this->c == '\n') {
+                               next_char(this);
+                               this->source_position.linenr++;
+                               break;
+                       }
+                       int c = parse_escape_sequence(this);
+                       obstack_1grow(&symbol_obstack, c);
+                       break;
+
+               case EOF:
+                       error_prefix_at(this, this->source_position.input_name,
+                                       start_linenr);
+                       fprintf(stderr, "string has no end\n");
+                       token->type = T_ERROR;
+                       return;
+
+               case '"':
+                       next_char(this);
+                       goto end_of_string;
+
+               default:
+                       obstack_1grow(&symbol_obstack, this->c);
+                       next_char(this);
+                       break;
+               }
+       }
+
+end_of_string:
+
+       /* TODO: concatenate multiple strings separated by whitespace... */
+
+       /* add finishing 0 to the string */
+       obstack_1grow(&symbol_obstack, '\0');
+       string = obstack_finish(&symbol_obstack);
+
+       /* check if there is already a copy of the string */
+       result = strset_insert(&this->stringset, string);
+       if(result != string) {
+               obstack_free(&symbol_obstack, string);
+       }
+
+       token->type     = T_STRING_LITERAL;
+       token->v.string = result;
+}
+
+static
+void skip_multiline_comment(lexer_t *this)
+{
+       unsigned start_linenr = this->source_position.linenr;
+
+       while(1) {
+               switch(this->c) {
+               case '*':
+                       next_char(this);
+                       if(this->c == '/') {
+                               next_char(this);
+                               return;
+                       }
+                       break;
+               case '?':
+                       next_char(this);
+                       if(this->c != '?')
+                               break;
+                       next_char(this);
+                       if(replace_trigraph(this))
+                               break;
+                       put_back(this, '?');
+                       /* we don't put back the 2nd ? as the comment text is discarded
+                        * anyway */
+                       break;
+
+               case '\n':
+                       this->source_position.linenr++;
+                       next_char(this);
+                       break;
+               case EOF:
+                       error_prefix_at(this, this->source_position.input_name,
+                                       start_linenr);
+                       fprintf(stderr, "at end of file while looking for comment end\n");
+                       return;
+               default:
+                       next_char(this);
+                       break;
+               }
+       }
+}
+
+static
+void skip_line_comment(lexer_t *this)
+{
+       while(1) {
+               switch(this->c) {
+               case '?':
+                       next_char(this);
+                       if(this->c != '?')
+                               break;
+                       next_char(this);
+                       if(replace_trigraph(this))
+                               break;
+                       put_back(this, '?');
+                       /* we don't put back the 2nd ? as the comment text is discarded
+                        * anyway */
+                       break;
+
+               case '\\':
+                       next_char(this);
+                       if(this->c == '\n') {
+                               next_char(this);
+                               this->source_position.linenr++;
+                       }
+                       break;
+
+               case EOF:
+               case '\n':
+                       return;
+               default:
+                       next_char(this);
+                       break;
+               }
+       }
+}
+
+#if 0
+static
+void eat_until_newline(lexer_t *this)
+{
+       while(this->c != '\n') {
+               next_char(this);
+       }
+}
+#endif
+
+#if 0
+static
+void parse_preprocessor_directive(lexer_t *this, token_t *result_token)
+{
+       (void) result_token;
+       /* skip whitespaces */
+       while(this->c == ' ' || this->c == '\t' || this->c == '\r') {
+               next_char(this);
+       }
+}
+#endif
+
+void preprocessor_next_token(lexer_t *this, token_t *token)
+{
+       /* skip whitespaces */
+       while(this->c == ' ' || this->c == '\t' || this->c == '\r') {
+               next_char(this);
+       }
+
+       switch(this->c) {
+       case 'A' ... 'Z':
+       case 'a' ... 'z':
+       case '_':
+               parse_symbol(this, token);
+       }
+}
+
+void lexer_next_token(lexer_t *this, token_t *token)
+{
+       int line_begin = 0;
+
+       /* skip whitespaces */
+       while(this->c == ' ' || this->c == '\t' || this->c == '\n'
+             || this->c == '\r') {
+               if(this->c == '\n') {
+                       line_begin = 1;
+                       this->source_position.linenr++;
+               }
+               next_char(this);
+       }
+
+       switch(this->c) {
+       case 'A' ... 'Z':
+       case 'a' ... 'z':
+       case '_':
+               parse_symbol(this, token);
+               break;
+
+       case '0' ... '9':
+               parse_number(this, token);
+               break;
+
+       case '"':
+               parse_string_literal(this, token);
+               break;
+
+       case '\'':
+               next_char(this);
+               if(this->c == '\\') {
+                       next_char(this);
+                       token->type       = T_INTEGER;
+                       token->v.intvalue = parse_escape_sequence(this);
+               } else {
+                       if(this->c == '\n') {
+                               parse_error(this, "newline while parsing character constant");
+                               this->source_position.linenr++;
+                       }
+                       token->type       = T_INTEGER;
+                       token->v.intvalue = this->c;
+                       next_char(this);
+               }
+               if(this->c != '\'') {
+                       parse_error(this, "multibyte character constant");
+                       token->type = T_ERROR;
+               } else {
+                       next_char(this);
+               }
+               break;
+
+       case '\\':
+               next_char(this);
+               if(this->c == '\n') {
+                       next_char(this);
+                       this->source_position.linenr++;
+                       lexer_next_token(this, token);
+                       return;
+               } else {
+                       parse_error(this, "unexpected '\\' found");
+                       token->type = T_ERROR;
+               }
+               break;
+
+#define MAYBE1(ch, set_type)                           \
+               next_char(this);                               \
+               while(1) {                                     \
+                       switch(this->c) {                          \
+                       case ch:                                   \
+                               next_char(this);                       \
+                               token->type = set_type;                \
+                               return;                                \
+
+#define MAYBE(ch, set_type)                            \
+                       case ch:                                   \
+                               next_char(this);                       \
+                               token->type = set_type;                \
+                               return;
+
+#define ELSE(set_type)                                 \
+                       case '?':                                  \
+                               next_char(this);                       \
+                               if(this->c != '?') {                   \
+                                       put_back(this, this->c);           \
+                                       this->c = '?';                     \
+                                       token->type = set_type;            \
+                                       return;                            \
+                               }                                      \
+                               next_char(this);                       \
+                               if(replace_trigraph(this))             \
+                                       break;                             \
+                               put_back(this, '?');                   \
+                               put_back(this, this->c);               \
+                               this->c = '?';                         \
+                               token->type = set_type;                \
+                               return;                                \
+                                                       \
+                       case '\\':                                 \
+                               next_char(this);                       \
+                               if(this->c == '\n') {                  \
+                                       next_char(this);                   \
+                                       this->source_position.linenr++;    \
+                                       break;                             \
+                               }                                      \
+                               /* fallthrough */                      \
+                       default:                                   \
+                               token->type = set_type;                \
+                               return;                                \
+                       }                                          \
+               } /* end of while(1) */                        \
+               break;
+
+       case '.':
+               next_char(this);
+               if(this->c == '.') {
+                       next_char(this);
+                       if(this->c == '.') {
+                               next_char(this);
+                               token->type = T_DOTDOTDOT;
+                       } else {
+                               put_back(this, '.');
+                               token->type = '.';
+                       }
+               } else {
+                       token->type = '.';
+               }
+               break;
+       case '&':
+               MAYBE1('&', T_ANDAND)
+               MAYBE('=', T_ANDEQUAL)
+               ELSE('&')
+       case '*':
+               MAYBE1('=', T_ASTERISKEQUAL)
+               ELSE('*')
+       case '+':
+               MAYBE1('+', T_PLUSPLUS)
+               MAYBE('=', T_PLUSEQUAL)
+               ELSE('+')
+       case '-':
+               MAYBE1('-', T_MINUSMINUS)
+               MAYBE('=', T_MINUSEQUAL)
+               ELSE('-')
+       case '!':
+               MAYBE1('=', T_EXCLAMATIONMARKEQUAL)
+               ELSE('!')
+       case '/':
+               MAYBE1('=', T_SLASHEQUAL)
+                       case '*':
+                               next_char(this);
+                               skip_multiline_comment(this);
+                               lexer_next_token(this, token);
+                               return;
+                       case '/':
+                               next_char(this);
+                               skip_line_comment(this);
+                               lexer_next_token(this, token);
+                               return;
+               ELSE('/')
+       case '%':
+               MAYBE1('=', T_PERCENTEQUAL)
+                       case ':':
+                               /* TODO find trigraphs... */
+                               next_char(this);
+                               if(this->c == '%') {
+                                       next_char(this);
+                                       if(this->c == ':') {
+                                               next_char(this);
+                                               token->type = T_PERCENTCOLONPERCENTCOLON;
+                                       } else {
+                                               put_back(this, '%');
+                                               token->type = T_PERCENTCOLON;
+                                       }
+                                       return;
+                               }
+                               token->type = T_PERCENTCOLON;
+                               return;
+               MAYBE('>', T_PERCENTGREATER)
+               ELSE('%')
+       case '<':
+               MAYBE1(':', T_LESSCOLON)
+               MAYBE('%', T_LESSPERCENT)
+                       case '<':
+                               /* TODO trigraphs... */
+                               next_char(this);
+                               if(this->c == '<') {
+                                       next_char(this);
+                                       if(this->c == '=') {
+                                               next_char(this);
+                                               token->type = T_LESSLESSEQUAL;
+                                       } else {
+                                               token->type = T_LESSLESS;
+                                       }
+                               } else {
+                                       token->type = T_LESS;
+                               }
+                               return;
+               ELSE('<')
+       case '>':
+               next_char(this);
+               while(1) {
+                       switch(this->c) {
+                       case '>':
+                               next_char(this);
+                               /* TODO trigraphs */
+                               if(this->c == '=') {
+                                       next_char(this);
+                                       token->type = T_GREATERGREATEREQUAL;
+                               } else {
+                                       token->type = T_GREATERGREATER;
+                               }
+                               break;
+               ELSE('>')
+       case '^':
+               MAYBE1('=', T_CARETEQUAL)
+               ELSE('^')
+       case '|':
+               MAYBE1('=', T_PIPEEQUAL)
+               MAYBE('|', T_PIPEPIPE)
+               ELSE('|')
+       case ':':
+               MAYBE1('>', T_COLONGREATER)
+               ELSE(':')
+       case '=':
+               MAYBE1('=', T_EQUALEQUAL)
+               ELSE('=')
+       case '#':
+               MAYBE1('#', T_HASHHASH)
+#if 0
+               else {
+                       if(line_begin) {
+                               parse_preprocessor_directive(this, token);
+                               return;
+                       } else {
+                               token->type = '#';
+                       }
+#else
+               ELSE('#')
+#endif
+
+       case '?':
+               next_char(this);
+               /* just a simple ? */
+               if(this->c != '?') {
+                       token->type = '?';
+                       break;
+               }
+               /* might be a trigraph */
+               next_char(this);
+               if(replace_trigraph(this)) {
+                       lexer_next_token(this, token);
+                       return;
+               }
+               put_back(this, this->c);
+               this->c = '?';
+               token->type = '?';
+               break;
+
+       case '[':
+       case ']':
+       case '(':
+       case ')':
+       case '{':
+       case '}':
+       case '~':
+       case ';':
+       case ',':
+               token->type = this->c;
+               next_char(this);
+               break;
+
+       case EOF:
+               token->type = T_EOF;
+               break;
+
+       default:
+               error_prefix(this);
+               fprintf(stderr, "unknown character '%c' found\n", this->c);
+               token->type = T_ERROR;
+               next_char(this);
+               break;
+       }
+}
+
+void lexer_init(lexer_t *this, FILE *stream, const char *input_name)
+{
+       memset(this, 0, sizeof(this[0]));
+
+       this->input = stream;
+
+       this->source_position.linenr     = 0;
+       this->source_position.input_name = input_name;
+       strset_init(&this->stringset);
+
+       /* we place a virtual '\n' at the beginning so the lexer knows we're at the
+        * beginning of a line */
+       this->c = '\n';
+}
+
+void lexer_destroy(lexer_t *this)
+{
+       (void) this;
+}
+
+static __attribute__((unused))
+void dbg_pos(const source_position_t source_position)
+{
+       fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);
+       fflush(stdout);
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644 (file)
index 0000000..f22f445
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,11 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "symbol_table_t.h"
+#include "token_t.h"
+
+typedef struct lexer_t lexer_t;
+
+void lexer_next_token(lexer_t *lexer, token_t *token);
+
+#endif
diff --git a/lexer_t.h b/lexer_t.h
new file mode 100644 (file)
index 0000000..f4da9b2
--- /dev/null
+++ b/lexer_t.h
@@ -0,0 +1,33 @@
+#ifndef LEXER_T_H
+#define LEXER_T_H
+
+#include "lexer.h"
+
+#include <stdio.h>
+#include "symbol_table_t.h"
+#include "adt/obst.h"
+#include "adt/strset.h"
+
+#define MAX_INDENT               256
+
+typedef struct source_position_t source_position_t;
+struct source_position_t {
+       const char *input_name;
+       unsigned    linenr;
+};
+
+struct lexer_t {
+       int               c;
+       source_position_t source_position;
+       FILE             *input;
+       char              buf[1024];
+       const char       *bufend;
+       const char       *bufpos;
+       strset_t          stringset;
+};
+
+void lexer_init(lexer_t *lexer, FILE *stream, const char *input_name);
+
+void lexer_destroy(lexer_t *lexer);
+
+#endif
diff --git a/lextest/legalc/comment.c b/lextest/legalc/comment.c
new file mode 100644 (file)
index 0000000..e1c4fb9
--- /dev/null
@@ -0,0 +1,17 @@
+#include <stdio.h>
+
+int main()
+{
+       //     a comment\
+       printf("don't print me\n");
+
+       //       blo \??= \??/
+       printf("don't print me too\n");
+
+       //     another comment???/
+       printf("don't print me 3\n");
+
+       /*       *??/
+/   printf("print me\n");
+       return 0;
+}
diff --git a/lextest/legalc/operators.c b/lextest/legalc/operators.c
new file mode 100644 (file)
index 0000000..89f44cf
--- /dev/null
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+
+}
diff --git a/lextest/legalc/splicetest.c b/lextest/legalc/splicetest.c
new file mode 100644 (file)
index 0000000..6591c9e
--- /dev/null
@@ -0,0 +1,11 @@
+#i\
+n??/
+clude <stdio\
+.h>
+
+int main()
+{
+       printf("gut\??/
+n");
+       return 0;
+}
diff --git a/lextest/legalc/string.c b/lextest/legalc/string.c
new file mode 100644 (file)
index 0000000..f2bbc1c
--- /dev/null
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int main()
+{
+       printf("String "// Hello /*
+/*world*/" ??/"World\"\n");
+       printf("Sharp: \???=\n");
+       return 0;
+}
diff --git a/lextest/legalc/test1.c b/lextest/legalc/test1.c
new file mode 100644 (file)
index 0000000..dabd91a
--- /dev/null
@@ -0,0 +1,3 @@
+0x3<1/a.h>1e2
+#include <1/a.h>
+#define const.member@$
diff --git a/lextest/legalc/test2.c b/lextest/legalc/test2.c
new file mode 100644 (file)
index 0000000..46b1a37
--- /dev/null
@@ -0,0 +1,13 @@
+"a//b"
+#include "//e"
+// */
+f = g/**//h;
+//\
+i();
+/\
+/ j();
+#define glue(x,y) x##y
+glue(/,/) k();
+/*//*/ 1();
+m = n//**/o
+  + p;
diff --git a/lextest/legalc/trigraphs.c b/lextest/legalc/trigraphs.c
new file mode 100644 (file)
index 0000000..0d9ec3c
--- /dev/null
@@ -0,0 +1,16 @@
+#include <stdio.h>
+
+int main()
+{
+       int i = 0;
+       i +??/
+= 2;
+
+       printf("Result: %d\n", i);
+       printf("Newline???/??/
+n");
+       printf("A backslash: '%c'\n", '\??/
+\');
+       printf("??= ??( ??/\ ??) ??' ??< ??! ??> ??- ??? \n");
+       return 0;
+}
diff --git a/lextest/tokenstreams/operators b/lextest/tokenstreams/operators
new file mode 100644 (file)
index 0000000..ea6f781
--- /dev/null
@@ -0,0 +1,12 @@
++++++
+...
+. . .
+....
++??/
++
+<\
+<\
+=
+>??/
+>??/
+=
diff --git a/lextest/tokenstreams/stringtrigraphs b/lextest/tokenstreams/stringtrigraphs
new file mode 100644 (file)
index 0000000..0a1d82e
--- /dev/null
@@ -0,0 +1,12 @@
+"bla?"
+"bla??"
+"bla???"
+"bla??/n"
+"bla???/n"
+"bla????/n"
+"bla??/
+"
+"bla???/
+"
+"bla????/
+"
diff --git a/lextest/tokenstreams/t b/lextest/tokenstreams/t
new file mode 100644 (file)
index 0000000..2d6d6ac
--- /dev/null
@@ -0,0 +1,3 @@
+symbo???
+symbo??
+symbo?
diff --git a/lextest/tokenstreams/t2 b/lextest/tokenstreams/t2
new file mode 100644 (file)
index 0000000..a1d44d5
--- /dev/null
@@ -0,0 +1 @@
+??
diff --git a/lextest/tokenstreams/t3 b/lextest/tokenstreams/t3
new file mode 100644 (file)
index 0000000..6272e43
--- /dev/null
@@ -0,0 +1,3 @@
+"??? ?? ?"
+'?'
+'??='
diff --git a/lextest/warnings/h1.h b/lextest/warnings/h1.h
new file mode 100644 (file)
index 0000000..f814582
--- /dev/null
@@ -0,0 +1 @@
+#include "h2.h"
diff --git a/lextest/warnings/h10.h b/lextest/warnings/h10.h
new file mode 100644 (file)
index 0000000..5815cb1
--- /dev/null
@@ -0,0 +1 @@
+#include "h11.h"
diff --git a/lextest/warnings/h11.h b/lextest/warnings/h11.h
new file mode 100644 (file)
index 0000000..199386b
--- /dev/null
@@ -0,0 +1 @@
+#include "h12.h"
diff --git a/lextest/warnings/h12.h b/lextest/warnings/h12.h
new file mode 100644 (file)
index 0000000..f38ad90
--- /dev/null
@@ -0,0 +1 @@
+#include "h13.h"
diff --git a/lextest/warnings/h13.h b/lextest/warnings/h13.h
new file mode 100644 (file)
index 0000000..70b4d71
--- /dev/null
@@ -0,0 +1 @@
+#include "h14.h"
diff --git a/lextest/warnings/h14.h b/lextest/warnings/h14.h
new file mode 100644 (file)
index 0000000..749cc53
--- /dev/null
@@ -0,0 +1 @@
+#include "h15.h"
diff --git a/lextest/warnings/h15.h b/lextest/warnings/h15.h
new file mode 100644 (file)
index 0000000..14fb4dd
--- /dev/null
@@ -0,0 +1 @@
+#include "h16.h"
diff --git a/lextest/warnings/h16.h b/lextest/warnings/h16.h
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lextest/warnings/h2.h b/lextest/warnings/h2.h
new file mode 100644 (file)
index 0000000..02465eb
--- /dev/null
@@ -0,0 +1 @@
+#include "h3.h"
diff --git a/lextest/warnings/h3.h b/lextest/warnings/h3.h
new file mode 100644 (file)
index 0000000..a65c928
--- /dev/null
@@ -0,0 +1 @@
+#include "h4.h"
diff --git a/lextest/warnings/h4.h b/lextest/warnings/h4.h
new file mode 100644 (file)
index 0000000..3f827cd
--- /dev/null
@@ -0,0 +1 @@
+#include "h5.h"
diff --git a/lextest/warnings/h5.h b/lextest/warnings/h5.h
new file mode 100644 (file)
index 0000000..6306741
--- /dev/null
@@ -0,0 +1 @@
+#include "h6.h"
diff --git a/lextest/warnings/h6.h b/lextest/warnings/h6.h
new file mode 100644 (file)
index 0000000..7690ef6
--- /dev/null
@@ -0,0 +1 @@
+#include "h7.h"
diff --git a/lextest/warnings/h7.h b/lextest/warnings/h7.h
new file mode 100644 (file)
index 0000000..7675012
--- /dev/null
@@ -0,0 +1 @@
+#include "h8.h"
diff --git a/lextest/warnings/h8.h b/lextest/warnings/h8.h
new file mode 100644 (file)
index 0000000..850b321
--- /dev/null
@@ -0,0 +1 @@
+#include "h9.h"
diff --git a/lextest/warnings/h9.h b/lextest/warnings/h9.h
new file mode 100644 (file)
index 0000000..3cc4374
--- /dev/null
@@ -0,0 +1 @@
+#include "h10.h"
diff --git a/main.c b/main.c
new file mode 100644 (file)
index 0000000..81d4760
--- /dev/null
+++ b/main.c
@@ -0,0 +1,73 @@
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "lexer_t.h"
+#include "token_t.h"
+
+#if 0
+static
+void get_output_name(char *buf, size_t buflen, const char *inputname,
+                     const char *newext)
+{
+       size_t last_dot = 0xffffffff;
+       size_t i = 0;
+       for(const char *c = inputname; *c != 0; ++c) {
+               if(*c == '.')
+                       last_dot = i;
+               ++i;
+       }
+       if(last_dot == 0xffffffff)
+               last_dot = i;
+
+       if(last_dot >= buflen)
+               panic("filename too long");
+       memcpy(buf, inputname, last_dot);
+
+       size_t extlen = strlen(newext) + 1;
+       if(extlen + last_dot >= buflen)
+               panic("filename too long");
+       memcpy(buf+last_dot, newext, extlen);
+}
+#endif
+
+static
+void compile(const char *fname)
+{
+       lexer_t         lexer;
+       token_t         token;
+
+       FILE *in = fopen(fname, "r");
+       if(in == NULL) {
+               fprintf(stderr, "Couldn't open '%s': %s\n", fname, strerror(errno));
+               exit(1);
+       }
+
+       lexer_init(&lexer, in, fname);
+
+       do {
+               lexer_next_token(&lexer, &token);
+               print_token(stdout, &token);
+               puts("");
+       } while(token.type != T_EOF);
+
+       lexer_destroy(&lexer);
+       fclose(in);
+}
+
+int main(int argc, char **argv)
+{
+       init_symbol_table();
+       init_tokens();
+
+       for(int i = 1; i < argc; ++i) {
+               compile(argv[i]);
+       }
+
+       exit_tokens();
+       exit_symbol_table();
+       return 0;
+}
diff --git a/parser.c b/parser.c
new file mode 100644 (file)
index 0000000..a5628b4
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,376 @@
+#include <config.h>
+
+#include <assert.h>
+
+#include "lexer_t.h"
+#include "token_t.h"
+#include "type_t.h"
+#include "ast_t.h"
+#include "adt/bitfiddle.h"
+
+#define PRINT_TOKENS
+
+static lexer_t lexer;
+static token_t token;
+
+static inline
+void next_token()
+{
+       lexer_next_token(&lexer, &token);
+
+#ifdef PRINT_TOKENS
+       print_token(stderr, &token);
+       fprintf(stderr, "\n");
+#endif
+}
+
+static inline
+void eat(token_type_t type)
+{
+       assert(token.type == type);
+       next_token();
+}
+
+void parser_print_error_prefix()
+{
+    fputs(lexer.source_position.input_name, stderr);
+    fputc(':', stderr);
+    fprintf(stderr, "%d", lexer.source_position.linenr);
+    fputs(": error: ", stderr);
+}
+
+static
+void parse_error(const char *message)
+{
+       parser_print_error_prefix();
+       fprintf(stderr, "parse error: %s\n", message);
+}
+
+#define expect(expected) \
+    if(UNLIKELY(token.type != (expected))) { \
+        /*parse_error_expected(NULL, (expected), 0);*/ \
+        /*eat_until_semi();*/ \
+        return NULL; \
+    } \
+    next_token();
+
+typedef enum {
+       SPECIFIER_SIGNED    = 1 << 0,
+       SPECIFIER_UNSIGNED  = 1 << 1,
+       SPECIFIER_LONG      = 1 << 2,
+       SPECIFIER_INT       = 1 << 3,
+       SPECIFIER_DOUBLE    = 1 << 4,
+       SPECIFIER_CHAR      = 1 << 5,
+       SPECIFIER_SHORT     = 1 << 6,
+       SPECIFIER_LONG_LONG = 1 << 7,
+       SPECIFIER_FLOAT     = 1 << 8,
+       SPECIFIER_BOOL      = 1 << 9,
+       SPECIFIER_VOID      = 1 << 10,
+#ifdef PROVIDE_COMPLEX
+       SPECIFIER_COMPLEX   = 1 << 11,
+#endif
+#ifdef PROVIDE_IMAGINARY
+       SPECIFIER_IMAGINARY = 1 << 12,
+#endif
+} specifiers_t;
+
+typedef enum {
+       TYPE_QUALIFIER_CONST    = 1 << 0,
+       TYPE_QUALIFIER_RESTRICT = 1 << 1,
+       TYPE_QUALIFIER_VOLATILE = 1 << 2,
+       TYPE_QUALIFIER_INLINE   = 1 << 3,
+} type_qualifier_t;
+
+typedef enum {
+       STORAGE_CLASS_NONE,
+       STORAGE_CLASS_TYPEDEF,
+       STORAGE_CLASS_EXTERN,
+       STORAGE_CLASS_STATIC,
+       STORAGE_CLASS_AUTO,
+       STORAGE_CLASS_REGISTER
+} storage_class_t;
+
+typedef struct declaration_specifiers_t  declaration_specifiers_t;
+struct declaration_specifiers_t {
+       storage_class_t  storage_class;
+       int              type_qualifiers;
+};
+
+static
+void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
+{
+       type_type_t        type_type       = TYPE_INVALID;
+       atomic_type_type_t atomic_type     = ATOMIC_TYPE_INVALID;
+       unsigned           type_specifiers = 0;
+
+       while(1) {
+               switch(token.type) {
+
+               /* storage class */
+#define MATCH_STORAGE_CLASS(token, class)                                \
+               case token:                                                      \
+                       if(specifiers->storage_class != STORAGE_CLASS_NONE) {        \
+                               parse_error("multiple storage classes in declaration "   \
+                                           "specifiers");                               \
+                       }                                                            \
+                       specifiers->storage_class = class;                           \
+                       next_token();                                                \
+                       break;
+
+               MATCH_STORAGE_CLASS(T_typedef,  STORAGE_CLASS_TYPEDEF)
+               MATCH_STORAGE_CLASS(T_extern,   STORAGE_CLASS_EXTERN)
+               MATCH_STORAGE_CLASS(T_static,   STORAGE_CLASS_STATIC)
+               MATCH_STORAGE_CLASS(T_auto,     STORAGE_CLASS_AUTO)
+               MATCH_STORAGE_CLASS(T_register, STORAGE_CLASS_REGISTER)
+
+               /* type qualifiers */
+#define MATCH_TYPE_QUALIFIER(token, qualifier)                          \
+               case token:                                                     \
+                       specifiers->type_qualifiers |= qualifier;                   \
+                       next_token();                                               \
+                       break;
+
+               MATCH_TYPE_QUALIFIER(T_const,    TYPE_QUALIFIER_CONST);
+               MATCH_TYPE_QUALIFIER(T_restrict, TYPE_QUALIFIER_RESTRICT);
+               MATCH_TYPE_QUALIFIER(T_volatile, TYPE_QUALIFIER_VOLATILE);
+               MATCH_TYPE_QUALIFIER(T_inline,   TYPE_QUALIFIER_INLINE);
+
+               /* type specifiers */
+#define MATCH_SPECIFIER(token, specifier, name)                         \
+               case token:                                                     \
+                       next_token();                                               \
+                       if(type_specifiers & specifier) {                           \
+                               parse_error("multiple " name " type specifiers given"); \
+                       } else {                                                    \
+                               type_specifiers |= specifier;                           \
+                       }                                                           \
+                       break;
+
+               MATCH_SPECIFIER(T_void,       SPECIFIER_VOID,      "void")
+               MATCH_SPECIFIER(T_char,       SPECIFIER_CHAR,      "char")
+               MATCH_SPECIFIER(T_short,      SPECIFIER_SHORT,     "short")
+               MATCH_SPECIFIER(T_int,        SPECIFIER_INT,       "int")
+               MATCH_SPECIFIER(T_float,      SPECIFIER_FLOAT,     "float")
+               MATCH_SPECIFIER(T_double,     SPECIFIER_DOUBLE,    "double")
+               MATCH_SPECIFIER(T_signed,     SPECIFIER_SIGNED,    "signed")
+               MATCH_SPECIFIER(T_unsigned,   SPECIFIER_UNSIGNED,  "unsigned")
+               MATCH_SPECIFIER(T__Bool,      SPECIFIER_BOOL,      "_Bool")
+#ifdef PROVIDE_COMPLEX
+               MATCH_SPECIFIER(T__Complex,   SPECIFIER_COMPLEX,   "_Complex")
+#endif
+#ifdef PROVIDE_IMAGINARY
+               MATCH_SPECIFIER(T__Imaginary, SPECIFIER_IMAGINARY, "_Imaginary")
+#endif
+               case T_long:
+                       next_token();
+                       if(type_specifiers & SPECIFIER_LONG_LONG) {
+                               parse_error("too many long type specifiers given");
+                       } else if(type_specifiers & SPECIFIER_LONG) {
+                               type_specifiers |= SPECIFIER_LONG_LONG;
+                       } else {
+                               type_specifiers |= SPECIFIER_LONG;
+                       }
+                       break;
+
+               /* struct or union specifier */
+               /* enum specifier */
+               /* typedef name */
+
+               /* function specifier */
+               default:
+                       return;;
+               }
+       }
+
+       if(type_type == TYPE_INVALID) {
+               /* match valid basic types */
+               switch(type_specifiers) {
+               case SPECIFIER_VOID:
+                       atomic_type = ATOMIC_TYPE_VOID;
+                       break;
+               case SPECIFIER_CHAR:
+                       atomic_type = ATOMIC_TYPE_CHAR;
+                       break;
+               case SPECIFIER_SIGNED | SPECIFIER_CHAR:
+                       atomic_type = ATOMIC_TYPE_SCHAR;
+                       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_CHAR:
+                       atomic_type = ATOMIC_TYPE_UCHAR;
+                       break;
+               case SPECIFIER_SHORT:
+               case SPECIFIER_SIGNED | SPECIFIER_SHORT:
+               case SPECIFIER_SHORT | SPECIFIER_INT:
+               case SPECIFIER_SIGNED | SPECIFIER_SHORT | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_SHORT;
+                       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_SHORT:
+               case SPECIFIER_UNSIGNED | SPECIFIER_SHORT | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_USHORT;
+                       break;
+               case SPECIFIER_INT:
+               case SPECIFIER_SIGNED:
+               case SPECIFIER_SIGNED | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_INT;
+                       break;
+               case SPECIFIER_UNSIGNED:
+               case SPECIFIER_UNSIGNED | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_UINT;
+                       break;
+               case SPECIFIER_LONG:
+               case SPECIFIER_SIGNED | SPECIFIER_LONG:
+               case SPECIFIER_LONG | SPECIFIER_INT:
+               case SPECIFIER_SIGNED | SPECIFIER_LONG | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_LONG;
+                       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG:
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_ULONG;
+                       break;
+               case SPECIFIER_LONG_LONG:
+               case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG:
+               case SPECIFIER_LONG_LONG | SPECIFIER_INT:
+               case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_LONGLONG;
+                       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG:
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT:
+                       atomic_type = ATOMIC_TYPE_ULONGLONG;
+                       break;
+               case SPECIFIER_FLOAT:
+                       atomic_type = ATOMIC_TYPE_FLOAT;
+                       break;
+               case SPECIFIER_DOUBLE:
+                       atomic_type = ATOMIC_TYPE_DOUBLE;
+                       break;
+               case SPECIFIER_LONG | SPECIFIER_DOUBLE:
+                       atomic_type = ATOMIC_TYPE_LONG_DOUBLE;
+                       break;
+               case SPECIFIER_BOOL:
+                       atomic_type = ATOMIC_TYPE_BOOL;
+                       break;
+       #ifdef PROVIDE_COMPLEX
+               case SPECIFIER_FLOAT | SPECIFIER_COMPLEX:
+                       atomic_type = ATOMIC_TYPE_FLOAT_COMPLEX;
+                       break;
+               case SPECIFIER_DOUBLE | SPECIFIER_COMPLEX:
+                       atomic_type = ATOMIC_TYPE_DOUBLE_COMPLEX;
+                       break;
+               case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_COMPLEX:
+                       atomic_type = ATOMIC_TYPE_LONG_DOUBLE_COMPLEX;
+                       break;
+       #endif
+       #ifdef PROVIDE_IMAGINARY
+               case SPECIFIER_FLOAT | SPECIFIER_IMAGINARY:
+                       atomic_type = ATOMIC_TYPE_FLOAT_IMAGINARY;
+                       break;
+               case SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY:
+                       atomic_type = ATOMIC_TYPE_DOUBLE_IMAGINARY;
+                       break;
+               case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY:
+                       atomic_type = ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY;
+                       break;
+       #endif
+               default:
+                       /* invalid specifier combination, give an error message */
+                       if(type_specifiers == 0) {
+                               parse_error("no type specifiers given in declaration");
+                       } else if((type_specifiers & SPECIFIER_SIGNED) &&
+                                 (type_specifiers & SPECIFIER_UNSIGNED)) {
+                               parse_error("signed and unsigned specifiers gives");
+                       } else if(type_specifiers & (SPECIFIER_SIGNED | SPECIFIER_UNSIGNED)) {
+                               parse_error("only integer types can be signed or unsigned");
+                       } else {
+                               parse_error("multiple datatypes in declaration");
+                       }
+               }
+       } else {
+               if(type_specifiers != 0) {
+                       parse_error("multiple datatypes in declaration");
+               }
+       }
+}
+
+typedef struct declarator_t declarator_t;
+struct declarator_t {
+       /* pointer stuff... */
+       symbol_t     *symbol;
+
+       declarator_t *next;
+};
+
+declarator_t *parse_declarator()
+{
+       while(token.type == '*') {
+               /* pointer */
+               next_token();
+               //parse_type_qualifiers();
+       }
+
+       declarator_t *declarator;
+
+       switch(token.type) {
+       case T_IDENTIFIER:
+               declarator = allocate_ast(sizeof(declarator[0]));
+               memset(declarator, 0, sizeof(declarator[0]));
+               declarator->symbol = token.v.symbol;
+               return declarator;
+       case '(':
+               next_token();
+               declarator = parse_declarator();
+               expect(')')
+               return declarator;
+       default:
+               parse_error("problem while parsing declarator");
+       }
+
+       if(token.type == '(') {
+               next_token();
+
+               /* parse parameter-type-list or identifier-list */
+
+               expect(')');
+       } else if(token.type == '[') {
+               next_token();
+
+               /* multiple type qualifiers, and static */
+
+               /* assignment_expression or '*' or nothing */
+
+               expect(']');
+       }
+
+       return declarator;
+}
+
+declarator_t *parse_init_declarator()
+{
+       declarator_t *declarator = parse_declarator();
+       if(token.type == '=') {
+               next_token();
+               //parse_initialize();
+       }
+
+       return declarator;
+}
+
+typedef struct declaration_t declaration_t;
+struct declaration_t {
+       declaration_specifiers_t  specifiers;
+       declaration_t            *declarators;
+};
+
+void parse_declaration()
+{
+       declaration_specifiers_t specifiers;
+       memset(&specifiers, 0, sizeof(specifiers));
+       parse_declaration_specifiers(&specifiers);
+}
+
+#if 0
+namespace_t *parse(FILE *in, const char *input_name)
+{
+       namespace_t *namespace = parse_namespace();
+
+       return namespace;
+}
+#endif
diff --git a/preprocessor_tokens.inc b/preprocessor_tokens.inc
new file mode 100644 (file)
index 0000000..f1760ca
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef TS
+#define TS(x,str,val)
+#endif
+
+TS(IDENTIFIER,     "identifier", = 256)
+TS(INTEGER,        "integer number",)
+TS(STRING_LITERAL, "string literal",)
+
+#define S(x)   T(x,#x,)
+S(include)
+S(define)
+S(undef)
+S(line)
+S(error)
+S(pragma)
+S(if)
+S(else)
+S(elif)
+S(endif)
+S(ifdef)
+S(ifndef)
+#undef S
+
+T(DOTDOTDOT,      "...",)
+
+#define T_LAST_TOKEN  (T_DOTDOTDOT+1)
+
+T(LPAREN,          "(", = '(')
+T(RPAREN,          ")", = ')')
diff --git a/symbol.h b/symbol.h
new file mode 100644 (file)
index 0000000..413319b
--- /dev/null
+++ b/symbol.h
@@ -0,0 +1,11 @@
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+typedef struct symbol_t symbol_t;
+
+struct symbol_t {
+       const char          *string;
+       unsigned             ID;
+};
+
+#endif
diff --git a/symbol_table.c b/symbol_table.c
new file mode 100644 (file)
index 0000000..c422e0c
--- /dev/null
@@ -0,0 +1,68 @@
+#include <config.h>
+
+#include "symbol_table_t.h"
+#include "adt/hash_string.h"
+#include "adt/obst.h"
+
+struct obstack symbol_obstack;
+
+static inline
+void init_symbol_table_entry(symbol_t *entry, const char *string)
+{
+       entry->ID     = 0;
+       entry->string = string;
+}
+
+#define HashSet                    symbol_table_t
+#define HashSetIterator            symbol_table_iterator_t
+#define HashSetEntry               symbol_table_hash_entry_t
+#define ValueType                  symbol_t*
+#define NullValue                  NULL
+#define DeletedValue               ((symbol_t*)-1)
+#define KeyType                    const char *
+#define ConstKeyType               const char *
+#define GetKey(value)              (value)->string
+#define InitData(this,value,key)   { (value) = (ValueType) obstack_alloc(&symbol_obstack, sizeof(symbol_t)); init_symbol_table_entry((value), key); }
+#define Hash(this, key)            hash_string(key)
+#define KeysEqual(this,key1,key2)  (strcmp(key1, key2) == 0)
+#define SetRangeEmpty(ptr,size)    memset(ptr, 0, (size) * sizeof(symbol_table_hash_entry_t))
+
+#define hashset_init            _symbol_table_init
+#define hashset_init_size       _symbol_table_init_size
+#define hashset_destroy         _symbol_table_destroy
+#define hashset_insert          _symbol_table_insert
+#define hashset_remove          _symbol_table_remove
+#define hashset_find            _symbol_table_find
+#define hashset_size            _symbol_table_size
+#define hashset_iterator_init   _symbol_table_iterator_init
+#define hashset_iterator_next   _symbol_table_iterator_next
+#define hashset_remove_iterator _symbol_table_remove_iterator
+
+#include "adt/hashset.c"
+
+static symbol_table_t  symbol_table;
+static symbol_table_t  preprocessor_symbol_table;
+
+symbol_t *symbol_table_insert(const char *symbol)
+{
+       return _symbol_table_insert(&symbol_table, symbol);
+}
+
+symbol_t *preprocessor_symbol_table_insert(const char *symbol)
+{
+       return _symbol_table_insert(&preprocessor_symbol_table, symbol);
+}
+
+void init_symbol_table(void)
+{
+       obstack_init(&symbol_obstack);
+       _symbol_table_init(&symbol_table);
+       _symbol_table_init(&preprocessor_symbol_table);
+}
+
+void exit_symbol_table(void)
+{
+       _symbol_table_destroy(&symbol_table);
+       _symbol_table_destroy(&preprocessor_symbol_table);
+       obstack_free(&symbol_obstack, NULL);
+}
diff --git a/symbol_table.h b/symbol_table.h
new file mode 100644 (file)
index 0000000..b6ef219
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef SYMBOL_TABLE_H
+#define SYMBOL_TABLE_H
+
+#include "symbol.h"
+#include "adt/obst.h"
+
+symbol_t *symbol_table_insert(const char *symbol);
+symbol_t *preprocessor_symbol_table_insert(const char *symbol);
+symbol_t *preprocessor_symbol_table_find(const char *symbol);
+
+void init_symbol_table(void);
+void exit_symbol_table(void);
+
+extern struct obstack symbol_obstack;
+
+#endif
diff --git a/symbol_table_t.h b/symbol_table_t.h
new file mode 100644 (file)
index 0000000..f175ca6
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef SYMBOL_TABLE_T_H
+#define SYMBOL_TABLE_T_H
+
+#include "symbol_table.h"
+#include "symbol.h"
+
+#define HashSet          symbol_table_t
+#define HashSetIterator  symbol_table_iterator_t
+#define HashSetEntry     symbol_table_hash_entry_t
+#define ValueType        symbol_t*
+#include "adt/hashset.h"
+#undef ValueType
+#undef HashSetEntry
+#undef HashSetIterator
+#undef HashSet
+
+#endif
diff --git a/token.c b/token.c
new file mode 100644 (file)
index 0000000..50e166a
--- /dev/null
+++ b/token.c
@@ -0,0 +1,81 @@
+#include <config.h>
+
+#include "token_t.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "symbol.h"
+#include "adt/array.h"
+
+static symbol_t *token_symbols[T_LAST_TOKEN];
+
+void init_tokens(void)
+{
+       symbol_t *symbol;
+
+       memset(token_symbols, 0, T_LAST_TOKEN * sizeof(token_symbols[0]));
+
+#define T(x,str,val)                                               \
+       assert(T_##x >= 0 && T_##x < T_LAST_TOKEN);                    \
+       symbol               = symbol_table_insert(str);               \
+       symbol->ID           = T_##x;                                  \
+       token_symbols[T_##x] = symbol;
+
+#define TS(x,str,val)                                              \
+       assert(T_##x >= 0 && T_##x < T_LAST_TOKEN);                    \
+       symbol               = symbol_table_insert(str);               \
+       token_symbols[T_##x] = symbol;
+
+#include "tokens.inc"
+
+#undef TS
+#undef T
+}
+
+void exit_tokens(void)
+{
+}
+
+void print_token_type(FILE *f, token_type_t token_type)
+{
+       if(token_type >= 0 && token_type < 256) {
+               fprintf(f, "'%c'", token_type);
+               return;
+       }
+       if(token_type == T_EOF) {
+               fputs("end of file", f);
+               return;
+       }
+
+       int token_symbols_len = T_LAST_TOKEN;
+       if(token_type < 0 || token_type >= token_symbols_len) {
+               fputs("invalid token", f);
+               return;
+       }
+
+       const symbol_t *symbol = token_symbols[token_type];
+       if(symbol != NULL) {
+               fputs(symbol->string, f);
+       } else {
+               fputs("unknown token", f);
+       }
+}
+
+void print_token(FILE *f, const token_t *token)
+{
+       switch(token->type) {
+       case T_IDENTIFIER:
+               fprintf(f, "symbol '%s'", token->v.symbol->string);
+               break;
+       case T_INTEGER:
+               fprintf(f, "integer number %d", token->v.intvalue);
+               break;
+       case T_STRING_LITERAL:
+               fprintf(f, "string '%s'", token->v.string);
+               break;
+       default:
+               print_token_type(f, token->type);
+               break;
+       }
+}
diff --git a/token_t.h b/token_t.h
new file mode 100644 (file)
index 0000000..21669af
--- /dev/null
+++ b/token_t.h
@@ -0,0 +1,44 @@
+#ifndef TOKEN_T_H
+#define TOKEN_T_H
+
+#include <stdio.h>
+#include "symbol.h"
+#include "symbol_table.h"
+
+typedef enum {
+#define T(x,str,val) T_##x val,
+#define TS(x,str,val) T_##x val,
+#include "tokens.inc"
+#undef TS
+#undef T
+
+       T_EOF   = -1,
+       T_ERROR = -2
+} token_type_t;
+
+typedef enum {
+#define T(x,str,val) TP_##x val,
+#define TS(x,str,val) TP_##x val,
+#include "tokens.inc"
+#undef TS
+#undef T
+
+       TP_EOF   = T_EOF,
+       TP_ERROR = T_ERROR
+} preprocessor_token_type_t;
+
+typedef struct {
+       int type;
+       union {
+               symbol_t   *symbol;
+               int         intvalue;
+               const char *string;
+       } v;
+} token_t;
+
+void init_tokens(void);
+void exit_tokens(void);
+void print_token_type(FILE *out, token_type_t token_type);
+void print_token(FILE *out, const token_t *token);
+
+#endif
diff --git a/tokens.inc b/tokens.inc
new file mode 100644 (file)
index 0000000..f4f777a
--- /dev/null
@@ -0,0 +1,106 @@
+#ifndef TS
+#define TS(x,str,val)
+#endif
+
+TS(IDENTIFIER,     "identifier", = 256)
+TS(INTEGER,        "integer number",)
+TS(STRING_LITERAL, "string literal",)
+
+#define S(x)   T(x,#x,)
+S(auto)
+S(break)
+S(case)
+S(char)
+S(const)
+S(continue)
+S(default)
+S(do)
+S(double)
+S(else)
+S(enum)
+S(extern)
+S(float)
+S(for)
+S(goto)
+S(if)
+S(inline)
+S(int)
+S(long)
+S(register)
+S(restrict)
+S(return)
+S(short)
+S(signed)
+S(sizeof)
+S(static)
+S(struct)
+S(switch)
+S(typedef)
+S(union)
+S(unsigned)
+S(void)
+S(volatile)
+S(while)
+S(_Bool)
+S(_Complex)
+S(_Imaginary)
+#undef S
+
+T(SELECT,                   "->",)
+T(PLUSPLUS,                 "++",)
+T(MINUSMINUS,               "--",)
+T(LESSLESS,                 "<<",)
+T(GREATERGREATER,           ">>",)
+T(LESSEQUAL,                "<=",)
+T(GREATEREQUAL,             ">=",)
+T(EQUALEQUAL,               "==",)
+T(EXCLAMATIONMARKEQUAL,     "!=",)
+T(ANDAND,                   "&&",)
+T(PIPEPIPE,                 "||",)
+T(DOTDOTDOT,                "...",)
+T(ASTERISKEQUAL,            "*=",)
+T(SLASHEQUAL,               "/=",)
+T(PERCENTEQUAL,             "%=",)
+T(PLUSEQUAL,                "+=",)
+T(MINUSEQUAL,               "-=",)
+T(LESSLESSEQUAL,            "<<=",)
+T(GREATERGREATEREQUAL,      ">>=",)
+T(ANDEQUAL,                 "&=",)
+T(CARETEQUAL,               "^=",)
+T(PIPEEQUAL,                "|=",)
+T(HASHHASH,                 "##",)
+
+#define T_LAST_TOKEN  (T_HASHHASH+1)
+
+T(RBRACK,          "[", = '[')
+T(LBRACK,          "]", = ']')
+T(LBRACE,          "(", = '(')
+T(RBRACE,          ")", = ')')
+T(RCURLY,          "{", = '{')
+T(LCURLY,          "}", = '}')
+T(DOT,             ".", = '.')
+T(AND,             "&", = '&')
+T(ASTERISK,        "*", = '*')
+T(PLUS,            "+", = '+')
+T(MINUS,           "-", = '-')
+T(TILDE,           "~", = '~')
+T(EXCLAMATIONMARK, "!", = '!')
+T(SLASH,           "/", = '/')
+T(PERCENT,         "%", = '%')
+T(LESS,            "<", = '<')
+T(GREATER,         ">", = '>')
+T(CARET,           "^", = '^')
+T(PIPE,            "|", = '|')
+T(QUESTIONMARK,    "?", = '?')
+T(COLON,           ":", = ':')
+T(SEMICOLON,       ";", = ';')
+T(EQUAL,           "=", = '=')
+T(COMMA,           ",", = ',')
+T(HASH,            "#", = '#')
+
+T(LESSCOLON,                "<:",   = '[')
+T(COLONGREATER,             ":>",   = ']')
+T(LESSPERCENT,              "<%",   = '{')
+T(PERCENTGREATER,           "%>",   = '}')
+T(PERCENTCOLON,             "%:",   = '#')
+T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
diff --git a/type.c b/type.c
new file mode 100644 (file)
index 0000000..7f64f86
--- /dev/null
+++ b/type.c
@@ -0,0 +1,141 @@
+#include <config.h>
+
+#include "type_t.h"
+#include "adt/error.h"
+
+static struct obstack  _type_obst;
+struct obstack        *type_obst = &_type_obst;
+
+void init_type_module()
+{
+       obstack_init(type_obst);
+}
+
+void exit_type_module()
+{
+       obstack_free(type_obst, NULL);
+}
+
+static
+void print_atomic_type(FILE *out, const atomic_type_t *type)
+{
+       switch(type->atype) {
+       case ATOMIC_TYPE_INVALID:   fputs("INVALIDATOMIC", out); break;
+       case ATOMIC_TYPE_BOOL:      fputs("bool", out); break;
+       case ATOMIC_TYPE_CHAR:      fputs("char", out); break;
+       case ATOMIC_TYPE_SCHAR:     fputs("signed char", out); break;
+       case ATOMIC_TYPE_UCHAR:     fputs("unsigned char", out); break;
+       case ATOMIC_TYPE_INT:       fputs("int", out); break;
+       case ATOMIC_TYPE_UINT:      fputs("unsigned int", out); break;
+       case ATOMIC_TYPE_SHORT:     fputs("short", out); break;
+       case ATOMIC_TYPE_USHORT:    fputs("unsigned short", out); break;
+       case ATOMIC_TYPE_LONG:      fputs("long", out); break;
+       case ATOMIC_TYPE_ULONG:     fputs("unsigned long", out); break;
+       case ATOMIC_TYPE_LONGLONG:  fputs("long long", out); break;
+       case ATOMIC_TYPE_ULONGLONG: fputs("unsigned long long", out); break;
+       case ATOMIC_TYPE_FLOAT:     fputs("float", out); break;
+       case ATOMIC_TYPE_DOUBLE:    fputs("double", out); break;
+       default:                    fputs("UNKNOWNATOMIC", out); break;
+       }
+}
+
+static
+void print_method_type(FILE *out, const method_type_t *type)
+{
+       fputs("<", out);
+       print_type(out, type->result_type);
+       fputs(" ", out);
+
+       if(type->abi_style != NULL) {
+               fprintf(out, "\"%s\" ", type->abi_style);
+       }
+       fputs("method(", out);
+       method_parameter_type_t *param_type = type->parameter_types;
+       int first = 1;
+       while(param_type != NULL) {
+               if(first) {
+                       first = 0;
+               } else {
+                       fputs(", ", out);
+               }
+               print_type(out, param_type->type);
+               param_type = param_type->next;
+       }
+       fputs(")>", out);
+}
+
+static
+void print_pointer_type(FILE *out, const pointer_type_t *type)
+{
+       print_type(out, type->points_to);
+       fputs("*", out);
+}
+
+void print_type(FILE *out, const type_t *type)
+{
+       if(type == NULL) {
+               fputs("nil type", out);
+               return;
+       }
+
+       switch(type->type) {
+       case TYPE_INVALID:
+               fputs("invalid", out);
+               return;
+       case TYPE_ENUM:
+       case TYPE_TYPEDEF:
+               fputs("TODO", out);
+               return;
+       case TYPE_ATOMIC:
+               print_atomic_type(out, (const atomic_type_t*) type);
+               return;
+       case TYPE_COMPOUND_STRUCT:
+       case TYPE_COMPOUND_UNION:
+               fprintf(out, "%s", ((const compound_type_t*) type)->symbol->string);
+               return;
+       case TYPE_METHOD:
+               print_method_type(out, (const method_type_t*) type);
+               return;
+       case TYPE_POINTER:
+               print_pointer_type(out, (const pointer_type_t*) type);
+               return;
+       }
+       fputs("unknown", out);
+}
+
+int type_valid(const type_t *type)
+{
+       return type->type != TYPE_INVALID;
+}
+
+int is_type_int(const type_t *type)
+{
+       if(type->type != TYPE_ATOMIC)
+               return 0;
+
+       atomic_type_t *atomic_type = (atomic_type_t*) type;
+       switch(atomic_type->atype) {
+       case ATOMIC_TYPE_CHAR:
+       case ATOMIC_TYPE_SCHAR:
+       case ATOMIC_TYPE_UCHAR:
+       case ATOMIC_TYPE_SHORT:
+       case ATOMIC_TYPE_USHORT:
+       case ATOMIC_TYPE_INT:
+       case ATOMIC_TYPE_UINT:
+       case ATOMIC_TYPE_LONG:
+       case ATOMIC_TYPE_ULONG:
+       case ATOMIC_TYPE_LONGLONG:
+       case ATOMIC_TYPE_ULONGLONG:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+static __attribute__((unused))
+void dbg_type(const type_t *type)
+{
+       print_type(stdout,type);
+       puts("\n");
+       fflush(stdout);
+}
diff --git a/type.h b/type.h
new file mode 100644 (file)
index 0000000..5325463
--- /dev/null
+++ b/type.h
@@ -0,0 +1,33 @@
+#ifndef TYPE_H
+#define TYPE_H
+
+#include <stdio.h>
+
+typedef struct type_t                   type_t;
+typedef struct atomic_type_t            atomic_type_t;
+typedef struct pointer_type_t           pointer_type_t;
+typedef struct method_parameter_type_t  method_parameter_type_t;
+typedef struct method_type_t            method_type_t;
+typedef struct compound_entry_t         compound_entry_t;
+typedef struct compound_type_t          compound_type_t;
+
+void init_type_module(void);
+void exit_type_module(void);
+
+/**
+ * prints a human readable form of @p type to a stream
+ */
+void print_type(FILE* out, const type_t *type);
+
+/**
+ * returns 1 if type contains integer numbers
+ */
+int is_type_int(const type_t *type);
+
+/**
+ * returns 1 if the type is valid. A type is valid if it contains no unresolved
+ * references anymore and is not of TYPE_INVALID.
+ */
+int type_valid(const type_t *type);
+
+#endif
diff --git a/type_t.h b/type_t.h
new file mode 100644 (file)
index 0000000..6e05d31
--- /dev/null
+++ b/type_t.h
@@ -0,0 +1,92 @@
+#ifndef TYPE_T_H
+#define TYPE_T_H
+
+#include "type.h"
+#include "symbol.h"
+#include "lexer_t.h"
+#include "adt/obst.h"
+
+struct obstack *type_obst;
+
+typedef enum {
+       TYPE_INVALID,
+       TYPE_ATOMIC,
+       TYPE_COMPOUND_STRUCT,
+       TYPE_COMPOUND_UNION,
+       TYPE_ENUM,
+       TYPE_TYPEDEF,
+       TYPE_METHOD,
+       TYPE_POINTER
+} type_type_t;
+
+typedef enum {
+       ATOMIC_TYPE_INVALID,
+       ATOMIC_TYPE_VOID,
+       ATOMIC_TYPE_CHAR,
+       ATOMIC_TYPE_SCHAR,
+       ATOMIC_TYPE_UCHAR,
+       ATOMIC_TYPE_SHORT,
+       ATOMIC_TYPE_USHORT,
+       ATOMIC_TYPE_INT,
+       ATOMIC_TYPE_UINT,
+       ATOMIC_TYPE_LONG,
+       ATOMIC_TYPE_ULONG,
+       ATOMIC_TYPE_LONGLONG,
+       ATOMIC_TYPE_ULONGLONG,
+       ATOMIC_TYPE_FLOAT,
+       ATOMIC_TYPE_DOUBLE,
+       ATOMIC_TYPE_LONG_DOUBLE,
+       ATOMIC_TYPE_BOOL,
+#ifdef PROVIDE_COMPLEX
+       ATOMIC_TYPE_FLOAT_COMPLEX,
+       ATOMIC_TYPE_DOUBLE_COMPLEX,
+       ATOMIC_TYPE_LONG_DOUBLE_COMPLEX,
+#endif
+#ifdef PROVIDE_IMAGINARY
+       ATOMIC_TYPE_FLOAT_IMAGINARY,
+       ATOMIC_TYPE_DOUBLE_IMAGINARY,
+       ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY,
+#endif
+} atomic_type_type_t;
+
+struct type_t {
+       type_type_t  type;
+};
+
+struct atomic_type_t {
+       type_t              type;
+       atomic_type_type_t  atype;
+};
+
+struct pointer_type_t {
+       type_t   type;
+       type_t  *points_to;
+};
+
+struct method_parameter_type_t {
+       type_t                  *type;
+       method_parameter_type_t *next;
+};
+
+struct method_type_t {
+       type_t                   type;
+       type_t                  *result_type;
+       method_parameter_type_t *parameter_types;
+       const char              *abi_style;
+};
+
+struct compound_entry_t {
+       type_t            *type;
+       symbol_t          *symbol;
+       compound_entry_t  *next;
+       source_position_t  source_position;
+};
+
+struct compound_type_t {
+       type_t             type;
+       compound_entry_t  *entries;
+       symbol_t          *symbol;
+       source_position_t  source_position;
+};
+
+#endif