From d5e8df5885f97ea65839f8970b8697549c207610 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 9 Jun 2007 22:03:29 +0000 Subject: [PATCH] Initial import of c parser [r18314] --- Makefile | 58 ++ TODO | 5 + adt/align.h | 56 ++ adt/array.h | 334 +++++++++++ adt/bitfiddle.h | 194 +++++++ adt/error.h | 8 + adt/fourcc.h | 18 + adt/hash_string.h | 35 ++ adt/hashset.c | 596 +++++++++++++++++++ adt/hashset.h | 58 ++ adt/obst.h | 15 + adt/pset.c | 36 ++ adt/pset.h | 111 ++++ adt/strset.c | 28 + adt/strset.h | 102 ++++ adt/util.h | 33 ++ adt/xmalloc.c | 62 ++ adt/xmalloc.h | 28 + ast.c | 324 +++++++++++ ast.h | 47 ++ ast_t.h | 267 +++++++++ config.h | 0 lexer.c | 816 +++++++++++++++++++++++++++ lexer.h | 11 + lexer_t.h | 33 ++ lextest/legalc/comment.c | 17 + lextest/legalc/operators.c | 6 + lextest/legalc/splicetest.c | 11 + lextest/legalc/string.c | 9 + lextest/legalc/test1.c | 3 + lextest/legalc/test2.c | 13 + lextest/legalc/trigraphs.c | 16 + lextest/tokenstreams/operators | 12 + lextest/tokenstreams/stringtrigraphs | 12 + lextest/tokenstreams/t | 3 + lextest/tokenstreams/t2 | 1 + lextest/tokenstreams/t3 | 3 + lextest/warnings/h1.h | 1 + lextest/warnings/h10.h | 1 + lextest/warnings/h11.h | 1 + lextest/warnings/h12.h | 1 + lextest/warnings/h13.h | 1 + lextest/warnings/h14.h | 1 + lextest/warnings/h15.h | 1 + lextest/warnings/h16.h | 0 lextest/warnings/h2.h | 1 + lextest/warnings/h3.h | 1 + lextest/warnings/h4.h | 1 + lextest/warnings/h5.h | 1 + lextest/warnings/h6.h | 1 + lextest/warnings/h7.h | 1 + lextest/warnings/h8.h | 1 + lextest/warnings/h9.h | 1 + main.c | 73 +++ parser.c | 376 ++++++++++++ preprocessor_tokens.inc | 29 + symbol.h | 11 + symbol_table.c | 68 +++ symbol_table.h | 16 + symbol_table_t.h | 17 + token.c | 81 +++ token_t.h | 44 ++ tokens.inc | 106 ++++ type.c | 141 +++++ type.h | 33 ++ type_t.h | 92 +++ 66 files changed, 4483 insertions(+) create mode 100644 Makefile create mode 100644 TODO create mode 100644 adt/align.h create mode 100644 adt/array.h create mode 100644 adt/bitfiddle.h create mode 100644 adt/error.h create mode 100644 adt/fourcc.h create mode 100644 adt/hash_string.h create mode 100644 adt/hashset.c create mode 100644 adt/hashset.h create mode 100644 adt/obst.h create mode 100644 adt/pset.c create mode 100644 adt/pset.h create mode 100644 adt/strset.c create mode 100644 adt/strset.h create mode 100644 adt/util.h create mode 100644 adt/xmalloc.c create mode 100644 adt/xmalloc.h create mode 100644 ast.c create mode 100644 ast.h create mode 100644 ast_t.h create mode 100644 config.h create mode 100644 lexer.c create mode 100644 lexer.h create mode 100644 lexer_t.h create mode 100644 lextest/legalc/comment.c create mode 100644 lextest/legalc/operators.c create mode 100644 lextest/legalc/splicetest.c create mode 100644 lextest/legalc/string.c create mode 100644 lextest/legalc/test1.c create mode 100644 lextest/legalc/test2.c create mode 100644 lextest/legalc/trigraphs.c create mode 100644 lextest/tokenstreams/operators create mode 100644 lextest/tokenstreams/stringtrigraphs create mode 100644 lextest/tokenstreams/t create mode 100644 lextest/tokenstreams/t2 create mode 100644 lextest/tokenstreams/t3 create mode 100644 lextest/warnings/h1.h create mode 100644 lextest/warnings/h10.h create mode 100644 lextest/warnings/h11.h create mode 100644 lextest/warnings/h12.h create mode 100644 lextest/warnings/h13.h create mode 100644 lextest/warnings/h14.h create mode 100644 lextest/warnings/h15.h create mode 100644 lextest/warnings/h16.h create mode 100644 lextest/warnings/h2.h create mode 100644 lextest/warnings/h3.h create mode 100644 lextest/warnings/h4.h create mode 100644 lextest/warnings/h5.h create mode 100644 lextest/warnings/h6.h create mode 100644 lextest/warnings/h7.h create mode 100644 lextest/warnings/h8.h create mode 100644 lextest/warnings/h9.h create mode 100644 main.c create mode 100644 parser.c create mode 100644 preprocessor_tokens.inc create mode 100644 symbol.h create mode 100644 symbol_table.c create mode 100644 symbol_table.h create mode 100644 symbol_table_t.h create mode 100644 token.c create mode 100644 token_t.h create mode 100644 tokens.inc create mode 100644 type.c create mode 100644 type.h create mode 100644 type_t.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a3961a0 --- /dev/null +++ b/Makefile @@ -0,0 +1,58 @@ +GOAL = cparser + +#FIRM_CFLAGS = `pkg-config --cflags libfirm` +#FIRM_LIBS = `pkg-config --libs libfirm` +FIRM_CFLAGS = -I$(HOME)/projects/firm/libfirm/include -I$(HOME)/projects/firm/libcore +FIRM_LIBS = -L$(HOME)/projects/firm/build/i686-pc-linux-gnu/debug -lfirm -llpp -lcore -lm + +CFLAGS += -Wall -W -Werror -O0 -g3 -std=c99 +CFLAGS += -DHAVE_CONFIG_H +CFLAGS += -I . +CFLAGS += $(FIRM_CFLAGS) + +LFLAGS = $(FIRM_LIBS) -llpp -ldl --export-dynamic + +SOURCES := \ + adt/hashset.c \ + adt/pset.c \ + adt/strset.c \ + adt/xmalloc.c \ + ast.c \ + lexer.c \ + main.c \ + parser.c \ + symbol_table.c \ + token.c \ + type.c + +OBJECTS = $(SOURCES:%.c=build/%.o) + +Q = @ + +.PHONY : all clean dirs + +all: $(GOAL) + +ifeq ($(findstring $(MAKECMDGOALS), clean depend),) +-include .depend +endif + +.depend: $(SOURCES) + @echo "===> DEPEND" + @rm -f $@ && touch $@ && makedepend -p "$@ build/" -Y -f $@ -- $(CFLAGS) -- $(SOURCES) 2> /dev/null && rm $@.bak + +$(GOAL): build/adt $(OBJECTS) + @echo "===> LD $@" + $(Q)$(CC) -rdynamic $(OBJECTS) $(LFLAGS) -o $(GOAL) + +build/adt: + @echo "===> MKDIR $@" + $(Q)mkdir -p $@ + +build/%.o: %.c + @echo '===> CC $<' + $(Q)$(CC) $(CFLAGS) -c $< -o $@ + +clean: + @echo '===> CLEAN' + $(Q)rm -rf build $(GOAL) .depend diff --git a/TODO b/TODO new file mode 100644 index 0000000..1dd8883 --- /dev/null +++ b/TODO @@ -0,0 +1,5 @@ +Lexer: +- proper support of preprocessor +- parse float numbers +- octal&hex escape sequences +- handling of newline is probably not correct for all strange OSes diff --git a/adt/align.h b/adt/align.h new file mode 100644 index 0000000..01e877f --- /dev/null +++ b/adt/align.h @@ -0,0 +1,56 @@ +/* + * Project: libFIRM + * File name: ir/adt/align.h + * Purpose: macros for alignment. + * Author: Markus Armbruster + * Modified by: + * Created: 1999 by getting from fiasco + * CVS-ID: $Id$ + * Copyright: (c) 1995, 1996 Markus Armbruster + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ +#ifndef _ALIGN_H +#define _ALIGN_H + +#include + +/** + * @file align.h + */ + +/** A size handled efficiently by malloc(), at least 1K. */ +#define PREF_MALLOC_SIZE 2048 + + +/** A wrapper around GNU C's __attribute__ */ + +/* According to the documentation, the attributes we are interested in + work with 2.5, but we encountered trouble before 2.7. */ +#if defined (__GNUC__) && __GNUC__ >= 2 && __GNUC_MINOR__ >= 7 +# define HAVE_ATTRIBUTE 1 +# define ATTRIBUTE(attrs) __attribute__ (attrs) +#else +# define ATTRIBUTE(attrs) +#endif + + +/* Alignment */ + +/** A type that has most constrained alignment. */ +typedef union { + long double d; + void *p; + long l; +} aligned_type ATTRIBUTE ((aligned)); + +/** Inquiring about the alignment of a type. */ +#ifdef __GNUC__ +# define ALIGNOF(type) __alignof__ (type) +#else +# define ALIGNOF(type) offsetof (struct { char c; type d; }, d) +#endif + +/** Maximal alignment required for any type. */ +#define MAX_ALIGN ALIGNOF (aligned_type) + +#endif /* _ALIGN_H */ diff --git a/adt/array.h b/adt/array.h new file mode 100644 index 0000000..3fc8c86 --- /dev/null +++ b/adt/array.h @@ -0,0 +1,334 @@ +/* + * Project: libFIRM + * File name: ir/adt/array.h + * Purpose: Declarations for Array. + * Author: Markus Armbruster + * Modified by: + * Created: 1999 by getting from fiasco + * CVS-ID: $Id$ + * Copyright: (c) 1995, 1996 Markus Armbruster + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ + +/** + * @file array.h Dynamic and flexible arrays for C. + */ + +#ifndef _ARRAY_H +#define _ARRAY_H + +#include +#include +#include + +#include "fourcc.h" +#include "align.h" +#include "xmalloc.h" + +#define ARR_D_MAGIC FOURCC('A','R','R','D') +#define ARR_A_MAGIC FOURCC('A','R','R','A') +#define ARR_F_MAGIC FOURCC('A','R','R','F') + +/** + * Creates a flexible array. + * + * @param type The element type of the new array. + * @param nelts a size_t expression evaluating to the number of elements + * + * This macro creates a flexible array of a given type at runtime. + * The size of the array can be changed later. + * + * @return A pointer to the flexible array (can be used as a pointer to the + * first element of this array). + */ +#define NEW_ARR_F(type, nelts) \ + ((type *)_new_arr_f ((nelts), sizeof(type) * (nelts))) + +/** + * Creates a new flxible array with the same number of elements as a + * given one. + * + * @param type The element type of the new array. + * @param arr An array from which the number of elements will be taken + * + * This macro creates a flexible array of a given type at runtime. + * The size of the array can be changed later. + * + * @return A pointer to the flexible array (can be used as a pointer to the + * first element of this array). + */ +#define CLONE_ARR_F(arr) \ + NEW_ARR_F (__typeof__(arr[0]), ARR_LEN ((arr))) + +/** + * Duplicates an array and returns the new flexible one. + * + * @param type The element type of the new array. + * @param arr An array from which the elements will be duplicated + * + * This macro creates a flexible array of a given type at runtime. + * The size of the array can be changed later. + * + * @return A pointer to the flexible array (can be used as a pointer to the + * first element of this array). + */ +#define DUP_ARR_F(arr) \ + memcpy (CLONE_ARR_F (__typeof__(arr[0]), (arr)), (arr), sizeof(__typeof__(arr[0])) * ARR_LEN((arr))) + +/** + * Delete a flexible array. + * + * @param arr The flexible array. + */ +#define DEL_ARR_F(arr) (_del_arr_f ((arr))) + +/** + * Creates a dynamic array on an obstack. + * + * @param type The element type of the new array. + * @param obstack A struct obstack * were the data will be allocated + * @param nelts A size_t expression evaluating to the number of elements + * + * This macro creates a dynamic array of a given type at runtime. + * The size of the array cannot be changed later. + * + * @return A pointer to the dynamic array (can be used as a pointer to the + * first element of this array). + */ +#define NEW_ARR_D(type, obstack, nelts) \ + ( nelts \ + ? (type *)_new_arr_d ((obstack), (nelts), sizeof(type) * (nelts)) \ + : (type *)arr_mt_descr.v.elts) + +/** + * Creates a new dynamic array with the same number of elements as a + * given one. + * + * @param type The element type of the new array. + * @param obstack An struct obstack * were the data will be allocated + * @param arr An array from which the number of elements will be taken + * + * This macro creates a dynamic array of a given type at runtime. + * The size of the array cannot be changed later. + * + * @return A pointer to the dynamic array (can be used as a pointer to the + * first element of this array). + */ +#define CLONE_ARR_D(obstack, arr) \ + NEW_ARR_D (__typeof__(arr[0]), (obstack), ARR_LEN ((arr))) + +/** + * Duplicates an array and returns the new dynamic one. + * + * @param type The element type of the new array. + * @param obstack An struct obstack * were the data will be allocated + * @param arr An array from which the elements will be duplicated + * + * This macro creates a dynamic array of a given type at runtime. + * The size of the array cannot be changed later. + * + * @return A pointer to the dynamic array (can be used as a pointer to the + * first element of this array). + */ +#define DUP_ARR_D(obstack, arr) \ + memcpy (CLONE_ARR_D (__typeof__(arr[0]), (obstack), (arr)), (arr), sizeof(__typeof__(arr[0])) * ARR_LEN ((arr))) + +/** + * Create an automatic array which will be deleted at return from function. + * Beware, the data will be allocated on the function stack! + * + * @param type The element type of the new array. + * @param var A lvalue of type (type *) which will hold the new array. + * @param n number of elements in this array. + * + * This macro creates a dynamic array on the functions stack of a given type at runtime. + * The size of the array cannot be changed later. + */ +#define NEW_ARR_A(type, var, n) \ + do { \ + int _nelts = (n); \ + assert (_nelts >= 0); \ + (var) = (void *)((_arr_descr *)alloca (_ARR_ELTS_OFFS + sizeof(type) * _nelts))->v.elts; \ + _ARR_SET_DBGINF (_ARR_DESCR ((var)), ARR_A_MAGIC, sizeof (type)); \ + (void)(_ARR_DESCR ((var))->nelts = _nelts); \ + } while (0) + +/** + * Creates a new automatic array with the same number of elements as a + * given one. + * + * @param var A lvalue of type (type *) which will hold the new array. + * @param arr An array from which the elements will be duplicated + * + * This macro creates a dynamic array of a given type at runtime. + * The size of the array cannot be changed later. + * + * @return A pointer to the dynamic array (can be used as a pointer to the + * first element of this array). + */ +#define CLONE_ARR_A(var, arr) \ + NEW_ARR_A (__typeof__(arr[0]), (var), ARR_LEN ((arr))) + +/** + * Duplicates an array and returns a new automatic one. + * + * @param type The element type of the new array. + * @param var A lvalue of type (type *) which will hold the new array. + * @param arr An array from with the number of elements will be taken + * + * This macro creates a dynamic array of a given type at runtime. + * The size of the array cannot be changed later. + * + * @return A pointer to the dynamic array (can be used as a pointer to the + * first element of this array). + */ +#define DUP_ARR_A(var, arr) \ + do { CLONE_ARR_A(__typeof__(arr[0]), (var), (arr)); \ + memcpy ((var), (arr), sizeof (__typeof__(arr[0])) * ARR_LEN ((arr))); } \ + while (0) + +/** + * Declare an initialized (zero'ed) array of fixed size. + * This macro should be used at file scope only. + * + * @param type The element type of the new array. + * @param var A lvalue of type (type *) which will hold the new array. + * @param _nelts Number of elements in this new array. + */ +#define DECL_ARR_S(type, var, _nelts) \ + ARR_STRUCT(type, (_nelts) ? (_nelts) : 1) _##var; \ + type *var = (_ARR_SET_DBGINF (&_##var, ARR_A_MAGIC, sizeof (type)), \ + _##var.nelts = _nelts, \ + _##var.v.elts) + +/** + * Returns the length of an array + * + * @param arr a flexible, dynamic, automatic or static array. + */ +#define ARR_LEN(arr) (ARR_VRFY ((arr)), _ARR_DESCR((arr))->nelts) + +/** + * Resize a flexible array, allocate more data if needed but do NOT + * reduce. + * + * @param arr The array, which must be an lvalue. + * @param n The new size of the array. + * + * @remark This macro may change arr, so update all references! + */ +#define ARR_RESIZE(arr, n) \ + ((arr) = _arr_resize ((arr), (n), sizeof(__typeof__(arr[0])))) + +/** + * Resize a flexible array, always reallocate data. + * + * @param arr The array, which must be an lvalue. + * @param n The new size of the array. + * + * @remark This macro may change arr, so update all references! + */ +#define ARR_SETLEN(arr, n) \ + ((arr) = _arr_setlen ((arr), (n), sizeof(__typeof__(arr[0])) * (n))) + +/** Set a length smaller than the current length of the array. Do not + * resize. len must be <= ARR_LEN(arr). */ +#define ARR_SHRINKLEN(arr,len) \ + (ARR_VRFY ((arr)), assert(_ARR_DESCR((arr))->nelts >= len), \ + _ARR_DESCR((arr))->nelts = len) + +/** + * Resize a flexible array by growing it by delta elements. + * + * @param arr The array, which must be an lvalue. + * @param delta The delta number of elements. + * + * @remark This macro may change arr, so update all references! + */ +#define ARR_EXTEND(arr, delta) \ + ARR_RESIZE ((arr), ARR_LEN ((arr)) + (delta)) + +/** + * Resize a flexible array to hold n elements only if it is currently shorter + * than n. + * + * @param arr The array, which must be an lvalue. + * @param n The new size of the array. + * + * @remark This macro may change arr, so update all references! + */ +#define ARR_EXTO(arr, n) \ + ((n) >= ARR_LEN ((arr)) ? ARR_RESIZE ((arr), (n)+1) : (arr)) + +/** + * Append one element to a flexible array. + * + * @param arr The array, which must be an lvalue. + * @param elt The new element, must be of type (type). + */ +#define ARR_APP1(arr, elt) \ + (ARR_EXTEND ((arr), 1), (arr)[ARR_LEN ((arr))-1] = (elt)) + + +#ifdef NDEBUG +# define ARR_VRFY(arr) ((void)0) +# define ARR_IDX_VRFY(arr, idx) ((void)0) +#else +# define ARR_VRFY(arr) \ + assert ( ( (_ARR_DESCR((arr))->magic == ARR_D_MAGIC) \ + || (_ARR_DESCR((arr))->magic == ARR_A_MAGIC) \ + || (_ARR_DESCR((arr))->magic == ARR_F_MAGIC)) \ + && ( (_ARR_DESCR((arr))->magic != ARR_F_MAGIC) \ + || (_ARR_DESCR((arr))->u.allocated >= _ARR_DESCR((arr))->nelts)) \ + && (_ARR_DESCR((arr))->nelts >= 0)) +# define ARR_IDX_VRFY(arr, idx) \ + assert ((0 <= (idx)) && ((idx) < ARR_LEN ((arr)))) +#endif + + +/* Private !!! + Don't try this at home, kids, we're trained professionals ;-> + ... or at the IPD, either. */ +#ifdef NDEBUG +# define _ARR_DBGINF_DECL +# define _ARR_SET_DBGINF(descr, co, es) +#else +# define _ARR_DBGINF_DECL int magic; size_t eltsize; +# define _ARR_SET_DBGINF(descr, co, es) \ + ( (descr)->magic = (co), (descr)->eltsize = (es) ) +#endif + +/** + * Construct an array header. + */ +#define ARR_STRUCT(type, _nelts) \ + struct { \ + _ARR_DBGINF_DECL \ + union { \ + struct obstack *obstack; /* dynamic: allocated on this obstack */ \ + int allocated; /* flexible: #slots allocated */ \ + } u; \ + int nelts; \ + union { \ + type elts[(_nelts)]; \ + aligned_type align[1]; \ + } v; \ + } + +/** + * The array descriptor header type. + */ +typedef ARR_STRUCT (aligned_type, 1) _arr_descr; + +extern _arr_descr arr_mt_descr; + +void *_new_arr_f (int, size_t); +void _del_arr_f (void *); +void *_new_arr_d (struct obstack *obstack, int nelts, size_t elts_size); +void *_arr_resize (void *, int, size_t); +void *_arr_setlen (void *, int, size_t); + +#define _ARR_ELTS_OFFS offsetof (_arr_descr, v.elts) +#define _ARR_DESCR(elts) ((_arr_descr *)(void *)((char *)(elts) - _ARR_ELTS_OFFS)) + +#endif diff --git a/adt/bitfiddle.h b/adt/bitfiddle.h new file mode 100644 index 0000000..624a2d6 --- /dev/null +++ b/adt/bitfiddle.h @@ -0,0 +1,194 @@ +/** + * @file + * @date 28.9.2004 + * @brief Functions from hackers delight. + * @author Sebastian Hack, Matthias Braun + * @version $Id$ + */ +#ifndef _FIRM_BITFIDDLE_H_ +#define _FIRM_BITFIDDLE_H_ + +#include +#include "util.h" + +/* some functions here assume ints are 32 bit wide */ +#define HACKDEL_WORDSIZE 32 +COMPILETIME_ASSERT(sizeof(unsigned) == 4, unsignedsize) +COMPILETIME_ASSERT(UINT_MAX == 4294967295U, uintmax) + +/** + * Add saturated. + * @param x Summand 1. + * @param y Summand 2. + * @return x + y or INT_MAX/INT_MIN if an overflow occurred and x,y was positive/negative. + * + * @note See hacker's delight, page 27. + */ +static inline __attribute__((const)) +int add_saturated(int x, int y) +{ + int sum = x + y; + /* + An overflow occurs, if the sign of the both summands is equal + and the one of the sum is different from the summand's one. + The sign bit is 1, if an overflow occurred, 0 otherwise. + int overflow = ~(x ^ y) & (sum ^ x); + */ + int overflow = (x ^ sum) & (y ^ sum); + + /* + The infinity to use. + Make a mask of the sign bit of x and y (they are the same if an + overflow occurred). + INT_MIN == ~INT_MAX, so if the sign was negative, INT_MAX becomes + INT_MIN. + */ + int inf = (x >> (sizeof(x) * 8 - 1)) ^ INT_MAX; + + return overflow < 0 ? inf : sum; +} + +/** + * Compute the count of set bits in a 32-bit word. + * @param x A 32-bit word. + * @return The number of bits set in x. + */ +static inline __attribute__((const)) +unsigned popcnt(unsigned x) { + x -= ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0f0f0f0f; + x += x >> 8; + x += x >> 16; + return x & 0x3f; +} + +/** + * Compute the number of leading zeros in a word. + * @param x The word. + * @return The number of leading (from the most significant bit) zeros. + */ +static inline __attribute__((const)) +unsigned nlz(unsigned x) { +#ifdef USE_X86_ASSEMBLY + unsigned res; + if(x == 0) + return 32; + + __asm__("bsrl %1,%0" + : "=r" (res) + : "r" (x)); + return 31 - res; +#else + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return popcnt(~x); +#endif +} + +/** + * Compute the number of trailing zeros in a word. + * @param x The word. + * @return The number of trailing zeros. + */ +static inline __attribute__((const)) +unsigned ntz(unsigned x) { +#ifdef USE_X86_ASSEMBLY + unsigned res; + if(x == 0) + return 32; + + __asm__("bsfl %1,%0" + : "=r" (res) + : "r" (x)); + return res; +#else + return HACKDEL_WORDSIZE - nlz(~x & (x - 1)); +#endif +} + +/** + * Compute the greatest power of 2 smaller or equal to a value. + * This is also known as the binary logarithm. + * @param x The value. + * @return The power of two. + */ +#define log2_floor(x) (HACKDEL_WORDSIZE - 1 - nlz(x)) + +/** + * Compute the smallest power of 2 greater or equal to a value. + * This is also known as the binary logarithm. + * @param x The value. + * @return The power of two. + */ +#define log2_ceil(x) (HACKDEL_WORDSIZE - nlz((x) - 1)) + +/** + * Round up to the next multiple of a power of two. + * @param x A value. + * @param pot A power of two. + * @return x rounded up to the next multiple of pot. + */ +#define round_up2(x,pot) (((x) + ((pot) - 1)) & (~((pot) - 1))) + +/** + * Returns the biggest power of 2 that is equal or smaller than @p x + * (see hackers delight power-of-2 boundaries, page 48) + */ +static inline __attribute__((const)) +unsigned floor_po2(unsigned x) +{ +#ifdef USE_X86_ASSEMBLY // in this case nlz is fast + if(x == 0) + return 0; + // note that x != 0 here, so nlz(x) < 32! + return 0x80000000U >> nlz(x); +#else + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x - (x >> 1); +#endif +} + +/** + * Returns the smallest power of 2 that is equal or greater than x + * @remark x has to be <= 0x8000000 of course + * @note see hackers delight power-of-2 boundaries, page 48 + */ +static inline __attribute__((const)) +unsigned ceil_po2(unsigned x) +{ + if(x == 0) + return 0; + assert(x < (1U << 31)); + +#ifdef USE_X86_ASSEMBLY // in this case nlz is fast + // note that x != 0 here! + return 0x80000000U >> (nlz(x-1) - 1); +#else + x = x - 1; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x + 1; +#endif +} + +/** + * Tests whether @p x is a power of 2 + */ +static inline __attribute__((const)) +int is_po2(unsigned x) +{ + return (x & (x-1)) == 0; +} + +#endif diff --git a/adt/error.h b/adt/error.h new file mode 100644 index 0000000..9b971db --- /dev/null +++ b/adt/error.h @@ -0,0 +1,8 @@ +// placeholder file... +#include + +void abort(void); + +static inline __attribute__((noreturn)) +void panic(const char *msg) +{ fprintf(stderr, "Panic: %s\n", msg); abort(); } diff --git a/adt/fourcc.h b/adt/fourcc.h new file mode 100644 index 0000000..8b5a7cb --- /dev/null +++ b/adt/fourcc.h @@ -0,0 +1,18 @@ +/* + * Project: libFIRM + * File name: ir/adt/fourcc.h + * Purpose: define the famous infame FOURCC macro. + * Author: + * Modified by: + * Created: 02.01.2004 + * CVS-ID: $Id$ + * Copyright: (C) 2004 University of Karlsruhe + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ +#ifndef _FOURCC_H +#define _FOURCC_H + +/** define a readable fourcc code */ +#define FOURCC(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24)) + +#endif /* _FOURCC_H */ diff --git a/adt/hash_string.h b/adt/hash_string.h new file mode 100644 index 0000000..f2557a2 --- /dev/null +++ b/adt/hash_string.h @@ -0,0 +1,35 @@ +#ifndef _FIRM_HASH_STRING_H_ +#define _FIRM_HASH_STRING_H_ + +#define _FIRM_FNV_OFFSET_BASIS 2166136261U +#define _FIRM_FNV_FNV_PRIME 16777619U + +static inline __attribute__((pure)) +unsigned hash_string(const char* str) +{ + const unsigned char *p; + unsigned hash = _FIRM_FNV_OFFSET_BASIS; + + for(p = (const unsigned char*) str; *p != 0; ++p) { + hash *= _FIRM_FNV_FNV_PRIME; + hash ^= *p; + } + + return hash; +} + +static inline __attribute__((pure)) +unsigned hash_string_size(const char* str, size_t size) +{ + size_t i; + unsigned hash = _FIRM_FNV_OFFSET_BASIS; + + for(i = 0; i < size; ++i) { + hash *= _FIRM_FNV_FNV_PRIME; + hash ^= str[i]; + } + + return hash; +} + +#endif diff --git a/adt/hashset.c b/adt/hashset.c new file mode 100644 index 0000000..c2ac584 --- /dev/null +++ b/adt/hashset.c @@ -0,0 +1,596 @@ +/** + * @file + * @date 17.03.2007 + * @brief Geberic hashset implementation + * @author Matthias Braun, inspiration from densehash from google sparsehash + * package + * @version $Id$ + * + * + * You have to specialize this file by defining: + * + *
    + *
  • HashSet The name of the hashset type
  • + *
  • HashSetIterator The name of the hashset iterator type
  • + *
  • ValueType The type of the stored data values
  • + *
  • NullValue A special value representing no values
  • + *
  • DeletedValue A special value representing deleted entries
  • + *
  • Hash(hashset,key) calculates the hash value for a given key
  • + *
+ * + * Note that by default it is assumed that the data values themselfes are used + * as keys. However you can change that with additional defines: + * + *
    + *
  • KeyType The type of the keys identifying data values. + * Defining this implies, that a data value contains + * more than just the key.
  • + *
  • GetKey(value) Extracts the key from a data value
  • + *
  • KeysEqual(hashset,key1,key2) Tests wether 2 keys are equal
  • + *
  • DO_REHASH Instead of storing the hash-values, recalculate + * them on demand from the datavalues. (usefull if + * calculating the hash-values takes less time than + * a memory access)
  • + *
+ * + * You can further fine tune your hashset by defining the following: + * + *
    + *
  • JUMP(num_probes) The probing method
  • + *
  • Alloc(count) Allocates count hashset entries (NOT bytes)
  • + *
  • Free(ptr) Frees a block of memory allocated by Alloc
  • + *
  • SetRangeEmpty(ptr,count) Efficiently sets a range of elements to + * the Null value
  • + *
  • ADDITIONAL_DATA Additional fields appended to the hashset struct
  • + *
+ */ +#ifdef HashSet + +#include +#include +#include + +#include "bitfiddle.h" +#include "util.h" + +/* quadratic probing */ +#ifndef JUMP +#define JUMP(num_probes) (num_probes) +#endif + +#ifndef Hash +#define ID_HASH +#define Hash(this,value) ((unsigned)(value)) +#endif + +#ifdef DO_REHASH +#define HashSetEntry ValueType +#define EntrySetHash(entry,new_hash) +#define EntryGetHash(this,entry) Hash(this,entry) +#define EntryGetValue(entry) (entry) +#else +#define EntryGetHash(this,entry) (entry).hash +#define EntrySetHash(entry,new_hash) (entry).hash = (new_hash) +#define EntryGetValue(entry) (entry).data +#endif + +#ifndef Alloc +#include "xmalloc.h" +#define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry)) +#define Free(ptr) free(ptr) +#endif + +#ifdef ID_HASH +#define InsertReturnValue int +#define GetInsertReturnValue(entry,new) (new) +#else +#define InsertReturnValue ValueType +#define GetInsertReturnValue(entry,new) EntryGetValue(entry) +#endif + +#ifndef KeyType +#define KeyType ValueType +#define GetKey(value) (value) +#define InitData(this,value,key) (value) = (key) +#endif + +#ifndef ConstKeyType +#define ConstKeyType const KeyType +#endif + +#ifndef EntrySetEmpty +#define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue +#endif +#ifndef EntrySetDeleted +#define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue +#endif +#ifndef EntryIsEmpty +#define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue) +#endif +#ifndef EntryIsDeleted +#define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue) +#endif +#ifndef SetRangeEmpty +#define SetRangeEmpty(ptr,size) \ +{ \ + size_t _i; \ + size_t _size = (size); \ + HashSetEntry *entries = (ptr); \ + for(_i = 0; _i < _size; ++_i) { \ + HashSetEntry *entry = & entries[_i]; \ + EntrySetEmpty(*entry); \ + } \ +} +#endif + +#ifndef HT_OCCUPANCY_FLT +/** how full before we double size */ +#define HT_OCCUPANCY_FLT 0.5f +#endif + +#ifndef HT_EMPTY_FLT +/** how empty before we half size */ +#define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT)) +#endif + +#ifndef HT_MIN_BUCKETS +/** default smallest bucket size */ +#define HT_MIN_BUCKETS 32 +#endif + +#define ILLEGAL_POS ((size_t)-1) + +#ifndef hashset_init +#error You have to redefine hashset_init +#endif +#ifndef hashset_init_size +#error You have to redefine hashset_init_size +#endif +#ifndef hashset_destroy +#error You have to redefine hashset_destroy +#endif +#ifndef hashset_insert +#error You have to redefine hashset_insert +#endif +#ifndef hashset_remove +#error You have to redefine hashset_remove +#endif +#ifndef hashset_find +#error You have to redefine hashset_find +#endif +#ifndef hashset_size +#error You have to redefine hashset_size +#endif +#ifndef hashset_iterator_init +#error You have to redefine hashset_iterator_init +#endif +#ifndef hashset_iterator_next +#error You have to redefine hashset_iterator_next +#endif +#ifndef hashset_remove_iterator +#error You have to redefine hashset_remove_iterator +#endif + +/** + * Returns the number of elements in the hashset + */ +size_t hashset_size(const HashSet *this) +{ + return this->num_elements - this->num_deleted; +} + +/** + * Inserts an element into a hashset without growing the set (you have to make + * sure there's enough room for that. + * @note also see comments for hashset_insert() + * @internal + */ +static inline +InsertReturnValue insert_nogrow(HashSet *this, KeyType key) +{ + size_t num_probes = 0; + size_t num_buckets = this->num_buckets; + size_t hashmask = num_buckets - 1; + unsigned hash = Hash(this, key); + size_t bucknum = hash & hashmask; + size_t insert_pos = ILLEGAL_POS; + + assert((num_buckets & (num_buckets - 1)) == 0); + + while(1) { + HashSetEntry *entry = & this->entries[bucknum]; + + if(EntryIsEmpty(*entry)) { + size_t p; + HashSetEntry *nentry; + + if(insert_pos != ILLEGAL_POS) { + p = insert_pos; + } else { + p = bucknum; + } + + nentry = &this->entries[p]; + InitData(this, EntryGetValue(*nentry), key); + EntrySetHash(*nentry, hash); + this->num_elements++; + return GetInsertReturnValue(*nentry, 1); + } + if(EntryIsDeleted(*entry)) { + if(insert_pos == ILLEGAL_POS) + insert_pos = bucknum; + } else if(EntryGetHash(this, *entry) == hash) { + if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) { + // Value already in the set, return it + return GetInsertReturnValue(*entry, 0); + } + } + + ++num_probes; + bucknum = (bucknum + JUMP(num_probes)) & hashmask; + assert(num_probes < num_buckets); + } +} + +/** + * Inserts an element into a hashset under the assumption that the hashset + * contains no deleted entries and the element doesn't exist in the hashset yet. + * @internal + */ +static +void insert_new(HashSet *this, unsigned hash, ValueType value) +{ + size_t num_probes = 0; + size_t num_buckets = this->num_buckets; + size_t hashmask = num_buckets - 1; + size_t bucknum = hash & hashmask; + size_t insert_pos = ILLEGAL_POS; + + assert(value != NullValue); + + while(1) { + HashSetEntry *entry = & this->entries[bucknum]; + + if(EntryIsEmpty(*entry)) { + size_t p; + HashSetEntry *nentry; + + if(insert_pos != ILLEGAL_POS) { + p = insert_pos; + } else { + p = bucknum; + } + nentry = &this->entries[p]; + + EntryGetValue(*nentry) = value; + EntrySetHash(*nentry, hash); + this->num_elements++; + return; + } + assert(!EntryIsDeleted(*entry)); + + ++num_probes; + bucknum = (bucknum + JUMP(num_probes)) & hashmask; + assert(num_probes < num_buckets); + } +} + +/** + * calculate shrink and enlarge limits + * @internal + */ +static inline +void reset_thresholds(HashSet *this) +{ + this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT); + this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT); + this->consider_shrink = 0; +} + +/** + * Resize the hashset + * @internal + */ +static inline +void resize(HashSet *this, size_t new_size) +{ + size_t num_buckets = this->num_buckets; + size_t i; + HashSetEntry *old_entries = this->entries; + HashSetEntry *new_entries; + + /* allocate a new array with double size */ + new_entries = Alloc(new_size); + SetRangeEmpty(new_entries, new_size); + + /* use the new array */ + this->entries = new_entries; + this->num_buckets = new_size; + this->num_elements = 0; + this->num_deleted = 0; +#ifndef NDEBUG + this->entries_version++; +#endif + reset_thresholds(this); + + /* reinsert all elements */ + for(i = 0; i < num_buckets; ++i) { + HashSetEntry *entry = & old_entries[i]; + if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry)) + continue; + + insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry)); + } + + /* now we can free the old array */ + Free(old_entries); +} + +/** + * grow the hashset if adding 1 more elements would make it too crowded + * @internal + */ +static inline +void maybe_grow(HashSet *this) +{ + size_t resize_to; + + if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold)) + return; + + /* double table size */ + resize_to = this->num_buckets * 2; + resize(this, resize_to); +} + +/** + * shrink the hashset if it is only sparsely filled + * @internal + */ +static inline +void maybe_shrink(HashSet *this) +{ + size_t size; + size_t resize_to; + + if(!this->consider_shrink) + return; + + this->consider_shrink = 0; + size = hashset_size(this); + if(LIKELY(size > this->shrink_threshold)) + return; + + resize_to = ceil_po2(size); + + if(resize_to < 4) + resize_to = 4; + + resize(this, resize_to); +} + +/** + * Insert an element into the hashset. If no element with key key exists yet, + * then a new one is created and initialized with the InitData function. + * Otherwise the exisiting element is returned (for hashs where key is equal to + * value, nothing is returned.) + * + * @param this the hashset + * @param key the key that identifies the data + * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value) + */ +InsertReturnValue hashset_insert(HashSet *this, KeyType key) +{ +#ifndef NDEBUG + this->entries_version++; +#endif + + maybe_shrink(this); + maybe_grow(this); + return insert_nogrow(this, key); +} + +/** + * Searchs for an element with key @p key. + * + * @param this the hashset + * @param key the key to search for + * @returns the found value or NullValue if nothing was found + */ +ValueType hashset_find(const HashSet *this, ConstKeyType key) +{ + size_t num_probes = 0; + size_t num_buckets = this->num_buckets; + size_t hashmask = num_buckets - 1; + unsigned hash = Hash(this, key); + size_t bucknum = hash & hashmask; + + while(1) { + HashSetEntry *entry = & this->entries[bucknum]; + + if(EntryIsEmpty(*entry)) { + return NullValue; + } + if(EntryIsDeleted(*entry)) { + // value is deleted + } else if(EntryGetHash(this, *entry) == hash) { + if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) { + // found the value + return EntryGetValue(*entry); + } + } + + ++num_probes; + bucknum = (bucknum + JUMP(num_probes)) & hashmask; + assert(num_probes < num_buckets); + } +} + +/** + * Removes an element from a hashset. Does nothing if the set doesn't contain + * the element. + * + * @param this the hashset + * @param key key that identifies the data to remove + */ +void hashset_remove(HashSet *this, ConstKeyType key) +{ + size_t num_probes = 0; + size_t num_buckets = this->num_buckets; + size_t hashmask = num_buckets - 1; + unsigned hash = Hash(this, key); + size_t bucknum = hash & hashmask; + +#ifndef NDEBUG + this->entries_version++; +#endif + + while(1) { + HashSetEntry *entry = & this->entries[bucknum]; + + if(EntryIsEmpty(*entry)) { + return; + } + if(EntryIsDeleted(*entry)) { + // entry is deleted + } else if(EntryGetHash(this, *entry) == hash) { + if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) { + EntrySetDeleted(*entry); + this->num_deleted++; + this->consider_shrink = 1; + return; + } + } + + ++num_probes; + bucknum = (bucknum + JUMP(num_probes)) & hashmask; + assert(num_probes < num_buckets); + } +} + +/** + * Initializes hashset with a specific size + * @internal + */ +static inline +void init_size(HashSet *this, size_t initial_size) +{ + if(initial_size < 4) + initial_size = 4; + + this->entries = Alloc(initial_size); + SetRangeEmpty(this->entries, initial_size); + this->num_buckets = initial_size; + this->consider_shrink = 0; + this->num_elements = 0; + this->num_deleted = 0; +#ifndef NDEBUG + this->entries_version = 0; +#endif + + reset_thresholds(this); +} + +/** + * Initialializes a hashset with the default size. The memory for the set has to + * already allocated. + */ +void hashset_init(HashSet *this) +{ + init_size(this, HT_MIN_BUCKETS); +} + +/** + * Destroys a hashset, freeing all used memory (except the memory for the + * HashSet struct itself). + */ +void hashset_destroy(HashSet *this) +{ + Free(this->entries); +#ifndef NDEBUG + this->entries = NULL; +#endif +} + +/** + * Initializes a hashset expecting expected_element size + */ +void hashset_init_size(HashSet *this, size_t expected_elements) +{ + size_t needed_size; + size_t po2size; + + if(expected_elements >= UINT_MAX/2) { + abort(); + } + + needed_size = expected_elements * (1.0 / HT_OCCUPANCY_FLT); + po2size = ceil_po2(needed_size); + init_size(this, po2size); +} + +/** + * Initializes a hashset iterator. The memory for the allocator has to be + * already allocated. + * @note it is not allowed to remove or insert elements while iterating + */ +void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset) +{ + this->current_bucket = hashset->entries - 1; + this->end = hashset->entries + hashset->num_buckets; +#ifndef NDEBUG + this->set = hashset; + this->entries_version = hashset->entries_version; +#endif +} + +/** + * Returns the next value in the iterator or NULL if no value is left + * in the hashset. + * @note it is not allowed to remove or insert elements while iterating + */ +ValueType hashset_iterator_next(HashSetIterator *this) +{ + HashSetEntry *current_bucket = this->current_bucket; + HashSetEntry *end = this->end; + + if(current_bucket >= end) + return NullValue; + + /* using hashset_insert or hashset_remove is not allowed while iterating */ + assert(this->entries_version == this->set->entries_version); + + do { + current_bucket++; + } while(current_bucket < end && + (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket))); + + if(current_bucket >= end) + return NullValue; + + this->current_bucket = current_bucket; + return EntryGetValue(*current_bucket); +} + +/** + * Removes the element the iterator points to. Removing an element a second time + * has no result. + */ +void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter) +{ + HashSetEntry *entry = iter->current_bucket; + + /* iterator_next needs to have been called at least once */ + assert(entry >= this->entries); + /* needs to be on a valid element */ + assert(entry < this->entries + this->num_buckets); + + if(EntryIsDeleted(*entry)) + return; + + EntrySetDeleted(*entry); + this->num_deleted++; + this->consider_shrink = 1; +} + +#endif diff --git a/adt/hashset.h b/adt/hashset.h new file mode 100644 index 0000000..bcd90c5 --- /dev/null +++ b/adt/hashset.h @@ -0,0 +1,58 @@ +/** + * @file + * @date 16.03.2007 + * @brief Generic hashset functions + * @author Matthias Braun + * @version $Id$ + */ + +/* You have to specialize this header by defining HashSet, HashSetIterator and + * ValueType */ +#ifdef HashSet + +#include + +#ifdef DO_REHASH +#define HashSetEntry ValueType +#else +typedef struct HashSetEntry { + ValueType data; + unsigned hash; +} HashSetEntry; +#endif + +#ifndef NO_TYPEDEFS +typedef struct HashSet HashSet; +typedef struct HashSetIterator HashSetIterator; +#endif + +struct HashSet { + HashSetEntry *entries; + size_t num_buckets; + size_t enlarge_threshold; + size_t shrink_threshold; + size_t num_elements; + size_t num_deleted; + int consider_shrink; +#ifndef NDEBUG + unsigned entries_version; +#endif +#ifdef ADDITIONAL_DATA + ADDITIONAL_DATA; +#endif +}; + +struct HashSetIterator { + HashSetEntry *current_bucket; + HashSetEntry *end; +#ifndef NDEBUG + const HashSet *set; + unsigned entries_version; +#endif +}; + +#ifdef DO_REHASH +#undef HashSetEntry +#endif + +#endif diff --git a/adt/obst.h b/adt/obst.h new file mode 100644 index 0000000..c829a63 --- /dev/null +++ b/adt/obst.h @@ -0,0 +1,15 @@ +/** + * @file + * @date 17.03.2007 + * @brief Makes using obstack.h easier by including obstack_chunk_alloc and + * obstack_chunk_free + * @author Martin Trapp, Christian Schaefer + * @version $Id$ + */ +#define _GNU_SOURCE +#include +#include +#include "xmalloc.h" + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free diff --git a/adt/pset.c b/adt/pset.c new file mode 100644 index 0000000..96fb187 --- /dev/null +++ b/adt/pset.c @@ -0,0 +1,36 @@ +/* collides with libfirm */ +#if 0 + +#include + +#include "pset.h" + +/** probing method: quadratic probing */ +#define DO_REHASH +#define HashSet pset_t +#define HashSetIterator pset_iterator_t +#define ValueType void* +#define NullValue NULL +#define DeletedValue ((void*)-1) +#define KeysEqual(this,key1,key2) 1 +#define SetRangeEmpty(ptr,size) memset(ptr, 0, (size) * sizeof(HashSetEntry)) + +#define hashset_init pset_init +#define hashset_init_size pset_init_size +#define hashset_destroy pset_destroy +#define hashset_insert pset_insert +#define hashset_remove pset_remove +#define hashset_find pset_find +#define hashset_size pset_size +#define hashset_iterator_init pset_iterator_init +#define hashset_iterator_next pset_iterator_next +#define hashset_remove_iterator pset_remove_iterator + +#include "hashset.c" + +int pset_contains(const pset_t *pset, const ValueType val) +{ + return pset_find(pset, val) != NullValue; +} + +#endif diff --git a/adt/pset.h b/adt/pset.h new file mode 100644 index 0000000..8ae8b35 --- /dev/null +++ b/adt/pset.h @@ -0,0 +1,111 @@ +/** + * @file + * @date 17.03.2007 + * @brief A hashset that contains pointers + * @author Matthias Braun + * @version $Id$ + */ +#ifndef _FIRM_PSET_H_ +#define _FIRM_PSET_H_ + +/* collides with libfirm... */ +#if 0 + +#define HashSet pset_t +#define HashSetIterator pset_iterator_t +#define ValueType void* +#define DO_REHASH +#include "hashset.h" +#undef DO_REHASH +#undef HashSet +#undef HashSetIterator +#undef ValueType + +/** + * Initializes a pset + * + * @param pset Pointer to allocated space for the pset + */ +void pset_init(pset_t *pset); + +/** + * Initializes a pset + * + * @param pset Pointer to allocated space for the pset + * @param expected_elements Number of elements expected in the pset (rougly) + */ +void pset_init_size(pset_t *pset, size_t expected_elements); + +/** + * Destroys a pset and frees the memory allocated for hashtable. The memory of + * the pset itself is not freed. + * + * @param pset Pointer to the pset + */ +void pset_destroy(pset_t *pset); + +/** + * Inserts an element into a pset. + * + * @param pset Pointer to the pset + * @param ptr Pointer to insert into the pset + * @returns 1 if the pointer was inserted, 0 if it was already there + */ +int pset_insert(pset_t *pset, void *ptr); + +/** + * Removes an element from a pset. Does nothing if the pset doesn't contain the + * element. + * + * @param pset Pointer to the pset + * @param ptr Pointer to remove from the pset + */ +void pset_remove(pset_t *pset, const void *ptr); + +/** + * Tests whether a pset contains a pointer + * + * @param pset Pointer to the pset + * @param ptr The pointer to test + * @returns 1 @p pset contains the @p ptr, 0 otherwise + */ +int pset_contains(const pset_t *pset, const void *ptr); + +/** + * Returns the number of pointers contained in the pset + * + * @param pset Pointer to the pset + * @returns Number of pointers contained in the pset + */ +size_t pset_size(const pset_t *pset); + +/** + * Initializes a pset iterator. Sets the iterator before the first element in + * the pset. + * + * @param iterator Pointer to already allocated iterator memory + * @param pset Pointer to the pset + */ +void pset_iterator_init(pset_iterator_t *iterator, const pset_t *pset); + +/** + * Advances the iterator and returns the current element or NULL if all elements + * in the pset have been processed. + * @attention It is not allowed to use pset_insert or pset_remove while + * iterating over a pset; pset_remove_iter is allowed. + * + * @param iterator Pointer to the pset iterator. + * @returns Next element in the pset or NULL + */ +void* pset_iterator_next(pset_iterator_t *iterator); + +/** + * Removes the element that the iterator currently points to from the hashset. + * + * @param pset Pointer to the pset + * @param iterator Pointer to the iterator + */ +void pset_remove_iterator(pset_t *pset, const pset_iterator_t *iterator); +#endif + +#endif diff --git a/adt/strset.c b/adt/strset.c new file mode 100644 index 0000000..5f0c528 --- /dev/null +++ b/adt/strset.c @@ -0,0 +1,28 @@ +#include + +#include "strset.h" +#include "hash_string.h" + +#define HashSet strset_t +#define HashSetIterator strset_iterator_t +#define HashSetEntry strset_entry_t +#define ValueType const char* +#define ConstKeyType const char* +#define NullValue NULL +#define DeletedValue ((void*)-1) +#define Hash(this, value) hash_string(value) +#define KeysEqual(this,key1,key2) (strcmp(key1, key2) == 0) +#define SetRangeEmpty(ptr,size) memset(ptr, 0, (size) * sizeof(strset_entry_t)) + +#define hashset_init strset_init +#define hashset_init_size strset_init_size +#define hashset_destroy strset_destroy +#define hashset_insert strset_insert +#define hashset_remove strset_remove +#define hashset_find strset_find +#define hashset_size strset_size +#define hashset_iterator_init strset_iterator_init +#define hashset_iterator_next strset_iterator_next +#define hashset_remove_iterator strset_remove_iterator + +#include "hashset.c" diff --git a/adt/strset.h b/adt/strset.h new file mode 100644 index 0000000..1753e78 --- /dev/null +++ b/adt/strset.h @@ -0,0 +1,102 @@ +#ifndef _FIRM_STRSET_H_ +#define _FIRM_STRSET_H_ + +#define HashSet strset_t +#define HashSetIterator strset_iterator_t +#define HashSetEntry strset_entry_t +#define ValueType const char* +#include "hashset.h" +#undef ValueType +#undef HashSetEntry +#undef HashSetIterator +#undef HashSet + +/** + * Initializes a strset + * + * @param strset Pointer to allocated space for the strset + */ +void strset_init(strset_t *strset); + +/** + * Initializes a strset + * + * @param strset Pointer to allocated space for the strset + * @param expected_elements Number of elements expected in the strset (rougly) + */ +void strset_init_size(strset_t *strset, size_t expected_elements); + +/** + * Destroys a strset and frees the memory allocated for hashtable. The memory of + * the strset itself is not freed. + * + * @param strset Pointer to the strset + */ +void strset_destroy(strset_t *strset); + +/** + * Inserts a string into a strset. + * + * @param strset Pointer to the strset + * @param ptr Pointer to insert into the strset + * @returns @p ptr if the string was inserted into the set, + * otherwise a pointer to the string already in the set + */ +const char *strset_insert(strset_t *strset, const char *ptr); + +/** + * Removes an element from a strset. Does nothing if the strset doesn't contain the + * element. + * + * @param strset Pointer to the strset + * @param ptr Pointer to remove from the strset + */ +void strset_remove(strset_t *strset, const char *ptr); + +/** + * Tests whether a strset contains a pointer + * + * @param strset Pointer to the strset + * @param ptr The pointer to test + * @returns 1 @p strset contains the @p ptr, 0 otherwise + */ +const char* strset_find(const strset_t *strset, const char *ptr); + +/** + * Returns the number of pointers contained in the strset + * + * @param strset Pointer to the strset + * @returns Number of pointers contained in the strset + */ +size_t strset_size(const strset_t *strset); + +/** + * Initializes a strset iterator. Sets the iterator before the first element in + * the strset. + * + * @param iterator Pointer to already allocated iterator memory + * @param strset Pointer to the strset + */ +void strset_iterator_init(strset_iterator_t *iterator, const strset_t *strset); + +/** + * Advances the iterator and returns the current element or NULL if all elements + * in the strset have been processed. + * @attention It is not allowed to use strset_insert or strset_remove while + * iterating over a strset. + * + * @param iterator Pointer to the strset iterator. + * @returns Next element in the strset or NULL + */ +const char *strset_iterator_next(strset_iterator_t *iterator); + +/** + * Removes the string from the set that the iterator currently points to + * + * @param strset Pointer to the strset + * @param iter Pointer to the iterator + */ +void strset_remove_iterator(strset_t *strset, + const strset_iterator_t *iterator); + +#endif diff --git a/adt/util.h b/adt/util.h new file mode 100644 index 0000000..19a7bf8 --- /dev/null +++ b/adt/util.h @@ -0,0 +1,33 @@ +/** + * @file + * @date 16.03.2007 + * @brief Various utility functions that wrap compiler specific extensions + * @author Matthias Braun + * @version $Id$ + */ +#ifndef _FIRM_UTIL_H_ +#define _FIRM_UTIL_H_ + +/** + * Asserts that the constant expression x is not zero at compiletime. name has + * to be a unique identifier. + * + * @note This uses the fact, that double case labels are not allowed. + */ +#define COMPILETIME_ASSERT(x, name) \ + static __attribute__((unused)) void compiletime_assert_##name (int h) { \ + switch(h) { case 0: case (x): ; } \ + } + +/** + * Indicates to the compiler that the value of x is very likely 1 + * @note Only use this in speed critical code and when you are sure x is often 1 + */ +#define LIKELY(x) __builtin_expect((x), 1) +/** + * Indicates to the compiler that it's very likely that x is 0 + * @note Only use this in speed critical code and when you are sure x is often 0 + */ +#define UNLIKELY(x) __builtin_expect((x), 0) + +#endif diff --git a/adt/xmalloc.c b/adt/xmalloc.c new file mode 100644 index 0000000..08de608 --- /dev/null +++ b/adt/xmalloc.c @@ -0,0 +1,62 @@ +/* + * Project: libFIRM + * File name: ir/adt/xmalloc.c + * Purpose: Xmalloc --- never failing wrappers for malloc() & friends. + * Author: Markus Armbruster + * Modified by: + * Created: 1999 by getting from fiasco + * CVS-ID: $Id$ + * Copyright: (c) 1995, 1996 Markus Armbruster + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ + +/* @@@ ToDo: replace this file with the one from liberty. + [reimplement xstrdup, ... ] */ +#include + +#include +#include + +#include "xmalloc.h" +#include "error.h" +#include "util.h" + +static inline __attribute__((noreturn)) +void out_of_memory(void) { + panic("out of memory"); +} + +void *xmalloc(size_t size) { + void *res = malloc(size); + + if (UNLIKELY(res == NULL)) + out_of_memory(); + + return res; +} + +void *xcalloc(size_t num, size_t size) { + void *res = calloc(num, size); + + if (UNLIKELY(res == NULL)) + out_of_memory(); + + return res; +} + +void *xrealloc(void *ptr, size_t size) { + void *res = realloc (ptr, size); + + if (UNLIKELY(res == NULL)) + out_of_memory(); + + return res; +} + +char *xstrdup(const char *str) { + size_t len = strlen(str) + 1; + char *res = xmalloc(len); + memcpy(res, str, len); + + return res; +} diff --git a/adt/xmalloc.h b/adt/xmalloc.h new file mode 100644 index 0000000..e6517f3 --- /dev/null +++ b/adt/xmalloc.h @@ -0,0 +1,28 @@ +/** + * @file + * @brief More comfortable allocations. + * @author Markus Armbruster + * @data 1999 by getting from fiasco + * @version $Id$ + * Copyright: (c) 1995, 1996 Markus Armbruster + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ +#ifndef _XMALLOC_H_ +#define _XMALLOC_H_ + +#include + +__attribute__((malloc)) +void *xmalloc(size_t size); + +__attribute__((malloc)) +void *xcalloc(size_t num, size_t size); + +void *xrealloc(void *ptr, size_t size); + +__attribute__((malloc)) +char *xstrdup(const char *str); + +#define xfree(ptr) free(ptr) + +#endif diff --git a/ast.c b/ast.c new file mode 100644 index 0000000..b05df50 --- /dev/null +++ b/ast.c @@ -0,0 +1,324 @@ +#include + +#include "ast_t.h" +#include "type_t.h" + +#include +#include +#include + +#include "adt/error.h" + +struct obstack ast_obstack; + +static +void print_const(FILE *out, const const_t *cnst) +{ + fprintf(out, "%d", cnst->value); +} + +static +void print_string_literal(FILE *out, const string_literal_t *string_literal) +{ + /* TODO escape " and non-printable chars */ + fprintf(out, "\"%s\"", string_literal->value); +} + +static +void print_call_expression(FILE *out, const call_expression_t *call) +{ + print_expression(out, call->method); + fprintf(out, "("); + call_argument_t *argument = call->arguments; + int first = 1; + while(argument != NULL) { + if(!first) { + fprintf(out, ", "); + } else { + first = 0; + } + print_expression(out, argument->expression); + + argument = argument->next; + } + fprintf(out, ")"); +} + +static +void print_binary_expression(FILE *out, const binary_expression_t *binexpr) +{ + fprintf(out, "("); + print_expression(out, binexpr->left); + fprintf(out, " "); + switch(binexpr->type) { + case BINEXPR_INVALID: + fprintf(out, "INVOP"); + break; + case BINEXPR_ASSIGN: + fprintf(out, "<-"); + break; + case BINEXPR_ADD: + fprintf(out, "+"); + break; + case BINEXPR_SUB: + fprintf(out, "-"); + break; + case BINEXPR_NOTEQUAL: + fprintf(out, "/="); + break; + case BINEXPR_EQUAL: + fprintf(out, "="); + break; + case BINEXPR_LESS: + fprintf(out, "<"); + break; + case BINEXPR_LESSEQUAL: + fprintf(out, "<="); + break; + case BINEXPR_GREATER: + fprintf(out, ">"); + break; + case BINEXPR_GREATEREQUAL: + fprintf(out, ">="); + break; + default: + /* TODO: add missing ops */ + fprintf(out, "op%d", binexpr->type); + break; + } + fprintf(out, " "); + print_expression(out, binexpr->right); + fprintf(out, ")"); +} + +void print_expression(FILE *out, const expression_t *expression) +{ + switch(expression->type) { + case EXPR_INVALID: + fprintf(out, "*invalid expression*"); + break; + case EXPR_CONST: + print_const(out, (const const_t*) expression); + break; + case EXPR_STRING_LITERAL: + print_string_literal(out, (const string_literal_t*) expression); + break; + case EXPR_CALL: + print_call_expression(out, (const call_expression_t*) expression); + break; + case EXPR_BINARY: + print_binary_expression(out, (const binary_expression_t*) expression); + break; + case EXPR_REFERENCE: + case EXPR_UNARY: + case EXPR_SELECT: + case EXPR_ARRAY_ACCESS: + case EXPR_SIZEOF: + /* TODO */ + fprintf(out, "some expression of type %d", expression->type); + break; + } +} + +static +void print_block_statement(FILE *out, int indent, + const block_statement_t *block) +{ + statement_t *statement = block->first_statement; + while(statement != NULL) { + print_statement(out, indent + 1, statement); + + statement = statement->next; + } +} + +static +void print_return_statement(FILE *out, const return_statement_t *statement) +{ + fprintf(out, "return "); + if(statement->return_value != NULL) + print_expression(out, statement->return_value); +} + +static +void print_expression_statement(FILE *out, + const expression_statement_t *statement) +{ + print_expression(out, statement->expression); +} + +static +void print_goto_statement(FILE *out, const goto_statement_t *statement) +{ + fprintf(out, "goto "); + if(statement->label != NULL) { + fprintf(out, "%s", statement->label->symbol->string); + } else { + fprintf(out, "?%s", statement->label_symbol->string); + } +} + +static +void print_label_statement(FILE *out, const label_statement_t *statement) +{ + fprintf(out, ":%s", statement->symbol->string); +} + +static +void print_if_statement(FILE *out, int indent, const if_statement_t *statement) +{ + fprintf(out, "if "); + print_expression(out, statement->condition); + fprintf(out, ":\n"); + if(statement->true_statement != NULL) { + print_statement(out, indent, statement->true_statement); + } + + if(statement->false_statement != NULL) { + fprintf(out, "else:\n"); + print_statement(out, indent, statement->false_statement); + } +} + +static +void print_variable_declaration_statement(FILE *out, + const variable_declaration_statement_t *statement) +{ + fprintf(out, "var"); + if(statement->type != NULL) { + fprintf(out, "<"); + print_type(out, statement->type); + fprintf(out, ">"); + } + fprintf(out, " %s", statement->symbol->string); +} + +void print_statement(FILE *out, int indent, const statement_t *statement) +{ + for(int i = 0; i < indent; ++i) + fprintf(out, "\t"); + + switch(statement->type) { + case STATEMENT_BLOCK: + print_block_statement(out, indent, + (const block_statement_t*) statement); + break; + case STATEMENT_RETURN: + print_return_statement(out, (const return_statement_t*) statement); + break; + case STATEMENT_EXPRESSION: + print_expression_statement(out, + (const expression_statement_t*) statement); + break; + case STATEMENT_LABEL: + print_label_statement(out, (const label_statement_t*) statement); + break; + case STATEMENT_GOTO: + print_goto_statement(out, (const goto_statement_t*) statement); + break; + case STATEMENT_IF: + print_if_statement(out, indent, (const if_statement_t*) statement); + break; + case STATEMENT_VARIABLE_DECLARATION: + print_variable_declaration_statement(out, + (const variable_declaration_statement_t*) statement); + break; + case STATEMENT_INVALID: + default: + fprintf(out, "*invalid statement*"); + break; + + } + fprintf(out, "\n"); +} + +static +void print_method_parameters(FILE *out, const method_parameter_t *parameters, + const method_type_t *method_type) +{ + fprintf(out, "("); + + int first = 1; + const method_parameter_t *parameter = parameters; + const method_parameter_type_t *parameter_type + = method_type->parameter_types; + while(parameter != NULL && parameter_type != NULL) { + if(!first) { + fprintf(out, ", "); + } else { + first = 0; + } + + print_type(out, parameter_type->type); + fprintf(out, " %s", parameter->symbol->string); + + parameter = parameter->next; + parameter_type = parameter_type->next; + } + assert(parameter == NULL && parameter_type == NULL); + + fprintf(out, ")"); +} + +static +void print_method(FILE *out, const method_t *method) +{ + method_type_t *type = method->type; + + fprintf(out, "func "); + print_type(out, type->result_type); + fprintf(out, " %s", method->symbol->string); + + print_method_parameters(out, method->parameters, type); + + if(method->statement != NULL) { + fprintf(out, ":\n"); + print_statement(out, 0, method->statement); + } else { + fprintf(out, "\n"); + } +} + +static +void print_namespace_entry(FILE *out, const namespace_entry_t *entry) +{ + switch(entry->type) { + case NAMESPACE_ENTRY_METHOD: + print_method(out, (const method_t*) entry); + break; + case NAMESPACE_ENTRY_VARIABLE: + /* TODO */ + fprintf(out, "some namespace entry of type %d\n\n", entry->type); + break; + case NAMESPACE_ENTRY_INVALID: + default: + fprintf(out, "invalid namespace entry (%d)\n", entry->type); + break; + } +} + +void print_ast(FILE *out, const namespace_t *namespace) +{ + namespace_entry_t *entry = namespace->entries; + + while(entry != NULL) { + print_namespace_entry(out, entry); + + entry = entry->next; + } +} + +void init_ast_module(void) +{ + obstack_init(&ast_obstack); +} + +void exit_ast_module(void) +{ + obstack_free(&ast_obstack, NULL); +} + +void* (allocate_ast) (size_t size) +{ + return _allocate_ast(size); +} diff --git a/ast.h b/ast.h new file mode 100644 index 0000000..30560cb --- /dev/null +++ b/ast.h @@ -0,0 +1,47 @@ +#ifndef AST_H +#define AST_H + +#include + +typedef struct expression_t expression_t; +typedef struct const_t const_t; +typedef struct string_literal_t string_literal_t; +typedef struct cast_expression_t cast_expression_t; +typedef struct call_argument_t call_argument_t; +typedef struct type_argument_t type_argument_t; +typedef struct call_expression_t call_expression_t; +typedef struct binary_expression_t binary_expression_t; +typedef struct unary_expression_t unary_expression_t; +typedef struct select_expression_t select_expression_t; +typedef struct array_access_expression_t array_access_expression_t; +typedef struct sizeof_expression_t sizeof_expression_t; +typedef struct conditional_expression_t conditional_expression_t; +typedef struct expression_list_element_t expression_list_element_t; +typedef struct comma_expression_t comma_expression_t; + +typedef struct statement_t statement_t; +typedef struct block_statement_t block_statement_t; +typedef struct return_statement_t return_statement_t; +typedef struct if_statement_t if_statement_t; +typedef struct variable_declaration_statement_t + variable_declaration_statement_t; +typedef struct expression_statement_t expression_statement_t; +typedef struct goto_statement_t goto_statement_t; +typedef struct label_statement_t label_statement_t; + +typedef enum namespace_entry_type_t namespace_entry_type_t; +typedef struct namespace_entry_t namespace_entry_t; +typedef struct namespace_t namespace_t; +typedef struct method_parameter_t method_parameter_t; +typedef struct method_t method_t; +typedef struct global_variable_t global_variable_t; + +void init_ast_module(void); +void exit_ast_module(void); + +void print_expression(FILE *out, const expression_t *expression); +void print_statement(FILE *out, int indent, const statement_t *statement); +void print_ast(FILE *out, const namespace_t *namespace); +void *allocate_ast(size_t size); + +#endif diff --git a/ast_t.h b/ast_t.h new file mode 100644 index 0000000..415bbc6 --- /dev/null +++ b/ast_t.h @@ -0,0 +1,267 @@ +#ifndef AST_T_H +#define AST_T_H + +#include "ast.h" +#include "symbol.h" +#include "lexer_t.h" +#include "type.h" +#include "adt/obst.h" + +extern struct obstack ast_obstack; + +typedef enum { + EXPR_INVALID = 0, + EXPR_REFERENCE, + EXPR_CONST, + EXPR_STRING_LITERAL, + EXPR_CALL, + EXPR_UNARY, + EXPR_BINARY, + EXPR_SELECT, + EXPR_ARRAY_ACCESS, + EXPR_SIZEOF, +} expresion_type_t; + +struct expression_t { + expresion_type_t type; + type_t *datatype; + source_position_t source_position; +}; + +struct const_t { + expression_t expression; + int value; +}; + +struct string_literal_t { + expression_t expression; + const char *value; +}; + +struct reference_expression_t { + expression_t expression; + symbol_t *symbol; + union { + variable_declaration_statement_t *variable; + method_t *method; + global_variable_t *global_variable; + method_parameter_t *method_parameter; + } r; +}; + +struct call_argument_t { + expression_t *expression; + call_argument_t *next; +}; + +struct call_expression_t { + expression_t expression; + expression_t *method; + call_argument_t *arguments; +}; + +typedef enum { + UNEXPR_INVALID = 0, + UNEXPR_NEGATE, + UNEXPR_BITWISE_NEGATE, + UNEXPR_NOT, + UNEXPR_DEREFERENCE, + UNEXPR_TAKE_ADDRESS, + UNEXPR_POSTFIX_INCREMENT, + UNEXPR_POSTFIX_DECREMENT, + UNEXPR_PREFIX_INCREMENT, + UNEXPR_PREFIX_DECREMENT, + UNEXPR_CAST +} unary_expression_type_t; + +struct unary_expression_t { + expression_t expression; + unary_expression_type_t type; + expression_t *value; +}; + +typedef enum { + BINEXPR_INVALID = 0, + BINEXPR_ADD, + BINEXPR_SUB, + BINEXPR_MUL, + BINEXPR_DIV, + BINEXPR_MOD, + BINEXPR_EQUAL, + BINEXPR_NOTEQUAL, + BINEXPR_LESS, + BINEXPR_LESSEQUAL, + BINEXPR_GREATER, + BINEXPR_GREATEREQUAL, + BINEXPR_BITWISE_AND, + BINEXPR_BITWISE_OR, + BINEXPR_BITWSIE_XOR, + BINEXPR_LOGICAL_AND, + BINEXPR_LOGICAL_OR, + BINEXPR_SHIFTLEFT, + BINEXPR_SHIFTRIGHT, + BINEXPR_ASSIGN, + BINEXPR_MUL_ASSIGN, + BINEXPR_DIV_ASSIGN, + BINEXPR_MOD_ASSIGN, + BINEXPR_ADD_ASSIGN, + BINEXPR_SUB_ASSIGN, + BINEXPR_SHIFTLEFT_ASSIGN, + BINEXPR_SHIFTRIGHT_ASSIGN, + BINEXPR_BITWISE_AND_ASSIGN, + BINEXPR_BITWISE_XOR_ASSIGN, + BINEXPR_BITWISE_OR_ASSIGN +} binary_expression_type_t; + +struct binary_expression_t { + expression_t expression; + binary_expression_type_t type; + expression_t *left; + expression_t *right; +}; + +struct select_expression_t { + expression_t expression; + expression_t *compound; + symbol_t *symbol; + + compound_entry_t *compound_entry; +}; + +struct array_access_expression_t { + expression_t expression; + expression_t *array_ref; + expression_t *index; +}; + +struct sizeof_expression_t { + expression_t expression; + union { + type_t *type; + expression_t *size_expression; + } v; +}; + +struct conditional_expression_t { + expression_t expression; + expression_t *condition; + expression_t *true_expression; + expression_t *false_expression; +}; + +struct expression_list_element_t { + expression_t *expression; + expression_t *next; +}; + +struct comma_expression_t { + expression_t expression; + expression_list_element_t *expressions; +}; + +typedef enum { + STATEMENT_INVALID, + STATEMENT_BLOCK, + STATEMENT_RETURN, + STATEMENT_VARIABLE_DECLARATION, + STATEMENT_IF, + STATEMENT_EXPRESSION, + STATEMENT_GOTO, + STATEMENT_LABEL +} statement_type_t; + +struct statement_t { + statement_type_t type; + statement_t *next; + source_position_t source_position; +}; + +struct return_statement_t { + statement_t statement; + expression_t *return_value; +}; + +struct block_statement_t { + statement_t statement; + statement_t *first_statement; +}; + +struct variable_declaration_statement_t { + statement_t statement; + type_t *type; + symbol_t *symbol; + + int value_number; /**< filled in by semantic phase */ + int refs; +}; + +struct if_statement_t { + statement_t statement; + expression_t *condition; + statement_t *true_statement; + statement_t *false_statement; +}; + +struct goto_statement_t { + statement_t statement; + symbol_t *label_symbol; + label_statement_t *label; +}; + +struct label_statement_t { + statement_t statement; + symbol_t *symbol; +}; + +struct expression_statement_t { + statement_t statement; + expression_t *expression; +}; + +enum namespace_entry_type_t { + NAMESPACE_ENTRY_INVALID, + NAMESPACE_ENTRY_METHOD, + NAMESPACE_ENTRY_VARIABLE, +}; + +struct namespace_entry_t { + namespace_entry_type_t type; + namespace_entry_t *next; + source_position_t source_position; +}; + +struct method_parameter_t { + method_parameter_t *next; + symbol_t *symbol; + type_t *type; + int num; +}; + +struct method_t { + namespace_entry_t namespace_entry; + symbol_t *symbol; + method_type_t *type; + method_parameter_t *parameters; + + statement_t *statement; +}; + +struct global_variable_t { + namespace_entry_t namespace_entry; + symbol_t *symbol; + type_t *type; +}; + +struct namespace_t { + namespace_entry_t *entries; +}; + +static inline +void *_allocate_ast(size_t size) +{ + return obstack_alloc(&ast_obstack, size); +} + +#define allocate_ast(size) _allocate_ast(size) + +#endif diff --git a/config.h b/config.h new file mode 100644 index 0000000..e69de29 diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..acd2ef6 --- /dev/null +++ b/lexer.c @@ -0,0 +1,816 @@ +#include + +#include "lexer_t.h" +#include "token_t.h" +#include "symbol_table_t.h" +#include "adt/error.h" + +#include +#include +#include +#include + +//#define DEBUG_CHARS +#define MAX_PUTBACK 1 + +static +void error_prefix_at(lexer_t *this, const char *input_name, unsigned linenr) +{ + (void) this; + fprintf(stderr, "%s:%d: Error: ", input_name, linenr); +} + +static +void error_prefix(lexer_t *this) +{ + error_prefix_at(this, this->source_position.input_name, + this->source_position.linenr); +} + +static +void parse_error(lexer_t *this, const char *msg) +{ + error_prefix(this); + fprintf(stderr, "%s\n", msg); +} + +static inline +void next_char(lexer_t *this) +{ + this->bufpos++; + if(this->bufpos >= this->bufend) { + size_t s = fread(this->buf + MAX_PUTBACK, 1, + sizeof(this->buf) - MAX_PUTBACK, this->input); + if(s == 0) { + this->c = EOF; + return; + } + this->bufpos = this->buf + MAX_PUTBACK; + this->bufend = this->buf + MAX_PUTBACK + s; + } + this->c = *(this->bufpos); +#ifdef DEBUG_CHARS + printf("nchar '%c'\n", this->c); +#endif +} + +static inline +void put_back(lexer_t *this, int c) +{ + char *p = (char*) this->bufpos - 1; + this->bufpos--; + assert(p >= this->buf); + *p = c; + +#ifdef DEBUG_CHARS + printf("putback '%c'\n", c); +#endif +} + +static +int replace_trigraph(lexer_t *this) +{ +#define MATCH_TRIGRAPH(ch,replacement) \ + case ch: \ + this->c = replacement; \ + return 1; + + switch(this->c) { + MATCH_TRIGRAPH('=', '#') + MATCH_TRIGRAPH('(', '[') + MATCH_TRIGRAPH('/', '\\') + MATCH_TRIGRAPH(')', ']') + MATCH_TRIGRAPH('\'', '^') + MATCH_TRIGRAPH('<', '{') + MATCH_TRIGRAPH('!', '|') + MATCH_TRIGRAPH('>', '}') + MATCH_TRIGRAPH('-', '~') + default: + break; + } + + return 0; +} + +static +void parse_symbol(lexer_t *this, token_t *token) +{ + symbol_t *symbol; + char *string; + + obstack_1grow(&symbol_obstack, this->c); + next_char(this); + + while(1) { + switch(this->c) { + case '\\': + next_char(this); + if(this->c == '\n') { + next_char(this); + this->source_position.linenr++; + break; + } + + case 'A' ... 'Z': + case 'a' ... 'z': + case '_': + obstack_1grow(&symbol_obstack, this->c); + next_char(this); + break; + + case '?': + next_char(this); + if(this->c != '?') { + put_back(this, this->c); + this->c = '?'; + goto end_symbol; + } + next_char(this); + if(replace_trigraph(this)) + break; + put_back(this, '?'); + put_back(this, this->c); + this->c = '?'; + goto end_symbol; + + default: + goto end_symbol; + } + } +end_symbol: + obstack_1grow(&symbol_obstack, '\0'); + + string = obstack_finish(&symbol_obstack); + symbol = symbol_table_insert(string); + + if(symbol->ID > 0) { + token->type = symbol->ID; + } else { + token->type = T_IDENTIFIER; + } + token->v.symbol = symbol; + + if(symbol->string != string) { + obstack_free(&symbol_obstack, string); + } +} + +#if 0 +static +preprocessor_token_type_t parse_pp_symbol(lexer_t *this) +{ + do { + obstack_1grow(&symbol_obstack, this->c); + next_char(this); + } while(is_ident_char(this->c)); + obstack_1grow(&symbol_obstack, '\0'); + + char *string = obstack_finish(&symbol_obstack); + symbol_t *symbol = preprocessor_symbol_table_find(string); + obstack_free(&symbol_obstack, string); + + if(symbol == 0) + return TP_ERROR; + + return symbol->ID; +} +#endif + +static +void parse_number_hex(lexer_t *this, token_t *token) +{ + assert(this->c == 'x' || this->c == 'X'); + next_char(this); + + if (!isdigit(this->c) && + !('A' <= this->c && this->c <= 'F') && + !('a' <= this->c && this->c <= 'f')) { + parse_error(this, "premature end of hex number literal"); + token->type = T_ERROR; + return; + } + + int value = 0; + for(;;) { + if (isdigit(this->c)) { + value = 16 * value + this->c - '0'; + } else if ('A' <= this->c && this->c <= 'F') { + value = 16 * value + this->c - 'A' + 10; + } else if ('a' <= this->c && this->c <= 'f') { + value = 16 * value + this->c - 'a' + 10; + } else { + token->type = T_INTEGER; + token->v.intvalue = value; + return; + } + next_char(this); + } +} + +static +void parse_number_oct(lexer_t *this, token_t *token) +{ + assert(this->c == 'o' || this->c == 'O'); + next_char(this); + + int value = 0; + for(;;) { + if ('0' <= this->c && this->c <= '7') { + value = 8 * value + this->c - '0'; + } else { + token->type = T_INTEGER; + token->v.intvalue = value; + return; + } + next_char(this); + } +} + +static +void parse_number_dec(lexer_t *this, token_t *token, int first_char) +{ + int value = 0; + if(first_char > 0) { + assert(first_char >= '0' && first_char <= '9'); + value = first_char - '0'; + } + + for(;;) { + if (isdigit(this->c)) { + value = 10 * value + this->c - '0'; + } else { + token->type = T_INTEGER; + token->v.intvalue = value; + return; + } + next_char(this); + } +} + +static +void parse_number(lexer_t *this, token_t *token) +{ + // TODO check for overflow + // TODO check for various invalid inputs sequences + + if (this->c == '0') { + next_char(this); + switch (this->c) { + case 'X': + case 'x': parse_number_hex(this, token); break; + case 'o': + case 'O': parse_number_oct(this, token); break; + default: parse_number_dec(this, token, '0'); + } + } else { + parse_number_dec(this, token, 0); + } +} + +static +int parse_escape_sequence(lexer_t *this) +{ + int c = this->c; + next_char(this); + + switch(c) { + case '"': return '"'; + case '\'': return'\''; + case '\\': + if(this->c == '\n') { + this->source_position.linenr++; + next_char(this); + return parse_escape_sequence(this); + } + return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case 'x': /* TODO parse hex number ... */ + parse_error(this, "hex escape sequences not implemented yet"); + return EOF; + case 0 ... 8: /* TODO parse octal number ... */ + parse_error(this, "octal escape sequences not implemented yet"); + return EOF; + case '?': + if(this->c != '?') { + return '?'; + } + /* might be a trigraph */ + next_char(this); + if(replace_trigraph(this)) { + return parse_escape_sequence(this); + } + put_back(this, '?'); + return '?'; + + case EOF: + parse_error(this, "reached end of file while parsing escape sequence"); + return EOF; + default: + parse_error(this, "unknown escape sequence\n"); + return EOF; + } +} + +static +void parse_string_literal(lexer_t *this, token_t *token) +{ + unsigned start_linenr = this->source_position.linenr; + char *string; + const char *result; + + assert(this->c == '"'); + next_char(this); + + while(1) { + switch(this->c) { + case '?': + next_char(this); + if(this->c != '?') { + obstack_1grow(&symbol_obstack, '?'); + break; + } + next_char(this); + if(replace_trigraph(this)) + break; + obstack_1grow(&symbol_obstack, '?'); + put_back(this, this->c); + this->c = '?'; + break; + + case '\\': + next_char(this); + if(this->c == '\n') { + next_char(this); + this->source_position.linenr++; + break; + } + int c = parse_escape_sequence(this); + obstack_1grow(&symbol_obstack, c); + break; + + case EOF: + error_prefix_at(this, this->source_position.input_name, + start_linenr); + fprintf(stderr, "string has no end\n"); + token->type = T_ERROR; + return; + + case '"': + next_char(this); + goto end_of_string; + + default: + obstack_1grow(&symbol_obstack, this->c); + next_char(this); + break; + } + } + +end_of_string: + + /* TODO: concatenate multiple strings separated by whitespace... */ + + /* add finishing 0 to the string */ + obstack_1grow(&symbol_obstack, '\0'); + string = obstack_finish(&symbol_obstack); + + /* check if there is already a copy of the string */ + result = strset_insert(&this->stringset, string); + if(result != string) { + obstack_free(&symbol_obstack, string); + } + + token->type = T_STRING_LITERAL; + token->v.string = result; +} + +static +void skip_multiline_comment(lexer_t *this) +{ + unsigned start_linenr = this->source_position.linenr; + + while(1) { + switch(this->c) { + case '*': + next_char(this); + if(this->c == '/') { + next_char(this); + return; + } + break; + case '?': + next_char(this); + if(this->c != '?') + break; + next_char(this); + if(replace_trigraph(this)) + break; + put_back(this, '?'); + /* we don't put back the 2nd ? as the comment text is discarded + * anyway */ + break; + + case '\n': + this->source_position.linenr++; + next_char(this); + break; + case EOF: + error_prefix_at(this, this->source_position.input_name, + start_linenr); + fprintf(stderr, "at end of file while looking for comment end\n"); + return; + default: + next_char(this); + break; + } + } +} + +static +void skip_line_comment(lexer_t *this) +{ + while(1) { + switch(this->c) { + case '?': + next_char(this); + if(this->c != '?') + break; + next_char(this); + if(replace_trigraph(this)) + break; + put_back(this, '?'); + /* we don't put back the 2nd ? as the comment text is discarded + * anyway */ + break; + + case '\\': + next_char(this); + if(this->c == '\n') { + next_char(this); + this->source_position.linenr++; + } + break; + + case EOF: + case '\n': + return; + default: + next_char(this); + break; + } + } +} + +#if 0 +static +void eat_until_newline(lexer_t *this) +{ + while(this->c != '\n') { + next_char(this); + } +} +#endif + +#if 0 +static +void parse_preprocessor_directive(lexer_t *this, token_t *result_token) +{ + (void) result_token; + /* skip whitespaces */ + while(this->c == ' ' || this->c == '\t' || this->c == '\r') { + next_char(this); + } +} +#endif + +void preprocessor_next_token(lexer_t *this, token_t *token) +{ + /* skip whitespaces */ + while(this->c == ' ' || this->c == '\t' || this->c == '\r') { + next_char(this); + } + + switch(this->c) { + case 'A' ... 'Z': + case 'a' ... 'z': + case '_': + parse_symbol(this, token); + } +} + +void lexer_next_token(lexer_t *this, token_t *token) +{ + int line_begin = 0; + + /* skip whitespaces */ + while(this->c == ' ' || this->c == '\t' || this->c == '\n' + || this->c == '\r') { + if(this->c == '\n') { + line_begin = 1; + this->source_position.linenr++; + } + next_char(this); + } + + switch(this->c) { + case 'A' ... 'Z': + case 'a' ... 'z': + case '_': + parse_symbol(this, token); + break; + + case '0' ... '9': + parse_number(this, token); + break; + + case '"': + parse_string_literal(this, token); + break; + + case '\'': + next_char(this); + if(this->c == '\\') { + next_char(this); + token->type = T_INTEGER; + token->v.intvalue = parse_escape_sequence(this); + } else { + if(this->c == '\n') { + parse_error(this, "newline while parsing character constant"); + this->source_position.linenr++; + } + token->type = T_INTEGER; + token->v.intvalue = this->c; + next_char(this); + } + if(this->c != '\'') { + parse_error(this, "multibyte character constant"); + token->type = T_ERROR; + } else { + next_char(this); + } + break; + + case '\\': + next_char(this); + if(this->c == '\n') { + next_char(this); + this->source_position.linenr++; + lexer_next_token(this, token); + return; + } else { + parse_error(this, "unexpected '\\' found"); + token->type = T_ERROR; + } + break; + +#define MAYBE1(ch, set_type) \ + next_char(this); \ + while(1) { \ + switch(this->c) { \ + case ch: \ + next_char(this); \ + token->type = set_type; \ + return; \ + +#define MAYBE(ch, set_type) \ + case ch: \ + next_char(this); \ + token->type = set_type; \ + return; + +#define ELSE(set_type) \ + case '?': \ + next_char(this); \ + if(this->c != '?') { \ + put_back(this, this->c); \ + this->c = '?'; \ + token->type = set_type; \ + return; \ + } \ + next_char(this); \ + if(replace_trigraph(this)) \ + break; \ + put_back(this, '?'); \ + put_back(this, this->c); \ + this->c = '?'; \ + token->type = set_type; \ + return; \ + \ + case '\\': \ + next_char(this); \ + if(this->c == '\n') { \ + next_char(this); \ + this->source_position.linenr++; \ + break; \ + } \ + /* fallthrough */ \ + default: \ + token->type = set_type; \ + return; \ + } \ + } /* end of while(1) */ \ + break; + + case '.': + next_char(this); + if(this->c == '.') { + next_char(this); + if(this->c == '.') { + next_char(this); + token->type = T_DOTDOTDOT; + } else { + put_back(this, '.'); + token->type = '.'; + } + } else { + token->type = '.'; + } + break; + case '&': + MAYBE1('&', T_ANDAND) + MAYBE('=', T_ANDEQUAL) + ELSE('&') + case '*': + MAYBE1('=', T_ASTERISKEQUAL) + ELSE('*') + case '+': + MAYBE1('+', T_PLUSPLUS) + MAYBE('=', T_PLUSEQUAL) + ELSE('+') + case '-': + MAYBE1('-', T_MINUSMINUS) + MAYBE('=', T_MINUSEQUAL) + ELSE('-') + case '!': + MAYBE1('=', T_EXCLAMATIONMARKEQUAL) + ELSE('!') + case '/': + MAYBE1('=', T_SLASHEQUAL) + case '*': + next_char(this); + skip_multiline_comment(this); + lexer_next_token(this, token); + return; + case '/': + next_char(this); + skip_line_comment(this); + lexer_next_token(this, token); + return; + ELSE('/') + case '%': + MAYBE1('=', T_PERCENTEQUAL) + case ':': + /* TODO find trigraphs... */ + next_char(this); + if(this->c == '%') { + next_char(this); + if(this->c == ':') { + next_char(this); + token->type = T_PERCENTCOLONPERCENTCOLON; + } else { + put_back(this, '%'); + token->type = T_PERCENTCOLON; + } + return; + } + token->type = T_PERCENTCOLON; + return; + MAYBE('>', T_PERCENTGREATER) + ELSE('%') + case '<': + MAYBE1(':', T_LESSCOLON) + MAYBE('%', T_LESSPERCENT) + case '<': + /* TODO trigraphs... */ + next_char(this); + if(this->c == '<') { + next_char(this); + if(this->c == '=') { + next_char(this); + token->type = T_LESSLESSEQUAL; + } else { + token->type = T_LESSLESS; + } + } else { + token->type = T_LESS; + } + return; + ELSE('<') + case '>': + next_char(this); + while(1) { + switch(this->c) { + case '>': + next_char(this); + /* TODO trigraphs */ + if(this->c == '=') { + next_char(this); + token->type = T_GREATERGREATEREQUAL; + } else { + token->type = T_GREATERGREATER; + } + break; + ELSE('>') + case '^': + MAYBE1('=', T_CARETEQUAL) + ELSE('^') + case '|': + MAYBE1('=', T_PIPEEQUAL) + MAYBE('|', T_PIPEPIPE) + ELSE('|') + case ':': + MAYBE1('>', T_COLONGREATER) + ELSE(':') + case '=': + MAYBE1('=', T_EQUALEQUAL) + ELSE('=') + case '#': + MAYBE1('#', T_HASHHASH) +#if 0 + else { + if(line_begin) { + parse_preprocessor_directive(this, token); + return; + } else { + token->type = '#'; + } +#else + ELSE('#') +#endif + + case '?': + next_char(this); + /* just a simple ? */ + if(this->c != '?') { + token->type = '?'; + break; + } + /* might be a trigraph */ + next_char(this); + if(replace_trigraph(this)) { + lexer_next_token(this, token); + return; + } + put_back(this, this->c); + this->c = '?'; + token->type = '?'; + break; + + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case '~': + case ';': + case ',': + token->type = this->c; + next_char(this); + break; + + case EOF: + token->type = T_EOF; + break; + + default: + error_prefix(this); + fprintf(stderr, "unknown character '%c' found\n", this->c); + token->type = T_ERROR; + next_char(this); + break; + } +} + +void lexer_init(lexer_t *this, FILE *stream, const char *input_name) +{ + memset(this, 0, sizeof(this[0])); + + this->input = stream; + + this->source_position.linenr = 0; + this->source_position.input_name = input_name; + strset_init(&this->stringset); + + /* we place a virtual '\n' at the beginning so the lexer knows we're at the + * beginning of a line */ + this->c = '\n'; +} + +void lexer_destroy(lexer_t *this) +{ + (void) this; +} + +static __attribute__((unused)) +void dbg_pos(const source_position_t source_position) +{ + fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr); + fflush(stdout); +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..f22f445 --- /dev/null +++ b/lexer.h @@ -0,0 +1,11 @@ +#ifndef LEXER_H +#define LEXER_H + +#include "symbol_table_t.h" +#include "token_t.h" + +typedef struct lexer_t lexer_t; + +void lexer_next_token(lexer_t *lexer, token_t *token); + +#endif diff --git a/lexer_t.h b/lexer_t.h new file mode 100644 index 0000000..f4da9b2 --- /dev/null +++ b/lexer_t.h @@ -0,0 +1,33 @@ +#ifndef LEXER_T_H +#define LEXER_T_H + +#include "lexer.h" + +#include +#include "symbol_table_t.h" +#include "adt/obst.h" +#include "adt/strset.h" + +#define MAX_INDENT 256 + +typedef struct source_position_t source_position_t; +struct source_position_t { + const char *input_name; + unsigned linenr; +}; + +struct lexer_t { + int c; + source_position_t source_position; + FILE *input; + char buf[1024]; + const char *bufend; + const char *bufpos; + strset_t stringset; +}; + +void lexer_init(lexer_t *lexer, FILE *stream, const char *input_name); + +void lexer_destroy(lexer_t *lexer); + +#endif diff --git a/lextest/legalc/comment.c b/lextest/legalc/comment.c new file mode 100644 index 0000000..e1c4fb9 --- /dev/null +++ b/lextest/legalc/comment.c @@ -0,0 +1,17 @@ +#include + +int main() +{ + // a comment\ + printf("don't print me\n"); + + // blo \??= \??/ + printf("don't print me too\n"); + + // another comment???/ + printf("don't print me 3\n"); + + /* *??/ +/ printf("print me\n"); + return 0; +} diff --git a/lextest/legalc/operators.c b/lextest/legalc/operators.c new file mode 100644 index 0000000..89f44cf --- /dev/null +++ b/lextest/legalc/operators.c @@ -0,0 +1,6 @@ +#include + +int main(int argc, char **argv) +{ + +} diff --git a/lextest/legalc/splicetest.c b/lextest/legalc/splicetest.c new file mode 100644 index 0000000..6591c9e --- /dev/null +++ b/lextest/legalc/splicetest.c @@ -0,0 +1,11 @@ +#i\ +n??/ +clude + +int main() +{ + printf("gut\??/ +n"); + return 0; +} diff --git a/lextest/legalc/string.c b/lextest/legalc/string.c new file mode 100644 index 0000000..f2bbc1c --- /dev/null +++ b/lextest/legalc/string.c @@ -0,0 +1,9 @@ +#include + +int main() +{ + printf("String "// Hello /* +/*world*/" ??/"World\"\n"); + printf("Sharp: \???=\n"); + return 0; +} diff --git a/lextest/legalc/test1.c b/lextest/legalc/test1.c new file mode 100644 index 0000000..dabd91a --- /dev/null +++ b/lextest/legalc/test1.c @@ -0,0 +1,3 @@ +0x3<1/a.h>1e2 +#include <1/a.h> +#define const.member@$ diff --git a/lextest/legalc/test2.c b/lextest/legalc/test2.c new file mode 100644 index 0000000..46b1a37 --- /dev/null +++ b/lextest/legalc/test2.c @@ -0,0 +1,13 @@ +"a//b" +#include "//e" +// */ +f = g/**//h; +//\ +i(); +/\ +/ j(); +#define glue(x,y) x##y +glue(/,/) k(); +/*//*/ 1(); +m = n//**/o + + p; diff --git a/lextest/legalc/trigraphs.c b/lextest/legalc/trigraphs.c new file mode 100644 index 0000000..0d9ec3c --- /dev/null +++ b/lextest/legalc/trigraphs.c @@ -0,0 +1,16 @@ +#include + +int main() +{ + int i = 0; + i +??/ += 2; + + printf("Result: %d\n", i); + printf("Newline???/??/ +n"); + printf("A backslash: '%c'\n", '\??/ +\'); + printf("??= ??( ??/\ ??) ??' ??< ??! ??> ??- ??? \n"); + return 0; +} diff --git a/lextest/tokenstreams/operators b/lextest/tokenstreams/operators new file mode 100644 index 0000000..ea6f781 --- /dev/null +++ b/lextest/tokenstreams/operators @@ -0,0 +1,12 @@ ++++++ +... +. . . +.... ++??/ ++ +<\ +<\ += +>??/ +>??/ += diff --git a/lextest/tokenstreams/stringtrigraphs b/lextest/tokenstreams/stringtrigraphs new file mode 100644 index 0000000..0a1d82e --- /dev/null +++ b/lextest/tokenstreams/stringtrigraphs @@ -0,0 +1,12 @@ +"bla?" +"bla??" +"bla???" +"bla??/n" +"bla???/n" +"bla????/n" +"bla??/ +" +"bla???/ +" +"bla????/ +" diff --git a/lextest/tokenstreams/t b/lextest/tokenstreams/t new file mode 100644 index 0000000..2d6d6ac --- /dev/null +++ b/lextest/tokenstreams/t @@ -0,0 +1,3 @@ +symbo??? +symbo?? +symbo? diff --git a/lextest/tokenstreams/t2 b/lextest/tokenstreams/t2 new file mode 100644 index 0000000..a1d44d5 --- /dev/null +++ b/lextest/tokenstreams/t2 @@ -0,0 +1 @@ +?? diff --git a/lextest/tokenstreams/t3 b/lextest/tokenstreams/t3 new file mode 100644 index 0000000..6272e43 --- /dev/null +++ b/lextest/tokenstreams/t3 @@ -0,0 +1,3 @@ +"??? ?? ?" +'?' +'??=' diff --git a/lextest/warnings/h1.h b/lextest/warnings/h1.h new file mode 100644 index 0000000..f814582 --- /dev/null +++ b/lextest/warnings/h1.h @@ -0,0 +1 @@ +#include "h2.h" diff --git a/lextest/warnings/h10.h b/lextest/warnings/h10.h new file mode 100644 index 0000000..5815cb1 --- /dev/null +++ b/lextest/warnings/h10.h @@ -0,0 +1 @@ +#include "h11.h" diff --git a/lextest/warnings/h11.h b/lextest/warnings/h11.h new file mode 100644 index 0000000..199386b --- /dev/null +++ b/lextest/warnings/h11.h @@ -0,0 +1 @@ +#include "h12.h" diff --git a/lextest/warnings/h12.h b/lextest/warnings/h12.h new file mode 100644 index 0000000..f38ad90 --- /dev/null +++ b/lextest/warnings/h12.h @@ -0,0 +1 @@ +#include "h13.h" diff --git a/lextest/warnings/h13.h b/lextest/warnings/h13.h new file mode 100644 index 0000000..70b4d71 --- /dev/null +++ b/lextest/warnings/h13.h @@ -0,0 +1 @@ +#include "h14.h" diff --git a/lextest/warnings/h14.h b/lextest/warnings/h14.h new file mode 100644 index 0000000..749cc53 --- /dev/null +++ b/lextest/warnings/h14.h @@ -0,0 +1 @@ +#include "h15.h" diff --git a/lextest/warnings/h15.h b/lextest/warnings/h15.h new file mode 100644 index 0000000..14fb4dd --- /dev/null +++ b/lextest/warnings/h15.h @@ -0,0 +1 @@ +#include "h16.h" diff --git a/lextest/warnings/h16.h b/lextest/warnings/h16.h new file mode 100644 index 0000000..e69de29 diff --git a/lextest/warnings/h2.h b/lextest/warnings/h2.h new file mode 100644 index 0000000..02465eb --- /dev/null +++ b/lextest/warnings/h2.h @@ -0,0 +1 @@ +#include "h3.h" diff --git a/lextest/warnings/h3.h b/lextest/warnings/h3.h new file mode 100644 index 0000000..a65c928 --- /dev/null +++ b/lextest/warnings/h3.h @@ -0,0 +1 @@ +#include "h4.h" diff --git a/lextest/warnings/h4.h b/lextest/warnings/h4.h new file mode 100644 index 0000000..3f827cd --- /dev/null +++ b/lextest/warnings/h4.h @@ -0,0 +1 @@ +#include "h5.h" diff --git a/lextest/warnings/h5.h b/lextest/warnings/h5.h new file mode 100644 index 0000000..6306741 --- /dev/null +++ b/lextest/warnings/h5.h @@ -0,0 +1 @@ +#include "h6.h" diff --git a/lextest/warnings/h6.h b/lextest/warnings/h6.h new file mode 100644 index 0000000..7690ef6 --- /dev/null +++ b/lextest/warnings/h6.h @@ -0,0 +1 @@ +#include "h7.h" diff --git a/lextest/warnings/h7.h b/lextest/warnings/h7.h new file mode 100644 index 0000000..7675012 --- /dev/null +++ b/lextest/warnings/h7.h @@ -0,0 +1 @@ +#include "h8.h" diff --git a/lextest/warnings/h8.h b/lextest/warnings/h8.h new file mode 100644 index 0000000..850b321 --- /dev/null +++ b/lextest/warnings/h8.h @@ -0,0 +1 @@ +#include "h9.h" diff --git a/lextest/warnings/h9.h b/lextest/warnings/h9.h new file mode 100644 index 0000000..3cc4374 --- /dev/null +++ b/lextest/warnings/h9.h @@ -0,0 +1 @@ +#include "h10.h" diff --git a/main.c b/main.c new file mode 100644 index 0000000..81d4760 --- /dev/null +++ b/main.c @@ -0,0 +1,73 @@ +#include + +#include +#include +#include +#include + +#include "lexer_t.h" +#include "token_t.h" + +#if 0 +static +void get_output_name(char *buf, size_t buflen, const char *inputname, + const char *newext) +{ + size_t last_dot = 0xffffffff; + size_t i = 0; + for(const char *c = inputname; *c != 0; ++c) { + if(*c == '.') + last_dot = i; + ++i; + } + if(last_dot == 0xffffffff) + last_dot = i; + + if(last_dot >= buflen) + panic("filename too long"); + memcpy(buf, inputname, last_dot); + + size_t extlen = strlen(newext) + 1; + if(extlen + last_dot >= buflen) + panic("filename too long"); + memcpy(buf+last_dot, newext, extlen); +} +#endif + +static +void compile(const char *fname) +{ + lexer_t lexer; + token_t token; + + FILE *in = fopen(fname, "r"); + if(in == NULL) { + fprintf(stderr, "Couldn't open '%s': %s\n", fname, strerror(errno)); + exit(1); + } + + lexer_init(&lexer, in, fname); + + do { + lexer_next_token(&lexer, &token); + print_token(stdout, &token); + puts(""); + } while(token.type != T_EOF); + + lexer_destroy(&lexer); + fclose(in); +} + +int main(int argc, char **argv) +{ + init_symbol_table(); + init_tokens(); + + for(int i = 1; i < argc; ++i) { + compile(argv[i]); + } + + exit_tokens(); + exit_symbol_table(); + return 0; +} diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..a5628b4 --- /dev/null +++ b/parser.c @@ -0,0 +1,376 @@ +#include + +#include + +#include "lexer_t.h" +#include "token_t.h" +#include "type_t.h" +#include "ast_t.h" +#include "adt/bitfiddle.h" + +#define PRINT_TOKENS + +static lexer_t lexer; +static token_t token; + +static inline +void next_token() +{ + lexer_next_token(&lexer, &token); + +#ifdef PRINT_TOKENS + print_token(stderr, &token); + fprintf(stderr, "\n"); +#endif +} + +static inline +void eat(token_type_t type) +{ + assert(token.type == type); + next_token(); +} + +void parser_print_error_prefix() +{ + fputs(lexer.source_position.input_name, stderr); + fputc(':', stderr); + fprintf(stderr, "%d", lexer.source_position.linenr); + fputs(": error: ", stderr); +} + +static +void parse_error(const char *message) +{ + parser_print_error_prefix(); + fprintf(stderr, "parse error: %s\n", message); +} + +#define expect(expected) \ + if(UNLIKELY(token.type != (expected))) { \ + /*parse_error_expected(NULL, (expected), 0);*/ \ + /*eat_until_semi();*/ \ + return NULL; \ + } \ + next_token(); + +typedef enum { + SPECIFIER_SIGNED = 1 << 0, + SPECIFIER_UNSIGNED = 1 << 1, + SPECIFIER_LONG = 1 << 2, + SPECIFIER_INT = 1 << 3, + SPECIFIER_DOUBLE = 1 << 4, + SPECIFIER_CHAR = 1 << 5, + SPECIFIER_SHORT = 1 << 6, + SPECIFIER_LONG_LONG = 1 << 7, + SPECIFIER_FLOAT = 1 << 8, + SPECIFIER_BOOL = 1 << 9, + SPECIFIER_VOID = 1 << 10, +#ifdef PROVIDE_COMPLEX + SPECIFIER_COMPLEX = 1 << 11, +#endif +#ifdef PROVIDE_IMAGINARY + SPECIFIER_IMAGINARY = 1 << 12, +#endif +} specifiers_t; + +typedef enum { + TYPE_QUALIFIER_CONST = 1 << 0, + TYPE_QUALIFIER_RESTRICT = 1 << 1, + TYPE_QUALIFIER_VOLATILE = 1 << 2, + TYPE_QUALIFIER_INLINE = 1 << 3, +} type_qualifier_t; + +typedef enum { + STORAGE_CLASS_NONE, + STORAGE_CLASS_TYPEDEF, + STORAGE_CLASS_EXTERN, + STORAGE_CLASS_STATIC, + STORAGE_CLASS_AUTO, + STORAGE_CLASS_REGISTER +} storage_class_t; + +typedef struct declaration_specifiers_t declaration_specifiers_t; +struct declaration_specifiers_t { + storage_class_t storage_class; + int type_qualifiers; +}; + +static +void parse_declaration_specifiers(declaration_specifiers_t *specifiers) +{ + type_type_t type_type = TYPE_INVALID; + atomic_type_type_t atomic_type = ATOMIC_TYPE_INVALID; + unsigned type_specifiers = 0; + + while(1) { + switch(token.type) { + + /* storage class */ +#define MATCH_STORAGE_CLASS(token, class) \ + case token: \ + if(specifiers->storage_class != STORAGE_CLASS_NONE) { \ + parse_error("multiple storage classes in declaration " \ + "specifiers"); \ + } \ + specifiers->storage_class = class; \ + next_token(); \ + break; + + MATCH_STORAGE_CLASS(T_typedef, STORAGE_CLASS_TYPEDEF) + MATCH_STORAGE_CLASS(T_extern, STORAGE_CLASS_EXTERN) + MATCH_STORAGE_CLASS(T_static, STORAGE_CLASS_STATIC) + MATCH_STORAGE_CLASS(T_auto, STORAGE_CLASS_AUTO) + MATCH_STORAGE_CLASS(T_register, STORAGE_CLASS_REGISTER) + + /* type qualifiers */ +#define MATCH_TYPE_QUALIFIER(token, qualifier) \ + case token: \ + specifiers->type_qualifiers |= qualifier; \ + next_token(); \ + break; + + MATCH_TYPE_QUALIFIER(T_const, TYPE_QUALIFIER_CONST); + MATCH_TYPE_QUALIFIER(T_restrict, TYPE_QUALIFIER_RESTRICT); + MATCH_TYPE_QUALIFIER(T_volatile, TYPE_QUALIFIER_VOLATILE); + MATCH_TYPE_QUALIFIER(T_inline, TYPE_QUALIFIER_INLINE); + + /* type specifiers */ +#define MATCH_SPECIFIER(token, specifier, name) \ + case token: \ + next_token(); \ + if(type_specifiers & specifier) { \ + parse_error("multiple " name " type specifiers given"); \ + } else { \ + type_specifiers |= specifier; \ + } \ + break; + + MATCH_SPECIFIER(T_void, SPECIFIER_VOID, "void") + MATCH_SPECIFIER(T_char, SPECIFIER_CHAR, "char") + MATCH_SPECIFIER(T_short, SPECIFIER_SHORT, "short") + MATCH_SPECIFIER(T_int, SPECIFIER_INT, "int") + MATCH_SPECIFIER(T_float, SPECIFIER_FLOAT, "float") + MATCH_SPECIFIER(T_double, SPECIFIER_DOUBLE, "double") + MATCH_SPECIFIER(T_signed, SPECIFIER_SIGNED, "signed") + MATCH_SPECIFIER(T_unsigned, SPECIFIER_UNSIGNED, "unsigned") + MATCH_SPECIFIER(T__Bool, SPECIFIER_BOOL, "_Bool") +#ifdef PROVIDE_COMPLEX + MATCH_SPECIFIER(T__Complex, SPECIFIER_COMPLEX, "_Complex") +#endif +#ifdef PROVIDE_IMAGINARY + MATCH_SPECIFIER(T__Imaginary, SPECIFIER_IMAGINARY, "_Imaginary") +#endif + case T_long: + next_token(); + if(type_specifiers & SPECIFIER_LONG_LONG) { + parse_error("too many long type specifiers given"); + } else if(type_specifiers & SPECIFIER_LONG) { + type_specifiers |= SPECIFIER_LONG_LONG; + } else { + type_specifiers |= SPECIFIER_LONG; + } + break; + + /* struct or union specifier */ + /* enum specifier */ + /* typedef name */ + + /* function specifier */ + default: + return;; + } + } + + if(type_type == TYPE_INVALID) { + /* match valid basic types */ + switch(type_specifiers) { + case SPECIFIER_VOID: + atomic_type = ATOMIC_TYPE_VOID; + break; + case SPECIFIER_CHAR: + atomic_type = ATOMIC_TYPE_CHAR; + break; + case SPECIFIER_SIGNED | SPECIFIER_CHAR: + atomic_type = ATOMIC_TYPE_SCHAR; + break; + case SPECIFIER_UNSIGNED | SPECIFIER_CHAR: + atomic_type = ATOMIC_TYPE_UCHAR; + break; + case SPECIFIER_SHORT: + case SPECIFIER_SIGNED | SPECIFIER_SHORT: + case SPECIFIER_SHORT | SPECIFIER_INT: + case SPECIFIER_SIGNED | SPECIFIER_SHORT | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_SHORT; + break; + case SPECIFIER_UNSIGNED | SPECIFIER_SHORT: + case SPECIFIER_UNSIGNED | SPECIFIER_SHORT | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_USHORT; + break; + case SPECIFIER_INT: + case SPECIFIER_SIGNED: + case SPECIFIER_SIGNED | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_INT; + break; + case SPECIFIER_UNSIGNED: + case SPECIFIER_UNSIGNED | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_UINT; + break; + case SPECIFIER_LONG: + case SPECIFIER_SIGNED | SPECIFIER_LONG: + case SPECIFIER_LONG | SPECIFIER_INT: + case SPECIFIER_SIGNED | SPECIFIER_LONG | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_LONG; + break; + case SPECIFIER_UNSIGNED | SPECIFIER_LONG: + case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_ULONG; + break; + case SPECIFIER_LONG_LONG: + case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG: + case SPECIFIER_LONG_LONG | SPECIFIER_INT: + case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_LONGLONG; + break; + case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG: + case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT: + atomic_type = ATOMIC_TYPE_ULONGLONG; + break; + case SPECIFIER_FLOAT: + atomic_type = ATOMIC_TYPE_FLOAT; + break; + case SPECIFIER_DOUBLE: + atomic_type = ATOMIC_TYPE_DOUBLE; + break; + case SPECIFIER_LONG | SPECIFIER_DOUBLE: + atomic_type = ATOMIC_TYPE_LONG_DOUBLE; + break; + case SPECIFIER_BOOL: + atomic_type = ATOMIC_TYPE_BOOL; + break; + #ifdef PROVIDE_COMPLEX + case SPECIFIER_FLOAT | SPECIFIER_COMPLEX: + atomic_type = ATOMIC_TYPE_FLOAT_COMPLEX; + break; + case SPECIFIER_DOUBLE | SPECIFIER_COMPLEX: + atomic_type = ATOMIC_TYPE_DOUBLE_COMPLEX; + break; + case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_COMPLEX: + atomic_type = ATOMIC_TYPE_LONG_DOUBLE_COMPLEX; + break; + #endif + #ifdef PROVIDE_IMAGINARY + case SPECIFIER_FLOAT | SPECIFIER_IMAGINARY: + atomic_type = ATOMIC_TYPE_FLOAT_IMAGINARY; + break; + case SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY: + atomic_type = ATOMIC_TYPE_DOUBLE_IMAGINARY; + break; + case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY: + atomic_type = ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY; + break; + #endif + default: + /* invalid specifier combination, give an error message */ + if(type_specifiers == 0) { + parse_error("no type specifiers given in declaration"); + } else if((type_specifiers & SPECIFIER_SIGNED) && + (type_specifiers & SPECIFIER_UNSIGNED)) { + parse_error("signed and unsigned specifiers gives"); + } else if(type_specifiers & (SPECIFIER_SIGNED | SPECIFIER_UNSIGNED)) { + parse_error("only integer types can be signed or unsigned"); + } else { + parse_error("multiple datatypes in declaration"); + } + } + } else { + if(type_specifiers != 0) { + parse_error("multiple datatypes in declaration"); + } + } +} + +typedef struct declarator_t declarator_t; +struct declarator_t { + /* pointer stuff... */ + symbol_t *symbol; + + declarator_t *next; +}; + +declarator_t *parse_declarator() +{ + while(token.type == '*') { + /* pointer */ + next_token(); + //parse_type_qualifiers(); + } + + declarator_t *declarator; + + switch(token.type) { + case T_IDENTIFIER: + declarator = allocate_ast(sizeof(declarator[0])); + memset(declarator, 0, sizeof(declarator[0])); + declarator->symbol = token.v.symbol; + return declarator; + case '(': + next_token(); + declarator = parse_declarator(); + expect(')') + return declarator; + default: + parse_error("problem while parsing declarator"); + } + + if(token.type == '(') { + next_token(); + + /* parse parameter-type-list or identifier-list */ + + expect(')'); + } else if(token.type == '[') { + next_token(); + + /* multiple type qualifiers, and static */ + + /* assignment_expression or '*' or nothing */ + + expect(']'); + } + + return declarator; +} + +declarator_t *parse_init_declarator() +{ + declarator_t *declarator = parse_declarator(); + if(token.type == '=') { + next_token(); + //parse_initialize(); + } + + return declarator; +} + +typedef struct declaration_t declaration_t; +struct declaration_t { + declaration_specifiers_t specifiers; + declaration_t *declarators; +}; + +void parse_declaration() +{ + declaration_specifiers_t specifiers; + memset(&specifiers, 0, sizeof(specifiers)); + parse_declaration_specifiers(&specifiers); +} + +#if 0 +namespace_t *parse(FILE *in, const char *input_name) +{ + namespace_t *namespace = parse_namespace(); + + return namespace; +} +#endif diff --git a/preprocessor_tokens.inc b/preprocessor_tokens.inc new file mode 100644 index 0000000..f1760ca --- /dev/null +++ b/preprocessor_tokens.inc @@ -0,0 +1,29 @@ +#ifndef TS +#define TS(x,str,val) +#endif + +TS(IDENTIFIER, "identifier", = 256) +TS(INTEGER, "integer number",) +TS(STRING_LITERAL, "string literal",) + +#define S(x) T(x,#x,) +S(include) +S(define) +S(undef) +S(line) +S(error) +S(pragma) +S(if) +S(else) +S(elif) +S(endif) +S(ifdef) +S(ifndef) +#undef S + +T(DOTDOTDOT, "...",) + +#define T_LAST_TOKEN (T_DOTDOTDOT+1) + +T(LPAREN, "(", = '(') +T(RPAREN, ")", = ')') diff --git a/symbol.h b/symbol.h new file mode 100644 index 0000000..413319b --- /dev/null +++ b/symbol.h @@ -0,0 +1,11 @@ +#ifndef SYMBOL_H +#define SYMBOL_H + +typedef struct symbol_t symbol_t; + +struct symbol_t { + const char *string; + unsigned ID; +}; + +#endif diff --git a/symbol_table.c b/symbol_table.c new file mode 100644 index 0000000..c422e0c --- /dev/null +++ b/symbol_table.c @@ -0,0 +1,68 @@ +#include + +#include "symbol_table_t.h" +#include "adt/hash_string.h" +#include "adt/obst.h" + +struct obstack symbol_obstack; + +static inline +void init_symbol_table_entry(symbol_t *entry, const char *string) +{ + entry->ID = 0; + entry->string = string; +} + +#define HashSet symbol_table_t +#define HashSetIterator symbol_table_iterator_t +#define HashSetEntry symbol_table_hash_entry_t +#define ValueType symbol_t* +#define NullValue NULL +#define DeletedValue ((symbol_t*)-1) +#define KeyType const char * +#define ConstKeyType const char * +#define GetKey(value) (value)->string +#define InitData(this,value,key) { (value) = (ValueType) obstack_alloc(&symbol_obstack, sizeof(symbol_t)); init_symbol_table_entry((value), key); } +#define Hash(this, key) hash_string(key) +#define KeysEqual(this,key1,key2) (strcmp(key1, key2) == 0) +#define SetRangeEmpty(ptr,size) memset(ptr, 0, (size) * sizeof(symbol_table_hash_entry_t)) + +#define hashset_init _symbol_table_init +#define hashset_init_size _symbol_table_init_size +#define hashset_destroy _symbol_table_destroy +#define hashset_insert _symbol_table_insert +#define hashset_remove _symbol_table_remove +#define hashset_find _symbol_table_find +#define hashset_size _symbol_table_size +#define hashset_iterator_init _symbol_table_iterator_init +#define hashset_iterator_next _symbol_table_iterator_next +#define hashset_remove_iterator _symbol_table_remove_iterator + +#include "adt/hashset.c" + +static symbol_table_t symbol_table; +static symbol_table_t preprocessor_symbol_table; + +symbol_t *symbol_table_insert(const char *symbol) +{ + return _symbol_table_insert(&symbol_table, symbol); +} + +symbol_t *preprocessor_symbol_table_insert(const char *symbol) +{ + return _symbol_table_insert(&preprocessor_symbol_table, symbol); +} + +void init_symbol_table(void) +{ + obstack_init(&symbol_obstack); + _symbol_table_init(&symbol_table); + _symbol_table_init(&preprocessor_symbol_table); +} + +void exit_symbol_table(void) +{ + _symbol_table_destroy(&symbol_table); + _symbol_table_destroy(&preprocessor_symbol_table); + obstack_free(&symbol_obstack, NULL); +} diff --git a/symbol_table.h b/symbol_table.h new file mode 100644 index 0000000..b6ef219 --- /dev/null +++ b/symbol_table.h @@ -0,0 +1,16 @@ +#ifndef SYMBOL_TABLE_H +#define SYMBOL_TABLE_H + +#include "symbol.h" +#include "adt/obst.h" + +symbol_t *symbol_table_insert(const char *symbol); +symbol_t *preprocessor_symbol_table_insert(const char *symbol); +symbol_t *preprocessor_symbol_table_find(const char *symbol); + +void init_symbol_table(void); +void exit_symbol_table(void); + +extern struct obstack symbol_obstack; + +#endif diff --git a/symbol_table_t.h b/symbol_table_t.h new file mode 100644 index 0000000..f175ca6 --- /dev/null +++ b/symbol_table_t.h @@ -0,0 +1,17 @@ +#ifndef SYMBOL_TABLE_T_H +#define SYMBOL_TABLE_T_H + +#include "symbol_table.h" +#include "symbol.h" + +#define HashSet symbol_table_t +#define HashSetIterator symbol_table_iterator_t +#define HashSetEntry symbol_table_hash_entry_t +#define ValueType symbol_t* +#include "adt/hashset.h" +#undef ValueType +#undef HashSetEntry +#undef HashSetIterator +#undef HashSet + +#endif diff --git a/token.c b/token.c new file mode 100644 index 0000000..50e166a --- /dev/null +++ b/token.c @@ -0,0 +1,81 @@ +#include + +#include "token_t.h" + +#include +#include + +#include "symbol.h" +#include "adt/array.h" + +static symbol_t *token_symbols[T_LAST_TOKEN]; + +void init_tokens(void) +{ + symbol_t *symbol; + + memset(token_symbols, 0, T_LAST_TOKEN * sizeof(token_symbols[0])); + +#define T(x,str,val) \ + assert(T_##x >= 0 && T_##x < T_LAST_TOKEN); \ + symbol = symbol_table_insert(str); \ + symbol->ID = T_##x; \ + token_symbols[T_##x] = symbol; + +#define TS(x,str,val) \ + assert(T_##x >= 0 && T_##x < T_LAST_TOKEN); \ + symbol = symbol_table_insert(str); \ + token_symbols[T_##x] = symbol; + +#include "tokens.inc" + +#undef TS +#undef T +} + +void exit_tokens(void) +{ +} + +void print_token_type(FILE *f, token_type_t token_type) +{ + if(token_type >= 0 && token_type < 256) { + fprintf(f, "'%c'", token_type); + return; + } + if(token_type == T_EOF) { + fputs("end of file", f); + return; + } + + int token_symbols_len = T_LAST_TOKEN; + if(token_type < 0 || token_type >= token_symbols_len) { + fputs("invalid token", f); + return; + } + + const symbol_t *symbol = token_symbols[token_type]; + if(symbol != NULL) { + fputs(symbol->string, f); + } else { + fputs("unknown token", f); + } +} + +void print_token(FILE *f, const token_t *token) +{ + switch(token->type) { + case T_IDENTIFIER: + fprintf(f, "symbol '%s'", token->v.symbol->string); + break; + case T_INTEGER: + fprintf(f, "integer number %d", token->v.intvalue); + break; + case T_STRING_LITERAL: + fprintf(f, "string '%s'", token->v.string); + break; + default: + print_token_type(f, token->type); + break; + } +} diff --git a/token_t.h b/token_t.h new file mode 100644 index 0000000..21669af --- /dev/null +++ b/token_t.h @@ -0,0 +1,44 @@ +#ifndef TOKEN_T_H +#define TOKEN_T_H + +#include +#include "symbol.h" +#include "symbol_table.h" + +typedef enum { +#define T(x,str,val) T_##x val, +#define TS(x,str,val) T_##x val, +#include "tokens.inc" +#undef TS +#undef T + + T_EOF = -1, + T_ERROR = -2 +} token_type_t; + +typedef enum { +#define T(x,str,val) TP_##x val, +#define TS(x,str,val) TP_##x val, +#include "tokens.inc" +#undef TS +#undef T + + TP_EOF = T_EOF, + TP_ERROR = T_ERROR +} preprocessor_token_type_t; + +typedef struct { + int type; + union { + symbol_t *symbol; + int intvalue; + const char *string; + } v; +} token_t; + +void init_tokens(void); +void exit_tokens(void); +void print_token_type(FILE *out, token_type_t token_type); +void print_token(FILE *out, const token_t *token); + +#endif diff --git a/tokens.inc b/tokens.inc new file mode 100644 index 0000000..f4f777a --- /dev/null +++ b/tokens.inc @@ -0,0 +1,106 @@ +#ifndef TS +#define TS(x,str,val) +#endif + +TS(IDENTIFIER, "identifier", = 256) +TS(INTEGER, "integer number",) +TS(STRING_LITERAL, "string literal",) + +#define S(x) T(x,#x,) +S(auto) +S(break) +S(case) +S(char) +S(const) +S(continue) +S(default) +S(do) +S(double) +S(else) +S(enum) +S(extern) +S(float) +S(for) +S(goto) +S(if) +S(inline) +S(int) +S(long) +S(register) +S(restrict) +S(return) +S(short) +S(signed) +S(sizeof) +S(static) +S(struct) +S(switch) +S(typedef) +S(union) +S(unsigned) +S(void) +S(volatile) +S(while) +S(_Bool) +S(_Complex) +S(_Imaginary) +#undef S + +T(SELECT, "->",) +T(PLUSPLUS, "++",) +T(MINUSMINUS, "--",) +T(LESSLESS, "<<",) +T(GREATERGREATER, ">>",) +T(LESSEQUAL, "<=",) +T(GREATEREQUAL, ">=",) +T(EQUALEQUAL, "==",) +T(EXCLAMATIONMARKEQUAL, "!=",) +T(ANDAND, "&&",) +T(PIPEPIPE, "||",) +T(DOTDOTDOT, "...",) +T(ASTERISKEQUAL, "*=",) +T(SLASHEQUAL, "/=",) +T(PERCENTEQUAL, "%=",) +T(PLUSEQUAL, "+=",) +T(MINUSEQUAL, "-=",) +T(LESSLESSEQUAL, "<<=",) +T(GREATERGREATEREQUAL, ">>=",) +T(ANDEQUAL, "&=",) +T(CARETEQUAL, "^=",) +T(PIPEEQUAL, "|=",) +T(HASHHASH, "##",) + +#define T_LAST_TOKEN (T_HASHHASH+1) + +T(RBRACK, "[", = '[') +T(LBRACK, "]", = ']') +T(LBRACE, "(", = '(') +T(RBRACE, ")", = ')') +T(RCURLY, "{", = '{') +T(LCURLY, "}", = '}') +T(DOT, ".", = '.') +T(AND, "&", = '&') +T(ASTERISK, "*", = '*') +T(PLUS, "+", = '+') +T(MINUS, "-", = '-') +T(TILDE, "~", = '~') +T(EXCLAMATIONMARK, "!", = '!') +T(SLASH, "/", = '/') +T(PERCENT, "%", = '%') +T(LESS, "<", = '<') +T(GREATER, ">", = '>') +T(CARET, "^", = '^') +T(PIPE, "|", = '|') +T(QUESTIONMARK, "?", = '?') +T(COLON, ":", = ':') +T(SEMICOLON, ";", = ';') +T(EQUAL, "=", = '=') +T(COMMA, ",", = ',') +T(HASH, "#", = '#') + +T(LESSCOLON, "<:", = '[') +T(COLONGREATER, ":>", = ']') +T(LESSPERCENT, "<%", = '{') +T(PERCENTGREATER, "%>", = '}') +T(PERCENTCOLON, "%:", = '#') +T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH) diff --git a/type.c b/type.c new file mode 100644 index 0000000..7f64f86 --- /dev/null +++ b/type.c @@ -0,0 +1,141 @@ +#include + +#include "type_t.h" +#include "adt/error.h" + +static struct obstack _type_obst; +struct obstack *type_obst = &_type_obst; + +void init_type_module() +{ + obstack_init(type_obst); +} + +void exit_type_module() +{ + obstack_free(type_obst, NULL); +} + +static +void print_atomic_type(FILE *out, const atomic_type_t *type) +{ + switch(type->atype) { + case ATOMIC_TYPE_INVALID: fputs("INVALIDATOMIC", out); break; + case ATOMIC_TYPE_BOOL: fputs("bool", out); break; + case ATOMIC_TYPE_CHAR: fputs("char", out); break; + case ATOMIC_TYPE_SCHAR: fputs("signed char", out); break; + case ATOMIC_TYPE_UCHAR: fputs("unsigned char", out); break; + case ATOMIC_TYPE_INT: fputs("int", out); break; + case ATOMIC_TYPE_UINT: fputs("unsigned int", out); break; + case ATOMIC_TYPE_SHORT: fputs("short", out); break; + case ATOMIC_TYPE_USHORT: fputs("unsigned short", out); break; + case ATOMIC_TYPE_LONG: fputs("long", out); break; + case ATOMIC_TYPE_ULONG: fputs("unsigned long", out); break; + case ATOMIC_TYPE_LONGLONG: fputs("long long", out); break; + case ATOMIC_TYPE_ULONGLONG: fputs("unsigned long long", out); break; + case ATOMIC_TYPE_FLOAT: fputs("float", out); break; + case ATOMIC_TYPE_DOUBLE: fputs("double", out); break; + default: fputs("UNKNOWNATOMIC", out); break; + } +} + +static +void print_method_type(FILE *out, const method_type_t *type) +{ + fputs("<", out); + print_type(out, type->result_type); + fputs(" ", out); + + if(type->abi_style != NULL) { + fprintf(out, "\"%s\" ", type->abi_style); + } + fputs("method(", out); + method_parameter_type_t *param_type = type->parameter_types; + int first = 1; + while(param_type != NULL) { + if(first) { + first = 0; + } else { + fputs(", ", out); + } + print_type(out, param_type->type); + param_type = param_type->next; + } + fputs(")>", out); +} + +static +void print_pointer_type(FILE *out, const pointer_type_t *type) +{ + print_type(out, type->points_to); + fputs("*", out); +} + +void print_type(FILE *out, const type_t *type) +{ + if(type == NULL) { + fputs("nil type", out); + return; + } + + switch(type->type) { + case TYPE_INVALID: + fputs("invalid", out); + return; + case TYPE_ENUM: + case TYPE_TYPEDEF: + fputs("TODO", out); + return; + case TYPE_ATOMIC: + print_atomic_type(out, (const atomic_type_t*) type); + return; + case TYPE_COMPOUND_STRUCT: + case TYPE_COMPOUND_UNION: + fprintf(out, "%s", ((const compound_type_t*) type)->symbol->string); + return; + case TYPE_METHOD: + print_method_type(out, (const method_type_t*) type); + return; + case TYPE_POINTER: + print_pointer_type(out, (const pointer_type_t*) type); + return; + } + fputs("unknown", out); +} + +int type_valid(const type_t *type) +{ + return type->type != TYPE_INVALID; +} + +int is_type_int(const type_t *type) +{ + if(type->type != TYPE_ATOMIC) + return 0; + + atomic_type_t *atomic_type = (atomic_type_t*) type; + switch(atomic_type->atype) { + case ATOMIC_TYPE_CHAR: + case ATOMIC_TYPE_SCHAR: + case ATOMIC_TYPE_UCHAR: + case ATOMIC_TYPE_SHORT: + case ATOMIC_TYPE_USHORT: + case ATOMIC_TYPE_INT: + case ATOMIC_TYPE_UINT: + case ATOMIC_TYPE_LONG: + case ATOMIC_TYPE_ULONG: + case ATOMIC_TYPE_LONGLONG: + case ATOMIC_TYPE_ULONGLONG: + return 1; + default: + return 0; + } +} + +static __attribute__((unused)) +void dbg_type(const type_t *type) +{ + print_type(stdout,type); + puts("\n"); + fflush(stdout); +} diff --git a/type.h b/type.h new file mode 100644 index 0000000..5325463 --- /dev/null +++ b/type.h @@ -0,0 +1,33 @@ +#ifndef TYPE_H +#define TYPE_H + +#include + +typedef struct type_t type_t; +typedef struct atomic_type_t atomic_type_t; +typedef struct pointer_type_t pointer_type_t; +typedef struct method_parameter_type_t method_parameter_type_t; +typedef struct method_type_t method_type_t; +typedef struct compound_entry_t compound_entry_t; +typedef struct compound_type_t compound_type_t; + +void init_type_module(void); +void exit_type_module(void); + +/** + * prints a human readable form of @p type to a stream + */ +void print_type(FILE* out, const type_t *type); + +/** + * returns 1 if type contains integer numbers + */ +int is_type_int(const type_t *type); + +/** + * returns 1 if the type is valid. A type is valid if it contains no unresolved + * references anymore and is not of TYPE_INVALID. + */ +int type_valid(const type_t *type); + +#endif diff --git a/type_t.h b/type_t.h new file mode 100644 index 0000000..6e05d31 --- /dev/null +++ b/type_t.h @@ -0,0 +1,92 @@ +#ifndef TYPE_T_H +#define TYPE_T_H + +#include "type.h" +#include "symbol.h" +#include "lexer_t.h" +#include "adt/obst.h" + +struct obstack *type_obst; + +typedef enum { + TYPE_INVALID, + TYPE_ATOMIC, + TYPE_COMPOUND_STRUCT, + TYPE_COMPOUND_UNION, + TYPE_ENUM, + TYPE_TYPEDEF, + TYPE_METHOD, + TYPE_POINTER +} type_type_t; + +typedef enum { + ATOMIC_TYPE_INVALID, + ATOMIC_TYPE_VOID, + ATOMIC_TYPE_CHAR, + ATOMIC_TYPE_SCHAR, + ATOMIC_TYPE_UCHAR, + ATOMIC_TYPE_SHORT, + ATOMIC_TYPE_USHORT, + ATOMIC_TYPE_INT, + ATOMIC_TYPE_UINT, + ATOMIC_TYPE_LONG, + ATOMIC_TYPE_ULONG, + ATOMIC_TYPE_LONGLONG, + ATOMIC_TYPE_ULONGLONG, + ATOMIC_TYPE_FLOAT, + ATOMIC_TYPE_DOUBLE, + ATOMIC_TYPE_LONG_DOUBLE, + ATOMIC_TYPE_BOOL, +#ifdef PROVIDE_COMPLEX + ATOMIC_TYPE_FLOAT_COMPLEX, + ATOMIC_TYPE_DOUBLE_COMPLEX, + ATOMIC_TYPE_LONG_DOUBLE_COMPLEX, +#endif +#ifdef PROVIDE_IMAGINARY + ATOMIC_TYPE_FLOAT_IMAGINARY, + ATOMIC_TYPE_DOUBLE_IMAGINARY, + ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY, +#endif +} atomic_type_type_t; + +struct type_t { + type_type_t type; +}; + +struct atomic_type_t { + type_t type; + atomic_type_type_t atype; +}; + +struct pointer_type_t { + type_t type; + type_t *points_to; +}; + +struct method_parameter_type_t { + type_t *type; + method_parameter_type_t *next; +}; + +struct method_type_t { + type_t type; + type_t *result_type; + method_parameter_type_t *parameter_types; + const char *abi_style; +}; + +struct compound_entry_t { + type_t *type; + symbol_t *symbol; + compound_entry_t *next; + source_position_t source_position; +}; + +struct compound_type_t { + type_t type; + compound_entry_t *entries; + symbol_t *symbol; + source_position_t source_position; +}; + +#endif -- 2.20.1