made error output for missing labels gcc like
[cparser] / adt / hashset.c
1 /**
2  * @file
3  * @date    17.03.2007
4  * @brief   Geberic hashset implementation
5  * @author  Matthias Braun, inspiration from densehash from google sparsehash
6  *          package
7  * @version $Id$
8  *
9  *
10  * You have to specialize this file by defining:
11  *
12  * <ul>
13  *  <li><b>HashSet</b>         The name of the hashset type</li>
14  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
15  *  <li><b>ValueType</b>       The type of the stored data values</li>
16  *  <li><b>NullValue</b>       A special value representing no values</li>
17  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
18  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
19  * </ul>
20  *
21  * Note that by default it is assumed that the data values themselfes are used
22  * as keys. However you can change that with additional defines:
23  *
24  * <ul>
25  *  <li><b>KeyType</b>         The type of the keys identifying data values.
26  *                             Defining this implies, that a data value contains
27  *                             more than just the key.</li>
28  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
29  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
30  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
31  *                             them on demand from the datavalues. (usefull if
32  *                             calculating the hash-values takes less time than
33  *                             a memory access)</li>
34  * </ul>
35  *
36  * You can further fine tune your hashset by defining the following:
37  *
38  * <ul>
39  *  <li><b>JUMP(num_probes)</b> The probing method</li>
40  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
41  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
42  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
43  *                                      the Null value</li>
44  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
45  * </ul>
46  */
47 #ifdef HashSet
48
49 #include <stdlib.h>
50 #include <string.h>
51 #include <assert.h>
52
53 #include "bitfiddle.h"
54 #include "util.h"
55
56 /* quadratic probing */
57 #ifndef JUMP
58 #define JUMP(num_probes)      (num_probes)
59 #endif
60
61 #ifndef Hash
62 #define ID_HASH
63 #define Hash(this,value)      ((unsigned)(value))
64 #endif
65
66 #ifdef DO_REHASH
67 #define HashSetEntry                   ValueType
68 #define EntrySetHash(entry,new_hash)
69 #define EntryGetHash(this,entry)       Hash(this,entry)
70 #define EntryGetValue(entry)           (entry)
71 #else
72 #define EntryGetHash(this,entry)       (entry).hash
73 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
74 #define EntryGetValue(entry)           (entry).data
75 #endif
76
77 #ifndef Alloc
78 #include "xmalloc.h"
79 #define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
80 #define Free(ptr)      free(ptr)
81 #endif
82
83 #ifdef ID_HASH
84 #define InsertReturnValue               int
85 #define GetInsertReturnValue(entry,new) (new)
86 #else
87 #define InsertReturnValue               ValueType
88 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
89 #endif
90
91 #ifndef KeyType
92 #define KeyType                  ValueType
93 #define GetKey(value)            (value)
94 #define InitData(this,value,key) (value) = (key)
95 #endif
96
97 #ifndef ConstKeyType
98 #define ConstKeyType             const KeyType
99 #endif
100
101 #ifndef EntrySetEmpty
102 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
103 #endif
104 #ifndef EntrySetDeleted
105 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
106 #endif
107 #ifndef EntryIsEmpty
108 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
109 #endif
110 #ifndef EntryIsDeleted
111 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
112 #endif
113 #ifndef SetRangeEmpty
114 #define SetRangeEmpty(ptr,size)                \
115 {                                              \
116         size_t _i;                                 \
117         size_t _size = (size);                     \
118         HashSetEntry *entries = (ptr);             \
119         for(_i = 0; _i < _size; ++_i) {            \
120                 HashSetEntry *entry = & entries[_i];   \
121                 EntrySetEmpty(*entry);                 \
122         }                                          \
123 }
124 #endif
125
126 #ifndef HT_OCCUPANCY_FLT
127 /** how full before we double size */
128 #define HT_OCCUPANCY_FLT  0.5f
129 #endif
130
131 #ifndef HT_EMPTY_FLT
132 /** how empty before we half size */
133 #define HT_EMPTY_FLT      (0.4f * (HT_OCCUPANCY_FLT))
134 #endif
135
136 #ifndef HT_MIN_BUCKETS
137 /** default smallest bucket size */
138 #define HT_MIN_BUCKETS    32
139 #endif
140
141 #define ILLEGAL_POS       ((size_t)-1)
142
143 #ifndef hashset_init
144 #error You have to redefine hashset_init
145 #endif
146 #ifndef hashset_init_size
147 #error You have to redefine hashset_init_size
148 #endif
149 #ifndef hashset_destroy
150 #error You have to redefine hashset_destroy
151 #endif
152 #ifndef hashset_insert
153 #error You have to redefine hashset_insert
154 #endif
155 #ifndef hashset_remove
156 #error You have to redefine hashset_remove
157 #endif
158 #ifndef hashset_find
159 #error You have to redefine hashset_find
160 #endif
161 #ifndef hashset_size
162 #error You have to redefine hashset_size
163 #endif
164 #ifndef hashset_iterator_init
165 #error You have to redefine hashset_iterator_init
166 #endif
167 #ifndef hashset_iterator_next
168 #error You have to redefine hashset_iterator_next
169 #endif
170 #ifndef hashset_remove_iterator
171 #error You have to redefine hashset_remove_iterator
172 #endif
173
174 /* prototypes to silence warnings */
175 size_t hashset_size(const HashSet *this);
176 void hashset_init(HashSet *this);
177 void hashset_init_size(HashSet *this, size_t size);
178 void hashset_destroy(HashSet *this);
179 InsertReturnValue hashset_insert(HashSet *this, KeyType key);
180 ValueType hashset_find(const HashSet *this, ConstKeyType key);
181 void hashset_remove(HashSet *this, ConstKeyType key);
182 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset);
183 ValueType hashset_iterator_next(HashSetIterator *this);
184 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter);
185
186 /**
187  * Returns the number of elements in the hashset
188  */
189 size_t hashset_size(const HashSet *this)
190 {
191         return this->num_elements - this->num_deleted;
192 }
193
194 /**
195  * Inserts an element into a hashset without growing the set (you have to make
196  * sure there's enough room for that.
197  * @note also see comments for hashset_insert()
198  * @internal
199  */
200 static inline
201 InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
202 {
203         size_t num_probes = 0;
204         size_t num_buckets = this->num_buckets;
205         size_t hashmask = num_buckets - 1;
206         unsigned hash = Hash(this, key);
207         size_t bucknum = hash & hashmask;
208         size_t insert_pos = ILLEGAL_POS;
209
210         assert((num_buckets & (num_buckets - 1)) == 0);
211
212         while(1) {
213                 HashSetEntry *entry = & this->entries[bucknum];
214
215                 if(EntryIsEmpty(*entry)) {
216                         size_t p;
217                         HashSetEntry *nentry;
218
219                         if(insert_pos != ILLEGAL_POS) {
220                                 p = insert_pos;
221                         } else {
222                                 p = bucknum;
223                         }
224
225                         nentry = &this->entries[p];
226                         InitData(this, EntryGetValue(*nentry), key);
227                         EntrySetHash(*nentry, hash);
228                         this->num_elements++;
229                         return GetInsertReturnValue(*nentry, 1);
230                 }
231                 if(EntryIsDeleted(*entry)) {
232                         if(insert_pos == ILLEGAL_POS)
233                                 insert_pos = bucknum;
234                 } else if(EntryGetHash(this, *entry) == hash) {
235                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
236                                 // Value already in the set, return it
237                                 return GetInsertReturnValue(*entry, 0);
238                         }
239                 }
240
241                 ++num_probes;
242                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
243                 assert(num_probes < num_buckets);
244         }
245 }
246
247 /**
248  * Inserts an element into a hashset under the assumption that the hashset
249  * contains no deleted entries and the element doesn't exist in the hashset yet.
250  * @internal
251  */
252 static
253 void insert_new(HashSet *this, unsigned hash, ValueType value)
254 {
255         size_t num_probes = 0;
256         size_t num_buckets = this->num_buckets;
257         size_t hashmask = num_buckets - 1;
258         size_t bucknum = hash & hashmask;
259         size_t insert_pos = ILLEGAL_POS;
260
261         assert(value != NullValue);
262
263         while(1) {
264                 HashSetEntry *entry = & this->entries[bucknum];
265
266                 if(EntryIsEmpty(*entry)) {
267                         size_t p;
268                         HashSetEntry *nentry;
269
270                         if(insert_pos != ILLEGAL_POS) {
271                                 p = insert_pos;
272                         } else {
273                                 p = bucknum;
274                         }
275                         nentry = &this->entries[p];
276
277                         EntryGetValue(*nentry) = value;
278                         EntrySetHash(*nentry, hash);
279                         this->num_elements++;
280                         return;
281                 }
282                 assert(!EntryIsDeleted(*entry));
283
284                 ++num_probes;
285                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
286                 assert(num_probes < num_buckets);
287         }
288 }
289
290 /**
291  * calculate shrink and enlarge limits
292  * @internal
293  */
294 static inline
295 void reset_thresholds(HashSet *this)
296 {
297         this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
298         this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
299         this->consider_shrink = 0;
300 }
301
302 /**
303  * Resize the hashset
304  * @internal
305  */
306 static inline
307 void resize(HashSet *this, size_t new_size)
308 {
309         size_t num_buckets = this->num_buckets;
310         size_t i;
311         HashSetEntry *old_entries = this->entries;
312         HashSetEntry *new_entries;
313
314         /* allocate a new array with double size */
315         new_entries = Alloc(new_size);
316         SetRangeEmpty(new_entries, new_size);
317
318         /* use the new array */
319         this->entries = new_entries;
320         this->num_buckets = new_size;
321         this->num_elements = 0;
322         this->num_deleted = 0;
323 #ifndef NDEBUG
324         this->entries_version++;
325 #endif
326         reset_thresholds(this);
327
328         /* reinsert all elements */
329         for(i = 0; i < num_buckets; ++i) {
330                 HashSetEntry *entry = & old_entries[i];
331                 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
332                         continue;
333
334                 insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
335         }
336
337         /* now we can free the old array */
338         Free(old_entries);
339 }
340
341 /**
342  * grow the hashset if adding 1 more elements would make it too crowded
343  * @internal
344  */
345 static inline
346 void maybe_grow(HashSet *this)
347 {
348         size_t resize_to;
349
350         if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
351                 return;
352
353         /* double table size */
354         resize_to = this->num_buckets * 2;
355         resize(this, resize_to);
356 }
357
358 /**
359  * shrink the hashset if it is only sparsely filled
360  * @internal
361  */
362 static inline
363 void maybe_shrink(HashSet *this)
364 {
365         size_t size;
366         size_t resize_to;
367
368         if(!this->consider_shrink)
369                 return;
370
371         this->consider_shrink = 0;
372         size = hashset_size(this);
373         if(LIKELY(size > this->shrink_threshold))
374                 return;
375
376         resize_to = ceil_po2(size);
377
378         if(resize_to < 4)
379                 resize_to = 4;
380
381         resize(this, resize_to);
382 }
383
384 /**
385  * Insert an element into the hashset. If no element with key key exists yet,
386  * then a new one is created and initialized with the InitData function.
387  * Otherwise the exisiting element is returned (for hashs where key is equal to
388  * value, nothing is returned.)
389  *
390  * @param this   the hashset
391  * @param key    the key that identifies the data
392  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
393  */
394 InsertReturnValue hashset_insert(HashSet *this, KeyType key)
395 {
396 #ifndef NDEBUG
397         this->entries_version++;
398 #endif
399
400         maybe_shrink(this);
401         maybe_grow(this);
402         return insert_nogrow(this, key);
403 }
404
405 /**
406  * Searchs for an element with key @p key.
407  *
408  * @param this      the hashset
409  * @param key       the key to search for
410  * @returns         the found value or NullValue if nothing was found
411  */
412 ValueType hashset_find(const HashSet *this, ConstKeyType key)
413 {
414         size_t num_probes = 0;
415         size_t num_buckets = this->num_buckets;
416         size_t hashmask = num_buckets - 1;
417         unsigned hash = Hash(this, key);
418         size_t bucknum = hash & hashmask;
419
420         while(1) {
421                 HashSetEntry *entry = & this->entries[bucknum];
422
423                 if(EntryIsEmpty(*entry)) {
424                         return NullValue;
425                 }
426                 if(EntryIsDeleted(*entry)) {
427                         // value is deleted
428                 } else if(EntryGetHash(this, *entry) == hash) {
429                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
430                                 // found the value
431                                 return EntryGetValue(*entry);
432                         }
433                 }
434
435                 ++num_probes;
436                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
437                 assert(num_probes < num_buckets);
438         }
439 }
440
441 /**
442  * Removes an element from a hashset. Does nothing if the set doesn't contain
443  * the element.
444  *
445  * @param this    the hashset
446  * @param key     key that identifies the data to remove
447  */
448 void hashset_remove(HashSet *this, ConstKeyType key)
449 {
450         size_t num_probes = 0;
451         size_t num_buckets = this->num_buckets;
452         size_t hashmask = num_buckets - 1;
453         unsigned hash = Hash(this, key);
454         size_t bucknum = hash & hashmask;
455
456 #ifndef NDEBUG
457         this->entries_version++;
458 #endif
459
460         while(1) {
461                 HashSetEntry *entry = & this->entries[bucknum];
462
463                 if(EntryIsEmpty(*entry)) {
464                         return;
465                 }
466                 if(EntryIsDeleted(*entry)) {
467                         // entry is deleted
468                 } else if(EntryGetHash(this, *entry) == hash) {
469                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
470                                 EntrySetDeleted(*entry);
471                                 this->num_deleted++;
472                                 this->consider_shrink = 1;
473                                 return;
474                         }
475                 }
476
477                 ++num_probes;
478                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
479                 assert(num_probes < num_buckets);
480         }
481 }
482
483 /**
484  * Initializes hashset with a specific size
485  * @internal
486  */
487 static inline
488 void init_size(HashSet *this, size_t initial_size)
489 {
490         if(initial_size < 4)
491                 initial_size = 4;
492
493         this->entries = Alloc(initial_size);
494         SetRangeEmpty(this->entries, initial_size);
495         this->num_buckets = initial_size;
496         this->consider_shrink = 0;
497         this->num_elements = 0;
498         this->num_deleted = 0;
499 #ifndef NDEBUG
500         this->entries_version = 0;
501 #endif
502
503         reset_thresholds(this);
504 }
505
506 /**
507  * Initialializes a hashset with the default size. The memory for the set has to
508  * already allocated.
509  */
510 void hashset_init(HashSet *this)
511 {
512         init_size(this, HT_MIN_BUCKETS);
513 }
514
515 /**
516  * Destroys a hashset, freeing all used memory (except the memory for the
517  * HashSet struct itself).
518  */
519 void hashset_destroy(HashSet *this)
520 {
521         Free(this->entries);
522 #ifndef NDEBUG
523         this->entries = NULL;
524 #endif
525 }
526
527 /**
528  * Initializes a hashset expecting expected_element size
529  */
530 void hashset_init_size(HashSet *this, size_t expected_elements)
531 {
532         size_t needed_size;
533         size_t po2size;
534
535         if(expected_elements >= UINT_MAX/2) {
536                 abort();
537         }
538
539         needed_size = (size_t) (expected_elements * (1.0 / HT_OCCUPANCY_FLT));
540         po2size = ceil_po2(needed_size);
541         init_size(this, po2size);
542 }
543
544 /**
545  * Initializes a hashset iterator. The memory for the allocator has to be
546  * already allocated.
547  * @note it is not allowed to remove or insert elements while iterating
548  */
549 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
550 {
551         this->current_bucket = hashset->entries - 1;
552         this->end = hashset->entries + hashset->num_buckets;
553 #ifndef NDEBUG
554         this->set = hashset;
555         this->entries_version = hashset->entries_version;
556 #endif
557 }
558
559 /**
560  * Returns the next value in the iterator or NULL if no value is left
561  * in the hashset.
562  * @note it is not allowed to remove or insert elements while iterating
563  */
564 ValueType hashset_iterator_next(HashSetIterator *this)
565 {
566         HashSetEntry *current_bucket = this->current_bucket;
567         HashSetEntry *end = this->end;
568
569         if(current_bucket >= end)
570                 return NullValue;
571
572         /* using hashset_insert or hashset_remove is not allowed while iterating */
573         assert(this->entries_version == this->set->entries_version);
574
575         do {
576                 current_bucket++;
577         } while(current_bucket < end &&
578                         (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
579
580         if(current_bucket >= end)
581                 return NullValue;
582
583         this->current_bucket = current_bucket;
584         return EntryGetValue(*current_bucket);
585 }
586
587 /**
588  * Removes the element the iterator points to. Removing an element a second time
589  * has no result.
590  */
591 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
592 {
593         HashSetEntry *entry = iter->current_bucket;
594
595         /* iterator_next needs to have been called at least once */
596         assert(entry >= this->entries);
597         /* needs to be on a valid element */
598         assert(entry < this->entries + this->num_buckets);
599
600         if(EntryIsDeleted(*entry))
601                 return;
602
603         EntrySetDeleted(*entry);
604         this->num_deleted++;
605         this->consider_shrink = 1;
606 }
607
608 #else
609 __attribute__((unused)) static int dummy;
610 #endif