- 2009 patch
[cparser] / adt / hashset.c
1 /*
2  * Copyright (C) 1995-2009 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  * @version $Id$
27  *
28  *
29  * You have to specialize this file by defining:
30  *
31  * <ul>
32  *  <li><b>HashSet</b>         The name of the hashset type</li>
33  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
34  *  <li><b>ValueType</b>       The type of the stored data values</li>
35  *  <li><b>NullValue</b>       A special value representing no values</li>
36  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
37  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
38  * </ul>
39  *
40  * Note that by default it is assumed that the data values themselfes are used
41  * as keys. However you can change that with additional defines:
42  *
43  * <ul>
44  *  <li><b>KeyType</b>         The type of the keys identifying data values.
45  *                             Defining this implies, that a data value contains
46  *                             more than just the key.</li>
47  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
48  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
49  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
50  *                             them on demand from the datavalues. (usefull if
51  *                             calculating the hash-values takes less time than
52  *                             a memory access)</li>
53  * </ul>
54  *
55  * You can further fine tune your hashset by defining the following:
56  *
57  * <ul>
58  *  <li><b>JUMP(num_probes)</b> The probing method</li>
59  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
60  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
61  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
62  *                                      the Null value</li>
63  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
64  * </ul>
65  */
66 #ifdef HashSet
67
68 #include <stdlib.h>
69 #include <string.h>
70 #include <assert.h>
71
72 #include "bitfiddle.h"
73 #include "util.h"
74
75 /* quadratic probing */
76 #ifndef JUMP
77 #define JUMP(num_probes)      (num_probes)
78 #endif /* JUMP */
79
80 #ifndef Hash
81 #define ID_HASH
82 #define Hash(self,key)        ((unsigned)(((char *)key) - (char *)0))
83 #endif /* Hash */
84
85 #ifdef DO_REHASH
86 #define HashSetEntry                   ValueType
87 #define EntrySetHash(entry,new_hash)   ((void)0)
88 #define EntryGetHash(self,entry)       Hash(self, GetKey(entry))
89 #define EntryGetValue(entry)           (entry)
90 #else /* ! DO_REHASH */
91 #define EntryGetHash(self,entry)       (entry).hash
92 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
93 #define EntryGetValue(entry)           (entry).data
94 #endif /* DO_REHASH */
95
96 #ifndef Alloc
97 #include "xmalloc.h"
98 #define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
99 #define Free(ptr)      free(ptr)
100 #endif /* Alloc */
101
102 #ifdef ID_HASH
103 #define InsertReturnValue                 int
104 #define GetInsertReturnValue(entry,found) (found)
105 #define NullReturnValue                   0
106 #else /* ! ID_HASH */
107 #ifdef SCALAR_RETURN
108 #define InsertReturnValue                 ValueType
109 #define GetInsertReturnValue(entry,found) EntryGetValue(entry)
110 #define NullReturnValue                   NullValue
111 #else
112 #define InsertReturnValue                 ValueType*
113 #define GetInsertReturnValue(entry,found) & EntryGetValue(entry)
114 #define NullReturnValue                   & NullValue
115 #endif
116 #endif /* ID_HASH */
117
118 #ifndef KeyType
119 #define KeyType                  ValueType
120 #define GetKey(value)            (value)
121 #define InitData(self,value,key) (value) = (key)
122 #endif /* KeyType */
123
124 #ifndef ConstKeyType
125 #define ConstKeyType             const KeyType
126 #endif /* ConstKeyType */
127
128 #ifndef EntrySetEmpty
129 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
130 #endif /* EntrySetEmpty */
131 #ifndef EntrySetDeleted
132 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
133 #endif /* EntrySetDeleted */
134 #ifndef EntryIsEmpty
135 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
136 #endif /* EntryIsEmpty */
137 #ifndef EntryIsDeleted
138 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
139 #endif /* EntryIsDeleted */
140 #ifndef SetRangeEmpty
141 #define SetRangeEmpty(ptr,size)                \
142 {                                              \
143         size_t _i;                                 \
144         size_t _size = (size);                     \
145         HashSetEntry *entries = (ptr);             \
146         for(_i = 0; _i < _size; ++_i) {            \
147                 HashSetEntry *entry = & entries[_i];   \
148                 EntrySetEmpty(*entry);                 \
149         }                                          \
150 }
151 #endif /* SetRangeEmpty */
152
153 #ifndef HT_OCCUPANCY_FLT
154 /** how full before we double size */
155 #define HT_OCCUPANCY_FLT(x) ((x)/2)
156 #endif /* HT_OCCUPANCY_FLT */
157 #ifndef HT_1_DIV_OCCUPANCY_FLT
158 #define HT_1_DIV_OCCUPANCY_FLT 2
159 #endif
160
161 #ifndef HT_EMPTY_FLT
162 /** how empty before we half size */
163 #define HT_EMPTY_FLT(x)     ((x)/5)
164 #endif /* HT_EMPTY_FLT */
165
166 #ifndef HT_MIN_BUCKETS
167 /** default smallest bucket size */
168 #define HT_MIN_BUCKETS    32
169 #endif /* HT_MIN_BUCKETS */
170
171 #define ILLEGAL_POS       ((size_t)-1)
172
173 /* check that all needed functions are defined */
174 #ifndef hashset_init
175 #error You have to redefine hashset_init
176 #endif
177 #ifndef hashset_init_size
178 #error You have to redefine hashset_init_size
179 #endif
180 #ifndef hashset_destroy
181 #error You have to redefine hashset_destroy
182 #endif
183 #ifndef hashset_insert
184 #error You have to redefine hashset_insert
185 #endif
186 #ifndef hashset_remove
187 #error You have to redefine hashset_remove
188 #endif
189 #ifndef hashset_find
190 #error You have to redefine hashset_find
191 #endif
192 #ifndef hashset_size
193 #error You have to redefine hashset_size
194 #endif
195 #ifndef hashset_iterator_init
196 #error You have to redefine hashset_iterator_init
197 #endif
198 #ifndef hashset_iterator_next
199 #error You have to redefine hashset_iterator_next
200 #endif
201 #ifndef hashset_remove_iterator
202 #error You have to redefine hashset_remove_iterator
203 #endif
204
205 /* prototypes to avoid warnings */
206 void hashset_init(HashSet *self);
207 void hashset_destroy(HashSet *self);
208 size_t hashset_size(const HashSet *self);
209 InsertReturnValue hashset_insert(HashSet *self, KeyType key);
210 InsertReturnValue hashset_find(const HashSet *self, ConstKeyType key);
211 void hashset_remove(HashSet *self, ConstKeyType key);
212 void hashset_init_size(HashSet *self, size_t expected_elements);
213 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset);
214 ValueType hashset_iterator_next(HashSetIterator *self);
215 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter);
216
217 /**
218  * Returns the number of elements in the hashset
219  */
220 size_t hashset_size(const HashSet *self)
221 {
222         return self->num_elements - self->num_deleted;
223 }
224
225 /**
226  * Inserts an element into a hashset without growing the set (you have to make
227  * sure there's enough room for that.
228  * @note also see comments for hashset_insert()
229  * @internal
230  */
231 static inline
232 InsertReturnValue insert_nogrow(HashSet *self, KeyType key)
233 {
234         size_t   num_probes  = 0;
235         size_t   num_buckets = self->num_buckets;
236         size_t   hashmask    = num_buckets - 1;
237         unsigned hash        = Hash(self, key);
238         size_t   bucknum     = hash & hashmask;
239         size_t   insert_pos  = ILLEGAL_POS;
240
241         assert((num_buckets & (num_buckets - 1)) == 0);
242
243         while(1) {
244                 HashSetEntry *entry = & self->entries[bucknum];
245
246                 if(EntryIsEmpty(*entry)) {
247                         size_t p;
248                         HashSetEntry *nentry;
249
250                         if(insert_pos != ILLEGAL_POS) {
251                                 p = insert_pos;
252                         } else {
253                                 p = bucknum;
254                         }
255
256                         nentry = &self->entries[p];
257                         InitData(self, EntryGetValue(*nentry), key);
258                         EntrySetHash(*nentry, hash);
259                         self->num_elements++;
260                         return GetInsertReturnValue(*nentry, 0);
261                 }
262                 if(EntryIsDeleted(*entry)) {
263                         if(insert_pos == ILLEGAL_POS)
264                                 insert_pos = bucknum;
265                 } else if(EntryGetHash(self, *entry) == hash) {
266                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
267                                 // Value already in the set, return it
268                                 return GetInsertReturnValue(*entry, 1);
269                         }
270                 }
271
272                 ++num_probes;
273                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
274                 assert(num_probes < num_buckets);
275         }
276 }
277
278 /**
279  * Inserts an element into a hashset under the assumption that the hashset
280  * contains no deleted entries and the element doesn't exist in the hashset yet.
281  * @internal
282  */
283 static
284 void insert_new(HashSet *self, unsigned hash, ValueType value)
285 {
286         size_t num_probes  = 0;
287         size_t num_buckets = self->num_buckets;
288         size_t hashmask    = num_buckets - 1;
289         size_t bucknum     = hash & hashmask;
290         size_t insert_pos  = ILLEGAL_POS;
291
292         //assert(value != NullValue);
293
294         while(1) {
295                 HashSetEntry *entry = & self->entries[bucknum];
296
297                 if(EntryIsEmpty(*entry)) {
298                         size_t        p;
299                         HashSetEntry *nentry;
300
301                         if(insert_pos != ILLEGAL_POS) {
302                                 p = insert_pos;
303                         } else {
304                                 p = bucknum;
305                         }
306                         nentry = &self->entries[p];
307
308                         EntryGetValue(*nentry) = value;
309                         EntrySetHash(*nentry, hash);
310                         self->num_elements++;
311                         return;
312                 }
313                 assert(!EntryIsDeleted(*entry));
314
315                 ++num_probes;
316                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
317                 assert(num_probes < num_buckets);
318         }
319 }
320
321 /**
322  * calculate shrink and enlarge limits
323  * @internal
324  */
325 static inline
326 void reset_thresholds(HashSet *self)
327 {
328         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
329         self->shrink_threshold  = (size_t) HT_EMPTY_FLT(self->num_buckets);
330         self->consider_shrink   = 0;
331 }
332
333 /**
334  * Resize the hashset
335  * @internal
336  */
337 static inline
338 void resize(HashSet *self, size_t new_size)
339 {
340         size_t num_buckets = self->num_buckets;
341         size_t i;
342         HashSetEntry *old_entries = self->entries;
343         HashSetEntry *new_entries;
344
345         /* allocate a new array with double size */
346         new_entries = Alloc(new_size);
347         SetRangeEmpty(new_entries, new_size);
348
349         /* use the new array */
350         self->entries      = new_entries;
351         self->num_buckets  = new_size;
352         self->num_elements = 0;
353         self->num_deleted  = 0;
354 #ifndef NDEBUG
355         self->entries_version++;
356 #endif
357         reset_thresholds(self);
358
359         /* reinsert all elements */
360         for(i = 0; i < num_buckets; ++i) {
361                 HashSetEntry *entry = & old_entries[i];
362                 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
363                         continue;
364
365                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
366         }
367
368         /* now we can free the old array */
369         Free(old_entries);
370 }
371
372 /**
373  * grow the hashset if adding 1 more elements would make it too crowded
374  * @internal
375  */
376 static inline
377 void maybe_grow(HashSet *self)
378 {
379         size_t resize_to;
380
381         if(LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
382                 return;
383
384         /* double table size */
385         resize_to = self->num_buckets * 2;
386         resize(self, resize_to);
387 }
388
389 /**
390  * shrink the hashset if it is only sparsely filled
391  * @internal
392  */
393 static inline
394 void maybe_shrink(HashSet *self)
395 {
396         size_t size;
397         size_t resize_to;
398
399         if(!self->consider_shrink)
400                 return;
401
402         self->consider_shrink = 0;
403         size                  = hashset_size(self);
404         if(size <= HT_MIN_BUCKETS)
405                 return;
406
407         if(LIKELY(size > self->shrink_threshold))
408                 return;
409
410         resize_to = ceil_po2(size);
411
412         if(resize_to < 4)
413                 resize_to = 4;
414
415         resize(self, resize_to);
416 }
417
418 /**
419  * Insert an element into the hashset. If no element with key key exists yet,
420  * then a new one is created and initialized with the InitData function.
421  * Otherwise the exisiting element is returned (for hashs where key is equal to
422  * value, nothing is returned.)
423  *
424  * @param self   the hashset
425  * @param key    the key that identifies the data
426  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
427  */
428 InsertReturnValue hashset_insert(HashSet *self, KeyType key)
429 {
430 #ifndef NDEBUG
431         self->entries_version++;
432 #endif
433
434         maybe_shrink(self);
435         maybe_grow(self);
436         return insert_nogrow(self, key);
437 }
438
439 /**
440  * Searchs for an element with key @p key.
441  *
442  * @param self      the hashset
443  * @param key       the key to search for
444  * @returns         the found value or NullValue if nothing was found
445  */
446 InsertReturnValue hashset_find(const HashSet *self, ConstKeyType key)
447 {
448         size_t   num_probes  = 0;
449         size_t   num_buckets = self->num_buckets;
450         size_t   hashmask    = num_buckets - 1;
451         unsigned hash        = Hash(self, key);
452         size_t   bucknum     = hash & hashmask;
453
454         while(1) {
455                 HashSetEntry *entry = & self->entries[bucknum];
456
457                 if(EntryIsEmpty(*entry)) {
458                         return NullReturnValue;
459                 }
460                 if(EntryIsDeleted(*entry)) {
461                         // value is deleted
462                 } else if(EntryGetHash(self, *entry) == hash) {
463                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
464                                 // found the value
465                                 return GetInsertReturnValue(*entry, 1);
466                         }
467                 }
468
469                 ++num_probes;
470                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
471                 assert(num_probes < num_buckets);
472         }
473 }
474
475 /**
476  * Removes an element from a hashset. Does nothing if the set doesn't contain
477  * the element.
478  *
479  * @param self    the hashset
480  * @param key     key that identifies the data to remove
481  */
482 void hashset_remove(HashSet *self, ConstKeyType key)
483 {
484         size_t   num_probes  = 0;
485         size_t   num_buckets = self->num_buckets;
486         size_t   hashmask    = num_buckets - 1;
487         unsigned hash        = Hash(self, key);
488         size_t   bucknum     = hash & hashmask;
489
490 #ifndef NDEBUG
491         self->entries_version++;
492 #endif
493
494         while(1) {
495                 HashSetEntry *entry = & self->entries[bucknum];
496
497                 if(EntryIsEmpty(*entry)) {
498                         return;
499                 }
500                 if(EntryIsDeleted(*entry)) {
501                         // entry is deleted
502                 } else if(EntryGetHash(self, *entry) == hash) {
503                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
504                                 EntrySetDeleted(*entry);
505                                 self->num_deleted++;
506                                 self->consider_shrink = 1;
507                                 return;
508                         }
509                 }
510
511                 ++num_probes;
512                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
513                 assert(num_probes < num_buckets);
514         }
515 }
516
517 /**
518  * Initializes hashset with a specific size
519  * @internal
520  */
521 static inline
522 void init_size(HashSet *self, size_t initial_size)
523 {
524         if(initial_size < 4)
525                 initial_size = 4;
526
527         self->entries         = Alloc(initial_size);
528         SetRangeEmpty(self->entries, initial_size);
529         self->num_buckets     = initial_size;
530         self->consider_shrink = 0;
531         self->num_elements    = 0;
532         self->num_deleted     = 0;
533 #ifndef NDEBUG
534         self->entries_version = 0;
535 #endif
536
537         reset_thresholds(self);
538 }
539
540 /**
541  * Initialializes a hashset with the default size. The memory for the set has to
542  * already allocated.
543  */
544 void hashset_init(HashSet *self)
545 {
546         init_size(self, HT_MIN_BUCKETS);
547 }
548
549 /**
550  * Destroys a hashset, freeing all used memory (except the memory for the
551  * HashSet struct itself).
552  */
553 void hashset_destroy(HashSet *self)
554 {
555         Free(self->entries);
556 #ifndef NDEBUG
557         self->entries = NULL;
558 #endif
559 }
560
561 /**
562  * Initializes a hashset expecting expected_element size
563  */
564 void hashset_init_size(HashSet *self, size_t expected_elements)
565 {
566         size_t needed_size;
567         size_t po2size;
568
569         if(expected_elements >= UINT_MAX/2) {
570                 abort();
571         }
572
573         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
574         po2size     = ceil_po2(needed_size);
575         init_size(self, po2size);
576 }
577
578 /**
579  * Initializes a hashset iterator. The memory for the allocator has to be
580  * already allocated.
581  * @note it is not allowed to remove or insert elements while iterating
582  */
583 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
584 {
585         self->current_bucket = hashset->entries - 1;
586         self->end            = hashset->entries + hashset->num_buckets;
587 #ifndef NDEBUG
588         self->set             = hashset;
589         self->entries_version = hashset->entries_version;
590 #endif
591 }
592
593 /**
594  * Returns the next value in the iterator or NULL if no value is left
595  * in the hashset.
596  * @note it is not allowed to remove or insert elements while iterating
597  */
598 ValueType hashset_iterator_next(HashSetIterator *self)
599 {
600         HashSetEntry *current_bucket = self->current_bucket;
601         HashSetEntry *end            = self->end;
602
603         /* using hashset_insert or hashset_remove is not allowed while iterating */
604         assert(self->entries_version == self->set->entries_version);
605
606         do {
607                 current_bucket++;
608                 if(current_bucket >= end)
609                         return NullValue;
610         } while(EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
611
612         self->current_bucket = current_bucket;
613         return EntryGetValue(*current_bucket);
614 }
615
616 /**
617  * Removes the element the iterator points to. Removing an element a second time
618  * has no result.
619  */
620 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
621 {
622         HashSetEntry *entry = iter->current_bucket;
623
624         /* iterator_next needs to have been called at least once */
625         assert(entry >= self->entries);
626         /* needs to be on a valid element */
627         assert(entry < self->entries + self->num_buckets);
628
629         if(EntryIsDeleted(*entry))
630                 return;
631
632         EntrySetDeleted(*entry);
633         self->num_deleted++;
634         self->consider_shrink = 1;
635 }
636
637 #else
638 __attribute__((unused)) static int dummy;
639 #endif