added cast to avoid compiler warning
[libfirm] / ir / adt / hashset.c
1 /**
2  * @file
3  * @date    17.03.2007
4  * @brief   Geberic hashset implementation
5  * @author  Matthias Braun, inspiration from densehash from google sparsehash
6  *          package
7  * @version $Id$
8  *
9  *
10  * You have to specialize this file by defining:
11  *
12  * <ul>
13  *  <li><b>HashSet</b>         The name of the hashset type</li>
14  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
15  *  <li><b>ValueType</b>       The type of the stored data values</li>
16  *  <li><b>NullValue</b>       A special value representing no values</li>
17  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
18  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
19  * </ul>
20  *
21  * Note that by default it is assumed that the data values themselfes are used
22  * as keys. However you can change that with additional defines:
23  *
24  * <ul>
25  *  <li><b>KeyType</b>         The type of the keys identifying data values.
26  *                             Defining this implies, that a data value contains
27  *                             more than just the key.</li>
28  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
29  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
30  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
31  *                             them on demand from the datavalues. (usefull if
32  *                             calculating the hash-values takes less time than
33  *                             a memory access)</li>
34  * </ul>
35  *
36  * You can further fine tune your hashset by defining the following:
37  *
38  * <ul>
39  *  <li><b>JUMP(num_probes)</b> The probing method</li>
40  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
41  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
42  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
43  *                                      the Null value</li>
44  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
45  * </ul>
46  */
47 #ifdef HashSet
48
49 #include <stdlib.h>
50 #include <string.h>
51 #include <assert.h>
52
53 #include "bitfiddle.h"
54 #include "util.h"
55
56 /* quadratic probing */
57 #ifndef JUMP
58 #define JUMP(num_probes)      (num_probes)
59 #endif /* JUMP */
60
61 #ifndef Hash
62 #define ID_HASH
63 #define Hash(this,value)      ((unsigned)(value))
64 #endif /* Hash */
65
66 #ifdef DO_REHASH
67 #define HashSetEntry                   ValueType
68 #define EntrySetHash(entry,new_hash)
69 #define EntryGetHash(this,entry)       Hash(this,entry)
70 #define EntryGetValue(entry)           (entry)
71 #else /* ! DO_REHASH */
72 #define EntryGetHash(this,entry)       (entry).hash
73 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
74 #define EntryGetValue(entry)           (entry).data
75 #endif /* DO_REHASH */
76
77 #ifndef Alloc
78 #include "xmalloc.h"
79 #define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
80 #define Free(ptr)      free(ptr)
81 #endif /* Alloc */
82
83 #ifdef ID_HASH
84 #define InsertReturnValue               int
85 #define GetInsertReturnValue(entry,new) (new)
86 #else /* ! ID_HASH */
87 #define InsertReturnValue               ValueType
88 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
89 #endif /* ID_HASH */
90
91 #ifndef KeyType
92 #define KeyType                  ValueType
93 #define GetKey(value)            (value)
94 #define InitData(this,value,key) (value) = (key)
95 #endif /* KeyType */
96
97 #ifndef ConstKeyType
98 #define ConstKeyType             const KeyType
99 #endif /* ConstKeyType */
100
101 #ifndef EntrySetEmpty
102 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
103 #endif /* EntrySetEmpty */
104 #ifndef EntrySetDeleted
105 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
106 #endif /* EntrySetDeleted */
107 #ifndef EntryIsEmpty
108 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
109 #endif /* EntryIsEmpty */
110 #ifndef EntryIsDeleted
111 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
112 #endif /* EntryIsDeleted */
113 #ifndef SetRangeEmpty
114 #define SetRangeEmpty(ptr,size)                \
115 {                                              \
116         size_t _i;                                 \
117         size_t _size = (size);                     \
118         HashSetEntry *entries = (ptr);             \
119         for(_i = 0; _i < _size; ++_i) {            \
120                 HashSetEntry *entry = & entries[_i];   \
121                 EntrySetEmpty(*entry);                 \
122         }                                          \
123 }
124 #endif /* SetRangeEmpty */
125
126 #ifndef HT_OCCUPANCY_FLT
127 /** how full before we double size */
128 #define HT_OCCUPANCY_FLT  0.5f
129 #endif /* HT_OCCUPANCY_FLT */
130
131 #ifndef HT_EMPTY_FLT
132 /** how empty before we half size */
133 #define HT_EMPTY_FLT      (0.4f * (HT_OCCUPANCY_FLT))
134 #endif /* HT_EMPTY_FLT */
135
136 #ifndef HT_MIN_BUCKETS
137 /** default smallest bucket size */
138 #define HT_MIN_BUCKETS    32
139 #endif /* HT_MIN_BUCKETS */
140
141 #define ILLEGAL_POS       ((size_t)-1)
142
143 /* check that all needed functions are defined */
144 #ifndef hashset_init
145 #error You have to redefine hashset_init
146 #endif
147 #ifndef hashset_init_size
148 #error You have to redefine hashset_init_size
149 #endif
150 #ifndef hashset_destroy
151 #error You have to redefine hashset_destroy
152 #endif
153 #ifndef hashset_insert
154 #error You have to redefine hashset_insert
155 #endif
156 #ifndef hashset_remove
157 #error You have to redefine hashset_remove
158 #endif
159 #ifndef hashset_find
160 #error You have to redefine hashset_find
161 #endif
162 #ifndef hashset_size
163 #error You have to redefine hashset_size
164 #endif
165 #ifndef hashset_iterator_init
166 #error You have to redefine hashset_iterator_init
167 #endif
168 #ifndef hashset_iterator_next
169 #error You have to redefine hashset_iterator_next
170 #endif
171 #ifndef hashset_remove_iterator
172 #error You have to redefine hashset_remove_iterator
173 #endif
174
175 /**
176  * Returns the number of elements in the hashset
177  */
178 size_t hashset_size(const HashSet *this)
179 {
180         return this->num_elements - this->num_deleted;
181 }
182
183 /**
184  * Inserts an element into a hashset without growing the set (you have to make
185  * sure there's enough room for that.
186  * @note also see comments for hashset_insert()
187  * @internal
188  */
189 static INLINE
190 InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
191 {
192         size_t num_probes = 0;
193         size_t num_buckets = this->num_buckets;
194         size_t hashmask = num_buckets - 1;
195         unsigned hash = Hash(this, key);
196         size_t bucknum = hash & hashmask;
197         size_t insert_pos = ILLEGAL_POS;
198
199         assert((num_buckets & (num_buckets - 1)) == 0);
200
201         while(1) {
202                 HashSetEntry *entry = & this->entries[bucknum];
203
204                 if(EntryIsEmpty(*entry)) {
205                         size_t p;
206                         HashSetEntry *nentry;
207
208                         if(insert_pos != ILLEGAL_POS) {
209                                 p = insert_pos;
210                         } else {
211                                 p = bucknum;
212                         }
213
214                         nentry = &this->entries[p];
215                         InitData(this, EntryGetValue(*nentry), key);
216                         EntrySetHash(*nentry, hash);
217                         this->num_elements++;
218                         return GetInsertReturnValue(*nentry, 1);
219                 }
220                 if(EntryIsDeleted(*entry)) {
221                         if(insert_pos == ILLEGAL_POS)
222                                 insert_pos = bucknum;
223                 } else if(EntryGetHash(this, *entry) == hash) {
224                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
225                                 // Value already in the set, return it
226                                 return GetInsertReturnValue(*entry, 0);
227                         }
228                 }
229
230                 ++num_probes;
231                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
232                 assert(num_probes < num_buckets);
233         }
234 }
235
236 /**
237  * Inserts an element into a hashset under the assumption that the hashset
238  * contains no deleted entries and the element doesn't exist in the hashset yet.
239  * @internal
240  */
241 static
242 void insert_new(HashSet *this, unsigned hash, ValueType value)
243 {
244         size_t num_probes = 0;
245         size_t num_buckets = this->num_buckets;
246         size_t hashmask = num_buckets - 1;
247         size_t bucknum = hash & hashmask;
248         size_t insert_pos = ILLEGAL_POS;
249
250         assert(value != NullValue);
251
252         while(1) {
253                 HashSetEntry *entry = & this->entries[bucknum];
254
255                 if(EntryIsEmpty(*entry)) {
256                         size_t p;
257                         HashSetEntry *nentry;
258
259                         if(insert_pos != ILLEGAL_POS) {
260                                 p = insert_pos;
261                         } else {
262                                 p = bucknum;
263                         }
264                         nentry = &this->entries[p];
265
266                         EntryGetValue(*nentry) = value;
267                         EntrySetHash(*nentry, hash);
268                         this->num_elements++;
269                         return;
270                 }
271                 assert(!EntryIsDeleted(*entry));
272
273                 ++num_probes;
274                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
275                 assert(num_probes < num_buckets);
276         }
277 }
278
279 /**
280  * calculate shrink and enlarge limits
281  * @internal
282  */
283 static INLINE
284 void reset_thresholds(HashSet *this)
285 {
286         this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
287         this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
288         this->consider_shrink = 0;
289 }
290
291 /**
292  * Resize the hashset
293  * @internal
294  */
295 static INLINE
296 void resize(HashSet *this, size_t new_size)
297 {
298         size_t num_buckets = this->num_buckets;
299         size_t i;
300         HashSetEntry *old_entries = this->entries;
301         HashSetEntry *new_entries;
302
303         /* allocate a new array with double size */
304         new_entries = Alloc(new_size);
305         SetRangeEmpty(new_entries, new_size);
306
307         /* use the new array */
308         this->entries = new_entries;
309         this->num_buckets = new_size;
310         this->num_elements = 0;
311         this->num_deleted = 0;
312 #ifndef NDEBUG
313         this->entries_version++;
314 #endif
315         reset_thresholds(this);
316
317         /* reinsert all elements */
318         for(i = 0; i < num_buckets; ++i) {
319                 HashSetEntry *entry = & old_entries[i];
320                 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
321                         continue;
322
323                 insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
324         }
325
326         /* now we can free the old array */
327         Free(old_entries);
328 }
329
330 /**
331  * grow the hashset if adding 1 more elements would make it too crowded
332  * @internal
333  */
334 static INLINE
335 void maybe_grow(HashSet *this)
336 {
337         size_t resize_to;
338
339         if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
340                 return;
341
342         /* double table size */
343         resize_to = this->num_buckets * 2;
344         resize(this, resize_to);
345 }
346
347 /**
348  * shrink the hashset if it is only sparsely filled
349  * @internal
350  */
351 static INLINE
352 void maybe_shrink(HashSet *this)
353 {
354         size_t size;
355         size_t resize_to;
356
357         if(!this->consider_shrink)
358                 return;
359
360         this->consider_shrink = 0;
361         size = hashset_size(this);
362         if(LIKELY(size > this->shrink_threshold))
363                 return;
364
365         resize_to = ceil_po2(size);
366
367         if(resize_to < 4)
368                 resize_to = 4;
369
370         resize(this, resize_to);
371 }
372
373 /**
374  * Insert an element into the hashset. If no element with key key exists yet,
375  * then a new one is created and initialized with the InitData function.
376  * Otherwise the exisiting element is returned (for hashs where key is equal to
377  * value, nothing is returned.)
378  *
379  * @param this   the hashset
380  * @param key    the key that identifies the data
381  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
382  */
383 InsertReturnValue hashset_insert(HashSet *this, KeyType key)
384 {
385 #ifndef NDEBUG
386         this->entries_version++;
387 #endif
388
389         maybe_shrink(this);
390         maybe_grow(this);
391         return insert_nogrow(this, key);
392 }
393
394 /**
395  * Searchs for an element with key @p key.
396  *
397  * @param this      the hashset
398  * @param key       the key to search for
399  * @returns         the found value or NullValue if nothing was found
400  */
401 ValueType hashset_find(const HashSet *this, ConstKeyType key)
402 {
403         size_t num_probes = 0;
404         size_t num_buckets = this->num_buckets;
405         size_t hashmask = num_buckets - 1;
406         unsigned hash = Hash(this, key);
407         size_t bucknum = hash & hashmask;
408
409         while(1) {
410                 HashSetEntry *entry = & this->entries[bucknum];
411
412                 if(EntryIsEmpty(*entry)) {
413                         return NullValue;
414                 }
415                 if(EntryIsDeleted(*entry)) {
416                         // value is deleted
417                 } else if(EntryGetHash(this, *entry) == hash) {
418                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
419                                 // found the value
420                                 return EntryGetValue(*entry);
421                         }
422                 }
423
424                 ++num_probes;
425                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
426                 assert(num_probes < num_buckets);
427         }
428 }
429
430 /**
431  * Removes an element from a hashset. Does nothing if the set doesn't contain
432  * the element.
433  *
434  * @param this    the hashset
435  * @param key     key that identifies the data to remove
436  */
437 void hashset_remove(HashSet *this, ConstKeyType key)
438 {
439         size_t num_probes = 0;
440         size_t num_buckets = this->num_buckets;
441         size_t hashmask = num_buckets - 1;
442         unsigned hash = Hash(this, key);
443         size_t bucknum = hash & hashmask;
444
445 #ifndef NDEBUG
446         this->entries_version++;
447 #endif
448
449         while(1) {
450                 HashSetEntry *entry = & this->entries[bucknum];
451
452                 if(EntryIsEmpty(*entry)) {
453                         return;
454                 }
455                 if(EntryIsDeleted(*entry)) {
456                         // entry is deleted
457                 } else if(EntryGetHash(this, *entry) == hash) {
458                         if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
459                                 EntrySetDeleted(*entry);
460                                 this->num_deleted++;
461                                 this->consider_shrink = 1;
462                                 return;
463                         }
464                 }
465
466                 ++num_probes;
467                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
468                 assert(num_probes < num_buckets);
469         }
470 }
471
472 /**
473  * Initializes hashset with a specific size
474  * @internal
475  */
476 static INLINE
477 void init_size(HashSet *this, size_t initial_size)
478 {
479         if(initial_size < 4)
480                 initial_size = 4;
481
482         this->entries = Alloc(initial_size);
483         SetRangeEmpty(this->entries, initial_size);
484         this->num_buckets = initial_size;
485         this->consider_shrink = 0;
486         this->num_elements = 0;
487         this->num_deleted = 0;
488 #ifndef NDEBUG
489         this->entries_version = 0;
490 #endif
491
492         reset_thresholds(this);
493 }
494
495 /**
496  * Initialializes a hashset with the default size. The memory for the set has to
497  * already allocated.
498  */
499 void hashset_init(HashSet *this)
500 {
501         init_size(this, HT_MIN_BUCKETS);
502 }
503
504 /**
505  * Destroys a hashset, freeing all used memory (except the memory for the
506  * HashSet struct itself).
507  */
508 void hashset_destroy(HashSet *this)
509 {
510         Free(this->entries);
511 #ifndef NDEBUG
512         this->entries = NULL;
513 #endif
514 }
515
516 /**
517  * Initializes a hashset expecting expected_element size
518  */
519 void hashset_init_size(HashSet *this, size_t expected_elements)
520 {
521         size_t needed_size;
522         size_t po2size;
523
524         if(expected_elements >= UINT_MAX/2) {
525                 abort();
526         }
527
528         needed_size = expected_elements * (int)(1.0 / HT_OCCUPANCY_FLT);
529         po2size = ceil_po2(needed_size);
530         init_size(this, po2size);
531 }
532
533 /**
534  * Initializes a hashset iterator. The memory for the allocator has to be
535  * already allocated.
536  * @note it is not allowed to remove or insert elements while iterating
537  */
538 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
539 {
540         this->current_bucket = hashset->entries - 1;
541         this->end = hashset->entries + hashset->num_buckets;
542 #ifndef NDEBUG
543         this->set = hashset;
544         this->entries_version = hashset->entries_version;
545 #endif
546 }
547
548 /**
549  * Returns the next value in the iterator or NULL if no value is left
550  * in the hashset.
551  * @note it is not allowed to remove or insert elements while iterating
552  */
553 ValueType hashset_iterator_next(HashSetIterator *this)
554 {
555         HashSetEntry *current_bucket = this->current_bucket;
556         HashSetEntry *end = this->end;
557
558         if(current_bucket >= end)
559                 return NullValue;
560
561         /* using hashset_insert or hashset_remove is not allowed while iterating */
562         assert(this->entries_version == this->set->entries_version);
563
564         do {
565                 current_bucket++;
566         } while(current_bucket < end &&
567                         (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
568
569         if(current_bucket >= end)
570                 return NullValue;
571
572         this->current_bucket = current_bucket;
573         return EntryGetValue(*current_bucket);
574 }
575
576 /**
577  * Removes the element the iterator points to. Removing an element a second time
578  * has no result.
579  */
580 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
581 {
582         HashSetEntry *entry = iter->current_bucket;
583
584         /* iterator_next needs to have been called at least once */
585         assert(entry >= this->entries);
586         /* needs to be on a valid element */
587         assert(entry < this->entries + this->num_buckets);
588
589         if(EntryIsDeleted(*entry))
590                 return;
591
592         EntrySetDeleted(*entry);
593         this->num_deleted++;
594         this->consider_shrink = 1;
595 }
596
597 #endif