do not use ir_.*_ptr types
[cparser] / adt / hashset.c
1 /*
2  * Copyright (C) 1995-2009 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  *
27  *
28  * You have to specialize this file by defining:
29  *
30  * <ul>
31  *  <li><b>HashSet</b>         The name of the hashset type</li>
32  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
33  *  <li><b>ValueType</b>       The type of the stored data values</li>
34  *  <li><b>NullValue</b>       A special value representing no values</li>
35  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
36  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
37  * </ul>
38  *
39  * Note that by default it is assumed that the data values themselves are used
40  * as keys. However you can change that with additional defines:
41  *
42  * <ul>
43  *  <li><b>KeyType</b>         The type of the keys identifying data values.
44  *                             Defining this implies, that a data value contains
45  *                             more than just the key.</li>
46  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
47  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
48  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
49  *                             them on demand from the datavalues. (usefull if
50  *                             calculating the hash-values takes less time than
51  *                             a memory access)</li>
52  * </ul>
53  *
54  * You can further fine tune your hashset by defining the following:
55  *
56  * <ul>
57  *  <li><b>JUMP(num_probes)</b> The probing method</li>
58  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
59  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
60  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
61  *                                      the Null value</li>
62  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
63  * </ul>
64  */
65 #ifdef HashSet
66
67 #include <stdlib.h>
68 #include <string.h>
69 #include <assert.h>
70
71 #include "bitfiddle.h"
72 #include "util.h"
73
74 /* quadratic probing */
75 #ifndef JUMP
76 #define JUMP(num_probes)      (num_probes)
77 #endif /* JUMP */
78
79 #ifndef Hash
80 #define ID_HASH
81 #define Hash(self,key)        ((unsigned)(((char *)key) - (char *)0))
82 #endif /* Hash */
83
84 #ifdef DO_REHASH
85 #define HashSetEntry                   ValueType
86 #define EntrySetHash(entry,new_hash)   ((void)0)
87 #define EntryGetHash(self,entry)       Hash(self, GetKey(entry))
88 #define EntryGetValue(entry)           (entry)
89 #else /* ! DO_REHASH */
90 #define EntryGetHash(self,entry)       (entry).hash
91 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
92 #define EntryGetValue(entry)           (entry).data
93 #endif /* DO_REHASH */
94
95 #ifndef Alloc
96 #include "xmalloc.h"
97 #define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
98 #define Free(ptr)      free(ptr)
99 #endif /* Alloc */
100
101 #ifdef ID_HASH
102 #define InsertReturnValue                 int
103 #define GetInsertReturnValue(entry,found) (found)
104 #define NullReturnValue                   0
105 #else /* ! ID_HASH */
106 #ifdef SCALAR_RETURN
107 #define InsertReturnValue                 ValueType
108 #define GetInsertReturnValue(entry,found) EntryGetValue(entry)
109 #define NullReturnValue                   NullValue
110 #else
111 #define InsertReturnValue                 ValueType*
112 #define GetInsertReturnValue(entry,found) & EntryGetValue(entry)
113 #define NullReturnValue                   & NullValue
114 #endif
115 #endif /* ID_HASH */
116
117 #ifndef KeyType
118 #define KeyType                  ValueType
119 #define GetKey(value)            (value)
120 #define InitData(self,value,key) (value) = (key)
121 #endif /* KeyType */
122
123 #ifndef ConstKeyType
124 #define ConstKeyType             const KeyType
125 #endif /* ConstKeyType */
126
127 #ifndef EntrySetEmpty
128 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
129 #endif /* EntrySetEmpty */
130 #ifndef EntrySetDeleted
131 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
132 #endif /* EntrySetDeleted */
133 #ifndef EntryIsEmpty
134 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
135 #endif /* EntryIsEmpty */
136 #ifndef EntryIsDeleted
137 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
138 #endif /* EntryIsDeleted */
139 #ifndef SetRangeEmpty
140 #define SetRangeEmpty(ptr,size)                \
141 {                                              \
142         size_t _i;                                 \
143         size_t _size = (size);                     \
144         HashSetEntry *entries = (ptr);             \
145         for(_i = 0; _i < _size; ++_i) {            \
146                 HashSetEntry *entry = & entries[_i];   \
147                 EntrySetEmpty(*entry);                 \
148         }                                          \
149 }
150 #endif /* SetRangeEmpty */
151
152 #ifndef HT_OCCUPANCY_FLT
153 /** how full before we double size */
154 #define HT_OCCUPANCY_FLT(x) ((x)/2)
155 #endif /* HT_OCCUPANCY_FLT */
156 #ifndef HT_1_DIV_OCCUPANCY_FLT
157 #define HT_1_DIV_OCCUPANCY_FLT 2
158 #endif
159
160 #ifndef HT_EMPTY_FLT
161 /** how empty before we half size */
162 #define HT_EMPTY_FLT(x)     ((x)/5)
163 #endif /* HT_EMPTY_FLT */
164
165 #ifndef HT_MIN_BUCKETS
166 /** default smallest bucket size */
167 #define HT_MIN_BUCKETS    32
168 #endif /* HT_MIN_BUCKETS */
169
170 #define ILLEGAL_POS       ((size_t)-1)
171
172 /* check that all needed functions are defined */
173 #ifndef hashset_init
174 #error You have to redefine hashset_init
175 #endif
176 #ifndef hashset_init_size
177 #error You have to redefine hashset_init_size
178 #endif
179 #ifndef hashset_destroy
180 #error You have to redefine hashset_destroy
181 #endif
182 #ifndef hashset_insert
183 #error You have to redefine hashset_insert
184 #endif
185 #ifndef hashset_remove
186 #error You have to redefine hashset_remove
187 #endif
188 #ifndef hashset_find
189 #error You have to redefine hashset_find
190 #endif
191 #ifndef hashset_size
192 #error You have to redefine hashset_size
193 #endif
194 #ifndef hashset_iterator_init
195 #error You have to redefine hashset_iterator_init
196 #endif
197 #ifndef hashset_iterator_next
198 #error You have to redefine hashset_iterator_next
199 #endif
200 #ifndef hashset_remove_iterator
201 #error You have to redefine hashset_remove_iterator
202 #endif
203
204 /* prototypes to avoid warnings */
205 void hashset_init(HashSet *self);
206 void hashset_destroy(HashSet *self);
207 size_t hashset_size(const HashSet *self);
208 InsertReturnValue hashset_insert(HashSet *self, KeyType key);
209 InsertReturnValue hashset_find(const HashSet *self, ConstKeyType key);
210 void hashset_remove(HashSet *self, ConstKeyType key);
211 void hashset_init_size(HashSet *self, size_t expected_elements);
212 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset);
213 ValueType hashset_iterator_next(HashSetIterator *self);
214 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter);
215
216 /**
217  * Returns the number of elements in the hashset
218  */
219 size_t hashset_size(const HashSet *self)
220 {
221         return self->num_elements - self->num_deleted;
222 }
223
224 /**
225  * Inserts an element into a hashset without growing the set (you have to make
226  * sure there's enough room for that.
227  * @note also see comments for hashset_insert()
228  * @internal
229  */
230 static inline
231 InsertReturnValue insert_nogrow(HashSet *self, KeyType key)
232 {
233         size_t   num_probes  = 0;
234         size_t   num_buckets = self->num_buckets;
235         size_t   hashmask    = num_buckets - 1;
236         unsigned hash        = Hash(self, key);
237         size_t   bucknum     = hash & hashmask;
238         size_t   insert_pos  = ILLEGAL_POS;
239
240         assert((num_buckets & (num_buckets - 1)) == 0);
241
242         while(1) {
243                 HashSetEntry *entry = & self->entries[bucknum];
244
245                 if(EntryIsEmpty(*entry)) {
246                         size_t p;
247                         HashSetEntry *nentry;
248
249                         if(insert_pos != ILLEGAL_POS) {
250                                 p = insert_pos;
251                         } else {
252                                 p = bucknum;
253                         }
254
255                         nentry = &self->entries[p];
256                         InitData(self, EntryGetValue(*nentry), key);
257                         EntrySetHash(*nentry, hash);
258                         self->num_elements++;
259                         return GetInsertReturnValue(*nentry, 0);
260                 }
261                 if(EntryIsDeleted(*entry)) {
262                         if(insert_pos == ILLEGAL_POS)
263                                 insert_pos = bucknum;
264                 } else if(EntryGetHash(self, *entry) == hash) {
265                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
266                                 // Value already in the set, return it
267                                 return GetInsertReturnValue(*entry, 1);
268                         }
269                 }
270
271                 ++num_probes;
272                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
273                 assert(num_probes < num_buckets);
274         }
275 }
276
277 /**
278  * Inserts an element into a hashset under the assumption that the hashset
279  * contains no deleted entries and the element doesn't exist in the hashset yet.
280  * @internal
281  */
282 static
283 void insert_new(HashSet *self, unsigned hash, ValueType value)
284 {
285         size_t num_probes  = 0;
286         size_t num_buckets = self->num_buckets;
287         size_t hashmask    = num_buckets - 1;
288         size_t bucknum     = hash & hashmask;
289         size_t insert_pos  = ILLEGAL_POS;
290
291         //assert(value != NullValue);
292
293         while(1) {
294                 HashSetEntry *entry = & self->entries[bucknum];
295
296                 if(EntryIsEmpty(*entry)) {
297                         size_t        p;
298                         HashSetEntry *nentry;
299
300                         if(insert_pos != ILLEGAL_POS) {
301                                 p = insert_pos;
302                         } else {
303                                 p = bucknum;
304                         }
305                         nentry = &self->entries[p];
306
307                         EntryGetValue(*nentry) = value;
308                         EntrySetHash(*nentry, hash);
309                         self->num_elements++;
310                         return;
311                 }
312                 assert(!EntryIsDeleted(*entry));
313
314                 ++num_probes;
315                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
316                 assert(num_probes < num_buckets);
317         }
318 }
319
320 /**
321  * calculate shrink and enlarge limits
322  * @internal
323  */
324 static inline
325 void reset_thresholds(HashSet *self)
326 {
327         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
328         self->shrink_threshold  = (size_t) HT_EMPTY_FLT(self->num_buckets);
329         self->consider_shrink   = 0;
330 }
331
332 /**
333  * Resize the hashset
334  * @internal
335  */
336 static inline
337 void resize(HashSet *self, size_t new_size)
338 {
339         size_t num_buckets = self->num_buckets;
340         size_t i;
341         HashSetEntry *old_entries = self->entries;
342         HashSetEntry *new_entries;
343
344         /* allocate a new array with double size */
345         new_entries = Alloc(new_size);
346         SetRangeEmpty(new_entries, new_size);
347
348         /* use the new array */
349         self->entries      = new_entries;
350         self->num_buckets  = new_size;
351         self->num_elements = 0;
352         self->num_deleted  = 0;
353 #ifndef NDEBUG
354         self->entries_version++;
355 #endif
356         reset_thresholds(self);
357
358         /* reinsert all elements */
359         for(i = 0; i < num_buckets; ++i) {
360                 HashSetEntry *entry = & old_entries[i];
361                 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
362                         continue;
363
364                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
365         }
366
367         /* now we can free the old array */
368         Free(old_entries);
369 }
370
371 /**
372  * grow the hashset if adding 1 more elements would make it too crowded
373  * @internal
374  */
375 static inline
376 void maybe_grow(HashSet *self)
377 {
378         size_t resize_to;
379
380         if(LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
381                 return;
382
383         /* double table size */
384         resize_to = self->num_buckets * 2;
385         resize(self, resize_to);
386 }
387
388 /**
389  * shrink the hashset if it is only sparsely filled
390  * @internal
391  */
392 static inline
393 void maybe_shrink(HashSet *self)
394 {
395         size_t size;
396         size_t resize_to;
397
398         if(!self->consider_shrink)
399                 return;
400
401         self->consider_shrink = 0;
402         size                  = hashset_size(self);
403         if(size <= HT_MIN_BUCKETS)
404                 return;
405
406         if(LIKELY(size > self->shrink_threshold))
407                 return;
408
409         resize_to = ceil_po2(size);
410
411         if(resize_to < 4)
412                 resize_to = 4;
413
414         resize(self, resize_to);
415 }
416
417 /**
418  * Insert an element into the hashset. If no element with key key exists yet,
419  * then a new one is created and initialized with the InitData function.
420  * Otherwise the exisiting element is returned (for hashs where key is equal to
421  * value, nothing is returned.)
422  *
423  * @param self   the hashset
424  * @param key    the key that identifies the data
425  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
426  */
427 InsertReturnValue hashset_insert(HashSet *self, KeyType key)
428 {
429 #ifndef NDEBUG
430         self->entries_version++;
431 #endif
432
433         maybe_shrink(self);
434         maybe_grow(self);
435         return insert_nogrow(self, key);
436 }
437
438 /**
439  * Searchs for an element with key @p key.
440  *
441  * @param self      the hashset
442  * @param key       the key to search for
443  * @returns         the found value or NullValue if nothing was found
444  */
445 InsertReturnValue hashset_find(const HashSet *self, ConstKeyType key)
446 {
447         size_t   num_probes  = 0;
448         size_t   num_buckets = self->num_buckets;
449         size_t   hashmask    = num_buckets - 1;
450         unsigned hash        = Hash(self, key);
451         size_t   bucknum     = hash & hashmask;
452
453         while(1) {
454                 HashSetEntry *entry = & self->entries[bucknum];
455
456                 if(EntryIsEmpty(*entry)) {
457                         return NullReturnValue;
458                 }
459                 if(EntryIsDeleted(*entry)) {
460                         // value is deleted
461                 } else if(EntryGetHash(self, *entry) == hash) {
462                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
463                                 // found the value
464                                 return GetInsertReturnValue(*entry, 1);
465                         }
466                 }
467
468                 ++num_probes;
469                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
470                 assert(num_probes < num_buckets);
471         }
472 }
473
474 /**
475  * Removes an element from a hashset. Does nothing if the set doesn't contain
476  * the element.
477  *
478  * @param self    the hashset
479  * @param key     key that identifies the data to remove
480  */
481 void hashset_remove(HashSet *self, ConstKeyType key)
482 {
483         size_t   num_probes  = 0;
484         size_t   num_buckets = self->num_buckets;
485         size_t   hashmask    = num_buckets - 1;
486         unsigned hash        = Hash(self, key);
487         size_t   bucknum     = hash & hashmask;
488
489 #ifndef NDEBUG
490         self->entries_version++;
491 #endif
492
493         while(1) {
494                 HashSetEntry *entry = & self->entries[bucknum];
495
496                 if(EntryIsEmpty(*entry)) {
497                         return;
498                 }
499                 if(EntryIsDeleted(*entry)) {
500                         // entry is deleted
501                 } else if(EntryGetHash(self, *entry) == hash) {
502                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
503                                 EntrySetDeleted(*entry);
504                                 self->num_deleted++;
505                                 self->consider_shrink = 1;
506                                 return;
507                         }
508                 }
509
510                 ++num_probes;
511                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
512                 assert(num_probes < num_buckets);
513         }
514 }
515
516 /**
517  * Initializes hashset with a specific size
518  * @internal
519  */
520 static inline
521 void init_size(HashSet *self, size_t initial_size)
522 {
523         if(initial_size < 4)
524                 initial_size = 4;
525
526         self->entries         = Alloc(initial_size);
527         SetRangeEmpty(self->entries, initial_size);
528         self->num_buckets     = initial_size;
529         self->consider_shrink = 0;
530         self->num_elements    = 0;
531         self->num_deleted     = 0;
532 #ifndef NDEBUG
533         self->entries_version = 0;
534 #endif
535
536         reset_thresholds(self);
537 }
538
539 /**
540  * Initialializes a hashset with the default size. The memory for the set has to
541  * already allocated.
542  */
543 void hashset_init(HashSet *self)
544 {
545         init_size(self, HT_MIN_BUCKETS);
546 }
547
548 /**
549  * Destroys a hashset, freeing all used memory (except the memory for the
550  * HashSet struct itself).
551  */
552 void hashset_destroy(HashSet *self)
553 {
554         Free(self->entries);
555 #ifndef NDEBUG
556         self->entries = NULL;
557 #endif
558 }
559
560 /**
561  * Initializes a hashset expecting expected_element size
562  */
563 void hashset_init_size(HashSet *self, size_t expected_elements)
564 {
565         size_t needed_size;
566         size_t po2size;
567
568         if(expected_elements >= UINT_MAX/2) {
569                 abort();
570         }
571
572         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
573         po2size     = ceil_po2(needed_size);
574         init_size(self, po2size);
575 }
576
577 /**
578  * Initializes a hashset iterator. The memory for the allocator has to be
579  * already allocated.
580  * @note it is not allowed to remove or insert elements while iterating
581  */
582 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
583 {
584         self->current_bucket = hashset->entries - 1;
585         self->end            = hashset->entries + hashset->num_buckets;
586 #ifndef NDEBUG
587         self->set             = hashset;
588         self->entries_version = hashset->entries_version;
589 #endif
590 }
591
592 /**
593  * Returns the next value in the iterator or NULL if no value is left
594  * in the hashset.
595  * @note it is not allowed to remove or insert elements while iterating
596  */
597 ValueType hashset_iterator_next(HashSetIterator *self)
598 {
599         HashSetEntry *current_bucket = self->current_bucket;
600         HashSetEntry *end            = self->end;
601
602         /* using hashset_insert or hashset_remove is not allowed while iterating */
603         assert(self->entries_version == self->set->entries_version);
604
605         do {
606                 current_bucket++;
607                 if(current_bucket >= end)
608                         return NullValue;
609         } while(EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
610
611         self->current_bucket = current_bucket;
612         return EntryGetValue(*current_bucket);
613 }
614
615 /**
616  * Removes the element the iterator points to. Removing an element a second time
617  * has no result.
618  */
619 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
620 {
621         HashSetEntry *entry = iter->current_bucket;
622
623         /* iterator_next needs to have been called at least once */
624         assert(entry >= self->entries);
625         /* needs to be on a valid element */
626         assert(entry < self->entries + self->num_buckets);
627
628         if(EntryIsDeleted(*entry))
629                 return;
630
631         EntrySetDeleted(*entry);
632         self->num_deleted++;
633         self->consider_shrink = 1;
634 }
635
636 #else
637 __attribute__((unused)) static int dummy;
638 #endif