cleanup: Add and use macro MAX().
[cparser] / adt / hashset.c.inl
1 /*
2  * Copyright (C) 1995-2012 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  *
27  *
28  * You have to specialize this file by defining:
29  *
30  * <ul>
31  *  <li><b>HashSet</b>         The name of the hashset type</li>
32  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
33  *  <li><b>ValueType</b>       The type of the stored data values</li>
34  *  <li><b>NullValue</b>       A special value representing no values</li>
35  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
36  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
37  * </ul>
38  *
39  * Note that by default it is assumed that the data values themselves are used
40  * as keys. However you can change that with additional defines:
41  *
42  * <ul>
43  *  <li><b>KeyType</b>         The type of the keys identifying data values.
44  *                             Defining this implies, that a data value contains
45  *                             more than just the key.</li>
46  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
47  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests whether 2 keys are equal</li>
48  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
49  *                             them on demand from the datavalues. (useful if
50  *                             calculating the hash-values takes less time than
51  *                             a memory access)</li>
52  * </ul>
53  *
54  * You can further fine tune your hashset by defining the following:
55  *
56  * <ul>
57  *  <li><b>JUMP(num_probes)</b> The probing method</li>
58  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
59  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
60  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
61  *                                      the Null value</li>
62  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
63  * </ul>
64  */
65 #ifdef HashSet
66
67 #include <stdlib.h>
68 #include <string.h>
69 #include <assert.h>
70
71 #include "bitfiddle.h"
72 #include "util.h"
73
74 /* quadratic probing */
75 #ifndef JUMP
76 #define JUMP(num_probes)      (num_probes)
77 #endif /* JUMP */
78
79 #ifndef Hash
80 #define ID_HASH
81 #define Hash(self,key)        ((unsigned)(((char *)key) - (char *)0))
82 #endif /* Hash */
83
84 #ifdef DO_REHASH
85 #define HashSetEntry                   ValueType
86 #define EntrySetHash(entry,new_hash)   ((void)0)
87 #define EntryGetHash(self,entry)       Hash(self, GetKey(entry))
88 #define EntryGetValue(entry)           (entry)
89 #else /* ! DO_REHASH */
90 #define EntryGetHash(self,entry)       (entry).hash
91 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
92 #define EntryGetValue(entry)           (entry).data
93 #endif /* DO_REHASH */
94
95 #ifndef Alloc
96 #include "xmalloc.h"
97 #define Alloc(size) XMALLOCN(HashSetEntry, (size))
98 #define Free(ptr)      free(ptr)
99 #endif /* Alloc */
100
101 #ifdef ID_HASH
102 #define FindReturnValue                 bool
103 #define GetFindReturnValue(entry,found) (found)
104 #define NullReturnValue                 false
105 #define InsertReturnValue(findreturn)   !(findreturn)
106 #else /* ! ID_HASH */
107 #ifdef SCALAR_RETURN
108 #define FindReturnValue                 ValueType
109 #define GetFindReturnValue(entry,found) EntryGetValue(entry)
110 #define NullReturnValue                 NullValue
111 #else
112 #define FindReturnValue                 ValueType*
113 #define GetFindReturnValue(entry,found) & EntryGetValue(entry)
114 #define NullReturnValue                 & NullValue
115 #endif
116 #endif /* ID_HASH */
117
118 #ifndef InsertReturnValue
119 #define InsertReturnValue(findreturn)   findreturn
120 #endif
121
122 #ifndef KeyType
123 #define KeyType                  ValueType
124 #define GetKey(value)            (value)
125 #define InitData(self,value,key) (value) = (key)
126 #endif /* KeyType */
127
128 #ifndef ConstKeyType
129 #define ConstKeyType             const KeyType
130 #endif /* ConstKeyType */
131
132 #ifndef EntrySetEmpty
133 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
134 #endif /* EntrySetEmpty */
135 #ifndef EntrySetDeleted
136 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
137 #endif /* EntrySetDeleted */
138 #ifndef EntryIsEmpty
139 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
140 #endif /* EntryIsEmpty */
141 #ifndef EntryIsDeleted
142 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
143 #endif /* EntryIsDeleted */
144 #ifndef SetRangeEmpty
145 #define SetRangeEmpty(ptr,size)                \
146 {                                              \
147         size_t _i;                                 \
148         size_t _size = (size);                     \
149         HashSetEntry *entries = (ptr);             \
150         for (_i = 0; _i < _size; ++_i) {            \
151                 HashSetEntry *entry = & entries[_i];   \
152                 EntrySetEmpty(*entry);                 \
153         }                                          \
154 }
155 #endif /* SetRangeEmpty */
156
157 #ifndef HT_OCCUPANCY_FLT
158 /** how full before we double size */
159 #define HT_OCCUPANCY_FLT(x) ((x)/2)
160 #endif /* HT_OCCUPANCY_FLT */
161 #ifndef HT_1_DIV_OCCUPANCY_FLT
162 #define HT_1_DIV_OCCUPANCY_FLT 2
163 #endif
164
165 #ifndef HT_EMPTY_FLT
166 /** how empty before we half size */
167 #define HT_EMPTY_FLT(x)     ((x)/5)
168 #endif /* HT_EMPTY_FLT */
169
170 #ifndef HT_MIN_BUCKETS
171 /** default smallest bucket size */
172 #define HT_MIN_BUCKETS    32
173 #endif /* HT_MIN_BUCKETS */
174
175 #define ILLEGAL_POS       ((size_t)-1)
176
177 #ifdef hashset_size
178 /**
179  * Returns the number of elements in the hashset
180  */
181 size_t hashset_size(const HashSet *self)
182 {
183         return self->num_elements - self->num_deleted;
184 }
185 #else
186 static inline size_t hashset_size(const HashSet *self)
187 {
188         return self->num_elements - self->num_deleted;
189 }
190 #endif
191
192 /**
193  * Inserts an element into a hashset without growing the set (you have to make
194  * sure there's enough room for that.
195  * @returns  previous value if found, NullValue otherwise
196  * @note also see comments for hashset_insert()
197  * @internal
198  */
199 static inline FindReturnValue insert_nogrow(HashSet *self, KeyType key)
200 {
201         size_t   num_probes  = 0;
202         size_t   num_buckets = self->num_buckets;
203         size_t   hashmask    = num_buckets - 1;
204         unsigned hash        = Hash(self, key);
205         size_t   bucknum     = hash & hashmask;
206         size_t   insert_pos  = ILLEGAL_POS;
207
208         assert((num_buckets & (num_buckets - 1)) == 0);
209
210         for (;;) {
211                 HashSetEntry *entry = & self->entries[bucknum];
212
213                 if (EntryIsEmpty(*entry)) {
214                         size_t p;
215                         HashSetEntry *nentry;
216
217                         if (insert_pos != ILLEGAL_POS) {
218                                 p = insert_pos;
219                         } else {
220                                 p = bucknum;
221                         }
222
223                         nentry = &self->entries[p];
224                         InitData(self, EntryGetValue(*nentry), key);
225                         EntrySetHash(*nentry, hash);
226                         self->num_elements++;
227                         return GetFindReturnValue(*nentry, false);
228                 }
229                 if (EntryIsDeleted(*entry)) {
230                         if (insert_pos == ILLEGAL_POS)
231                                 insert_pos = bucknum;
232                 } else if (EntryGetHash(self, *entry) == hash) {
233                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
234                                 // Value already in the set, return it
235                                 return GetFindReturnValue(*entry, true);
236                         }
237                 }
238
239                 ++num_probes;
240                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
241                 assert(num_probes < num_buckets);
242         }
243 }
244
245 /**
246  * calculate shrink and enlarge limits
247  * @internal
248  */
249 static inline void reset_thresholds(HashSet *self)
250 {
251         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
252         self->shrink_threshold  = (size_t) HT_EMPTY_FLT(self->num_buckets);
253         self->consider_shrink   = 0;
254 }
255
256 #ifndef HAVE_OWN_RESIZE
257 /**
258  * Inserts an element into a hashset under the assumption that the hashset
259  * contains no deleted entries and the element doesn't exist in the hashset yet.
260  * @internal
261  */
262 static void insert_new(HashSet *self, unsigned hash, ValueType value)
263 {
264         size_t num_probes  = 0;
265         size_t num_buckets = self->num_buckets;
266         size_t hashmask    = num_buckets - 1;
267         size_t bucknum     = hash & hashmask;
268         size_t insert_pos  = ILLEGAL_POS;
269
270         //assert(value != NullValue);
271
272         for (;;) {
273                 HashSetEntry *entry = & self->entries[bucknum];
274
275                 if (EntryIsEmpty(*entry)) {
276                         size_t        p;
277                         HashSetEntry *nentry;
278
279                         if (insert_pos != ILLEGAL_POS) {
280                                 p = insert_pos;
281                         } else {
282                                 p = bucknum;
283                         }
284                         nentry = &self->entries[p];
285
286                         EntryGetValue(*nentry) = value;
287                         EntrySetHash(*nentry, hash);
288                         self->num_elements++;
289                         return;
290                 }
291                 assert(!EntryIsDeleted(*entry));
292
293                 ++num_probes;
294                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
295                 assert(num_probes < num_buckets);
296         }
297 }
298
299 /**
300  * Resize the hashset
301  * @internal
302  */
303 static inline void resize(HashSet *self, size_t new_size)
304 {
305         size_t num_buckets = self->num_buckets;
306         size_t i;
307         HashSetEntry *old_entries = self->entries;
308         HashSetEntry *new_entries;
309
310         /* allocate a new array with double size */
311         new_entries = Alloc(new_size);
312         SetRangeEmpty(new_entries, new_size);
313
314         /* use the new array */
315         self->entries      = new_entries;
316         self->num_buckets  = new_size;
317         self->num_elements = 0;
318         self->num_deleted  = 0;
319 #ifndef NDEBUG
320         self->entries_version++;
321 #endif
322         reset_thresholds(self);
323
324         /* reinsert all elements */
325         for (i = 0; i < num_buckets; ++i) {
326                 HashSetEntry *entry = & old_entries[i];
327                 if (EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
328                         continue;
329
330                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
331         }
332
333         /* now we can free the old array */
334         Free(old_entries);
335 }
336 #else
337
338 /* resize must be defined outside */
339 static inline void resize(HashSet *self, size_t new_size);
340
341 #endif
342
343 /**
344  * grow the hashset if adding 1 more elements would make it too crowded
345  * @internal
346  */
347 static inline void maybe_grow(HashSet *self)
348 {
349         if (LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
350                 return;
351
352         size_t resize_to;
353         if (self->num_elements - self->num_deleted + 2 > self->enlarge_threshold) {
354                 /* double table size */
355                 resize_to = self->num_buckets * 2;
356                 if (resize_to <= self->num_buckets) {
357                         abort();
358                 }
359         } else {
360                 /* no need to resize, we just clean up the deleted entries */
361                 resize_to = self->num_buckets;
362         }
363         resize(self, resize_to);
364 }
365
366 /**
367  * shrink the hashset if it is only sparsely filled
368  * @internal
369  */
370 static inline void maybe_shrink(HashSet *self)
371 {
372         size_t size;
373         size_t resize_to;
374
375         if (!self->consider_shrink)
376                 return;
377
378         self->consider_shrink = 0;
379         size                  = hashset_size(self);
380         if (size <= HT_MIN_BUCKETS)
381                 return;
382
383         if (LIKELY(size > self->shrink_threshold))
384                 return;
385
386         resize_to = ceil_po2(size);
387         resize_to = MAX(resize_to, 4);
388
389         resize(self, resize_to);
390 }
391
392 #ifdef hashset_insert
393 /**
394  * Insert an element into the hashset. If no element with the given key exists yet,
395  * then a new one is created and initialized with the InitData function.
396  * Otherwise the existing element is returned (for hashs where key is equal to
397  * value, nothing is returned.)
398  *
399  * @param self   the hashset
400  * @param key    the key that identifies the data
401  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
402  */
403 FindReturnValue hashset_insert(HashSet *self, KeyType key)
404 {
405 #ifndef NDEBUG
406         self->entries_version++;
407 #endif
408
409         maybe_shrink(self);
410         maybe_grow(self);
411         return InsertReturnValue(insert_nogrow(self, key));
412 }
413 #endif
414
415 #ifdef hashset_find
416 /**
417  * Searches for an element with key @p key.
418  *
419  * @param self      the hashset
420  * @param key       the key to search for
421  * @returns         the found value or NullValue if nothing was found
422  */
423 FindReturnValue hashset_find(const HashSet *self, ConstKeyType key)
424 {
425         size_t   num_probes  = 0;
426         size_t   num_buckets = self->num_buckets;
427         size_t   hashmask    = num_buckets - 1;
428         unsigned hash        = Hash(self, key);
429         size_t   bucknum     = hash & hashmask;
430
431         for (;;) {
432                 HashSetEntry *entry = & self->entries[bucknum];
433
434                 if (EntryIsEmpty(*entry)) {
435                         return NullReturnValue;
436                 }
437                 if (EntryIsDeleted(*entry)) {
438                         // value is deleted
439                 } else if (EntryGetHash(self, *entry) == hash) {
440                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
441                                 // found the value
442                                 return GetFindReturnValue(*entry, true);
443                         }
444                 }
445
446                 ++num_probes;
447                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
448                 assert(num_probes < num_buckets);
449         }
450 }
451 #endif
452
453 #ifdef hashset_remove
454 /**
455  * Removes an element from a hashset. Does nothing if the set doesn't contain
456  * the element.
457  *
458  * @param self    the hashset
459  * @param key     key that identifies the data to remove
460  */
461 void hashset_remove(HashSet *self, ConstKeyType key)
462 {
463         size_t   num_probes  = 0;
464         size_t   num_buckets = self->num_buckets;
465         size_t   hashmask    = num_buckets - 1;
466         unsigned hash        = Hash(self, key);
467         size_t   bucknum     = hash & hashmask;
468
469 #ifndef NDEBUG
470         self->entries_version++;
471 #endif
472
473         for (;;) {
474                 HashSetEntry *entry = & self->entries[bucknum];
475
476                 if (EntryIsEmpty(*entry)) {
477                         return;
478                 }
479                 if (EntryIsDeleted(*entry)) {
480                         // entry is deleted
481                 } else if (EntryGetHash(self, *entry) == hash) {
482                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
483                                 EntrySetDeleted(*entry);
484                                 self->num_deleted++;
485                                 self->consider_shrink = 1;
486                                 return;
487                         }
488                 }
489
490                 ++num_probes;
491                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
492                 assert(num_probes < num_buckets);
493         }
494 }
495 #endif
496
497 /**
498  * Initializes hashset with a specific size
499  * @internal
500  */
501 static inline void init_size(HashSet *self, size_t initial_size)
502 {
503         initial_size = MAX(initial_size, 4);
504
505         self->entries         = Alloc(initial_size);
506         SetRangeEmpty(self->entries, initial_size);
507         self->num_buckets     = initial_size;
508         self->consider_shrink = 0;
509         self->num_elements    = 0;
510         self->num_deleted     = 0;
511 #ifndef NDEBUG
512         self->entries_version = 0;
513 #endif
514 #ifdef ADDITIONAL_INIT
515         ADDITIONAL_INIT
516 #endif
517
518         reset_thresholds(self);
519 }
520
521 #ifdef hashset_init
522 /**
523  * Initializes a hashset with the default size. The memory for the set has to
524  * already allocated.
525  */
526 void hashset_init(HashSet *self)
527 {
528         init_size(self, HT_MIN_BUCKETS);
529 }
530 #endif
531
532 #ifdef hashset_destroy
533 /**
534  * Destroys a hashset, freeing all used memory (except the memory for the
535  * HashSet struct itself).
536  */
537 void hashset_destroy(HashSet *self)
538 {
539 #ifdef ADDITIONAL_TERM
540         ADDITIONAL_TERM
541 #endif
542         Free(self->entries);
543 #ifndef NDEBUG
544         self->entries = NULL;
545 #endif
546 }
547 #endif
548
549 #ifdef hashset_init_size
550 /**
551  * Initializes a hashset expecting expected_element size.
552  */
553 void hashset_init_size(HashSet *self, size_t expected_elements)
554 {
555         size_t needed_size;
556         size_t po2size;
557
558         if (expected_elements >= UINT_MAX/2) {
559                 abort();
560         }
561
562         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
563         po2size     = ceil_po2(needed_size);
564         init_size(self, po2size);
565 }
566 #endif
567
568 #ifdef hashset_iterator_init
569 /**
570  * Initializes a hashset iterator. The memory for the allocator has to be
571  * already allocated.
572  * @note it is not allowed to remove or insert elements while iterating
573  */
574 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
575 {
576         self->current_bucket = hashset->entries - 1;
577         self->end            = hashset->entries + hashset->num_buckets;
578 #ifndef NDEBUG
579         self->set             = hashset;
580         self->entries_version = hashset->entries_version;
581 #endif
582 }
583 #endif
584
585 #ifdef hashset_iterator_next
586 /**
587  * Returns the next value in the iterator or NULL if no value is left
588  * in the hashset.
589  * @note it is not allowed to remove or insert elements while iterating
590  */
591 ValueType hashset_iterator_next(HashSetIterator *self)
592 {
593         HashSetEntry *current_bucket = self->current_bucket;
594         HashSetEntry *end            = self->end;
595
596         /* using hashset_insert or hashset_remove is not allowed while iterating */
597         assert(self->entries_version == self->set->entries_version);
598
599         do {
600                 current_bucket++;
601                 if (current_bucket >= end)
602                         return NullValue;
603         } while (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
604
605         self->current_bucket = current_bucket;
606         return EntryGetValue(*current_bucket);
607 }
608 #endif
609
610 #ifdef hashset_remove_iterator
611 /**
612  * Removes the element the iterator points to. Removing an element a second time
613  * has no result.
614  */
615 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
616 {
617         HashSetEntry *entry = iter->current_bucket;
618
619         /* iterator_next needs to have been called at least once */
620         assert(entry >= self->entries);
621         /* needs to be on a valid element */
622         assert(entry < self->entries + self->num_buckets);
623
624         if (EntryIsDeleted(*entry))
625                 return;
626
627         EntrySetDeleted(*entry);
628         self->num_deleted++;
629         self->consider_shrink = 1;
630 }
631 #endif
632
633 #endif