e0cf70c77a091d9f9e44369cb777abbd066fec27
[cparser] / adt / hashset.c.inl
1 /*
2  * Copyright (C) 1995-2012 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  *
27  *
28  * You have to specialize this file by defining:
29  *
30  * <ul>
31  *  <li><b>HashSet</b>         The name of the hashset type</li>
32  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
33  *  <li><b>ValueType</b>       The type of the stored data values</li>
34  *  <li><b>NullValue</b>       A special value representing no values</li>
35  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
36  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
37  * </ul>
38  *
39  * Note that by default it is assumed that the data values themselves are used
40  * as keys. However you can change that with additional defines:
41  *
42  * <ul>
43  *  <li><b>KeyType</b>         The type of the keys identifying data values.
44  *                             Defining this implies, that a data value contains
45  *                             more than just the key.</li>
46  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
47  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
48  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
49  *                             them on demand from the datavalues. (useful if
50  *                             calculating the hash-values takes less time than
51  *                             a memory access)</li>
52  * </ul>
53  *
54  * You can further fine tune your hashset by defining the following:
55  *
56  * <ul>
57  *  <li><b>JUMP(num_probes)</b> The probing method</li>
58  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
59  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
60  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
61  *                                      the Null value</li>
62  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
63  * </ul>
64  */
65 #ifdef HashSet
66
67 #include <stdlib.h>
68 #include <string.h>
69 #include <assert.h>
70
71 #include "bitfiddle.h"
72 #include "util.h"
73
74 /* quadratic probing */
75 #ifndef JUMP
76 #define JUMP(num_probes)      (num_probes)
77 #endif /* JUMP */
78
79 #ifndef Hash
80 #define ID_HASH
81 #define Hash(self,key)        ((unsigned)(((char *)key) - (char *)0))
82 #endif /* Hash */
83
84 #ifdef DO_REHASH
85 #define HashSetEntry                   ValueType
86 #define EntrySetHash(entry,new_hash)   ((void)0)
87 #define EntryGetHash(self,entry)       Hash(self, GetKey(entry))
88 #define EntryGetValue(entry)           (entry)
89 #else /* ! DO_REHASH */
90 #define EntryGetHash(self,entry)       (entry).hash
91 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
92 #define EntryGetValue(entry)           (entry).data
93 #endif /* DO_REHASH */
94
95 #ifndef Alloc
96 #include "xmalloc.h"
97 #define Alloc(size) XMALLOCN(HashSetEntry, (size))
98 #define Free(ptr)      free(ptr)
99 #endif /* Alloc */
100
101 #ifdef ID_HASH
102 #define FindReturnValue                 bool
103 #define GetFindReturnValue(entry,found) (found)
104 #define NullReturnValue                 false
105 #define InsertReturnValue(findreturn)   !(findreturn)
106 #else /* ! ID_HASH */
107 #ifdef SCALAR_RETURN
108 #define FindReturnValue                 ValueType
109 #define GetFindReturnValue(entry,found) EntryGetValue(entry)
110 #define NullReturnValue                 NullValue
111 #else
112 #define FindReturnValue                 ValueType*
113 #define GetFindReturnValue(entry,found) & EntryGetValue(entry)
114 #define NullReturnValue                 & NullValue
115 #endif
116 #endif /* ID_HASH */
117
118 #ifndef InsertReturnValue
119 #define InsertReturnValue(findreturn)   findreturn
120 #endif
121
122 #ifndef KeyType
123 #define KeyType                  ValueType
124 #define GetKey(value)            (value)
125 #define InitData(self,value,key) (value) = (key)
126 #endif /* KeyType */
127
128 #ifndef ConstKeyType
129 #define ConstKeyType             const KeyType
130 #endif /* ConstKeyType */
131
132 #ifndef EntrySetEmpty
133 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
134 #endif /* EntrySetEmpty */
135 #ifndef EntrySetDeleted
136 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
137 #endif /* EntrySetDeleted */
138 #ifndef EntryIsEmpty
139 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
140 #endif /* EntryIsEmpty */
141 #ifndef EntryIsDeleted
142 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
143 #endif /* EntryIsDeleted */
144 #ifndef SetRangeEmpty
145 #define SetRangeEmpty(ptr,size)                \
146 {                                              \
147         size_t _i;                                 \
148         size_t _size = (size);                     \
149         HashSetEntry *entries = (ptr);             \
150         for (_i = 0; _i < _size; ++_i) {            \
151                 HashSetEntry *entry = & entries[_i];   \
152                 EntrySetEmpty(*entry);                 \
153         }                                          \
154 }
155 #endif /* SetRangeEmpty */
156
157 #ifndef HT_OCCUPANCY_FLT
158 /** how full before we double size */
159 #define HT_OCCUPANCY_FLT(x) ((x)/2)
160 #endif /* HT_OCCUPANCY_FLT */
161 #ifndef HT_1_DIV_OCCUPANCY_FLT
162 #define HT_1_DIV_OCCUPANCY_FLT 2
163 #endif
164
165 #ifndef HT_EMPTY_FLT
166 /** how empty before we half size */
167 #define HT_EMPTY_FLT(x)     ((x)/5)
168 #endif /* HT_EMPTY_FLT */
169
170 #ifndef HT_MIN_BUCKETS
171 /** default smallest bucket size */
172 #define HT_MIN_BUCKETS    32
173 #endif /* HT_MIN_BUCKETS */
174
175 #define ILLEGAL_POS       ((size_t)-1)
176
177 #ifdef hashset_size
178 /**
179  * Returns the number of elements in the hashset
180  */
181 size_t hashset_size(const HashSet *self)
182 {
183         return self->num_elements - self->num_deleted;
184 }
185 #else
186 static inline size_t hashset_size(const HashSet *self)
187 {
188         return self->num_elements - self->num_deleted;
189 }
190 #endif
191
192 /**
193  * Inserts an element into a hashset without growing the set (you have to make
194  * sure there's enough room for that.
195  * @returns  previous value if found, NullValue otherwise
196  * @note also see comments for hashset_insert()
197  * @internal
198  */
199 static inline FindReturnValue insert_nogrow(HashSet *self, KeyType key)
200 {
201         size_t   num_probes  = 0;
202         size_t   num_buckets = self->num_buckets;
203         size_t   hashmask    = num_buckets - 1;
204         unsigned hash        = Hash(self, key);
205         size_t   bucknum     = hash & hashmask;
206         size_t   insert_pos  = ILLEGAL_POS;
207
208         assert((num_buckets & (num_buckets - 1)) == 0);
209
210         for (;;) {
211                 HashSetEntry *entry = & self->entries[bucknum];
212
213                 if (EntryIsEmpty(*entry)) {
214                         size_t p;
215                         HashSetEntry *nentry;
216
217                         if (insert_pos != ILLEGAL_POS) {
218                                 p = insert_pos;
219                         } else {
220                                 p = bucknum;
221                         }
222
223                         nentry = &self->entries[p];
224                         InitData(self, EntryGetValue(*nentry), key);
225                         EntrySetHash(*nentry, hash);
226                         self->num_elements++;
227                         return GetFindReturnValue(*nentry, false);
228                 }
229                 if (EntryIsDeleted(*entry)) {
230                         if (insert_pos == ILLEGAL_POS)
231                                 insert_pos = bucknum;
232                 } else if (EntryGetHash(self, *entry) == hash) {
233                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
234                                 // Value already in the set, return it
235                                 return GetFindReturnValue(*entry, true);
236                         }
237                 }
238
239                 ++num_probes;
240                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
241                 assert(num_probes < num_buckets);
242         }
243 }
244
245 /**
246  * calculate shrink and enlarge limits
247  * @internal
248  */
249 static inline void reset_thresholds(HashSet *self)
250 {
251         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
252         self->shrink_threshold  = (size_t) HT_EMPTY_FLT(self->num_buckets);
253         self->consider_shrink   = 0;
254 }
255
256 #ifndef HAVE_OWN_RESIZE
257 /**
258  * Inserts an element into a hashset under the assumption that the hashset
259  * contains no deleted entries and the element doesn't exist in the hashset yet.
260  * @internal
261  */
262 static void insert_new(HashSet *self, unsigned hash, ValueType value)
263 {
264         size_t num_probes  = 0;
265         size_t num_buckets = self->num_buckets;
266         size_t hashmask    = num_buckets - 1;
267         size_t bucknum     = hash & hashmask;
268         size_t insert_pos  = ILLEGAL_POS;
269
270         //assert(value != NullValue);
271
272         for (;;) {
273                 HashSetEntry *entry = & self->entries[bucknum];
274
275                 if (EntryIsEmpty(*entry)) {
276                         size_t        p;
277                         HashSetEntry *nentry;
278
279                         if (insert_pos != ILLEGAL_POS) {
280                                 p = insert_pos;
281                         } else {
282                                 p = bucknum;
283                         }
284                         nentry = &self->entries[p];
285
286                         EntryGetValue(*nentry) = value;
287                         EntrySetHash(*nentry, hash);
288                         self->num_elements++;
289                         return;
290                 }
291                 assert(!EntryIsDeleted(*entry));
292
293                 ++num_probes;
294                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
295                 assert(num_probes < num_buckets);
296         }
297 }
298
299 /**
300  * Resize the hashset
301  * @internal
302  */
303 static inline void resize(HashSet *self, size_t new_size)
304 {
305         size_t num_buckets = self->num_buckets;
306         size_t i;
307         HashSetEntry *old_entries = self->entries;
308         HashSetEntry *new_entries;
309
310         /* allocate a new array with double size */
311         new_entries = Alloc(new_size);
312         SetRangeEmpty(new_entries, new_size);
313
314         /* use the new array */
315         self->entries      = new_entries;
316         self->num_buckets  = new_size;
317         self->num_elements = 0;
318         self->num_deleted  = 0;
319 #ifndef NDEBUG
320         self->entries_version++;
321 #endif
322         reset_thresholds(self);
323
324         /* reinsert all elements */
325         for (i = 0; i < num_buckets; ++i) {
326                 HashSetEntry *entry = & old_entries[i];
327                 if (EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
328                         continue;
329
330                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
331         }
332
333         /* now we can free the old array */
334         Free(old_entries);
335 }
336 #else
337
338 /* resize must be defined outside */
339 static inline void resize(HashSet *self, size_t new_size);
340
341 #endif
342
343 /**
344  * grow the hashset if adding 1 more elements would make it too crowded
345  * @internal
346  */
347 static inline void maybe_grow(HashSet *self)
348 {
349         size_t resize_to;
350
351         if (LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
352                 return;
353
354         /* double table size */
355         resize_to = self->num_buckets * 2;
356         resize(self, resize_to);
357 }
358
359 /**
360  * shrink the hashset if it is only sparsely filled
361  * @internal
362  */
363 static inline void maybe_shrink(HashSet *self)
364 {
365         size_t size;
366         size_t resize_to;
367
368         if (!self->consider_shrink)
369                 return;
370
371         self->consider_shrink = 0;
372         size                  = hashset_size(self);
373         if (size <= HT_MIN_BUCKETS)
374                 return;
375
376         if (LIKELY(size > self->shrink_threshold))
377                 return;
378
379         resize_to = ceil_po2(size);
380
381         if (resize_to < 4)
382                 resize_to = 4;
383
384         resize(self, resize_to);
385 }
386
387 #ifdef hashset_insert
388 /**
389  * Insert an element into the hashset. If no element with the given key exists yet,
390  * then a new one is created and initialized with the InitData function.
391  * Otherwise the existing element is returned (for hashs where key is equal to
392  * value, nothing is returned.)
393  *
394  * @param self   the hashset
395  * @param key    the key that identifies the data
396  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
397  */
398 FindReturnValue hashset_insert(HashSet *self, KeyType key)
399 {
400 #ifndef NDEBUG
401         self->entries_version++;
402 #endif
403
404         maybe_shrink(self);
405         maybe_grow(self);
406         return InsertReturnValue(insert_nogrow(self, key));
407 }
408 #endif
409
410 #ifdef hashset_find
411 /**
412  * Searches for an element with key @p key.
413  *
414  * @param self      the hashset
415  * @param key       the key to search for
416  * @returns         the found value or NullValue if nothing was found
417  */
418 FindReturnValue hashset_find(const HashSet *self, ConstKeyType key)
419 {
420         size_t   num_probes  = 0;
421         size_t   num_buckets = self->num_buckets;
422         size_t   hashmask    = num_buckets - 1;
423         unsigned hash        = Hash(self, key);
424         size_t   bucknum     = hash & hashmask;
425
426         for (;;) {
427                 HashSetEntry *entry = & self->entries[bucknum];
428
429                 if (EntryIsEmpty(*entry)) {
430                         return NullReturnValue;
431                 }
432                 if (EntryIsDeleted(*entry)) {
433                         // value is deleted
434                 } else if (EntryGetHash(self, *entry) == hash) {
435                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
436                                 // found the value
437                                 return GetFindReturnValue(*entry, true);
438                         }
439                 }
440
441                 ++num_probes;
442                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
443                 assert(num_probes < num_buckets);
444         }
445 }
446 #endif
447
448 #ifdef hashset_remove
449 /**
450  * Removes an element from a hashset. Does nothing if the set doesn't contain
451  * the element.
452  *
453  * @param self    the hashset
454  * @param key     key that identifies the data to remove
455  */
456 void hashset_remove(HashSet *self, ConstKeyType key)
457 {
458         size_t   num_probes  = 0;
459         size_t   num_buckets = self->num_buckets;
460         size_t   hashmask    = num_buckets - 1;
461         unsigned hash        = Hash(self, key);
462         size_t   bucknum     = hash & hashmask;
463
464 #ifndef NDEBUG
465         self->entries_version++;
466 #endif
467
468         for (;;) {
469                 HashSetEntry *entry = & self->entries[bucknum];
470
471                 if (EntryIsEmpty(*entry)) {
472                         return;
473                 }
474                 if (EntryIsDeleted(*entry)) {
475                         // entry is deleted
476                 } else if (EntryGetHash(self, *entry) == hash) {
477                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
478                                 EntrySetDeleted(*entry);
479                                 self->num_deleted++;
480                                 self->consider_shrink = 1;
481                                 return;
482                         }
483                 }
484
485                 ++num_probes;
486                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
487                 assert(num_probes < num_buckets);
488         }
489 }
490 #endif
491
492 /**
493  * Initializes hashset with a specific size
494  * @internal
495  */
496 static inline void init_size(HashSet *self, size_t initial_size)
497 {
498         if (initial_size < 4)
499                 initial_size = 4;
500
501         self->entries         = Alloc(initial_size);
502         SetRangeEmpty(self->entries, initial_size);
503         self->num_buckets     = initial_size;
504         self->consider_shrink = 0;
505         self->num_elements    = 0;
506         self->num_deleted     = 0;
507 #ifndef NDEBUG
508         self->entries_version = 0;
509 #endif
510 #ifdef ADDITIONAL_INIT
511         ADDITIONAL_INIT
512 #endif
513
514         reset_thresholds(self);
515 }
516
517 #ifdef hashset_init
518 /**
519  * Initializes a hashset with the default size. The memory for the set has to
520  * already allocated.
521  */
522 void hashset_init(HashSet *self)
523 {
524         init_size(self, HT_MIN_BUCKETS);
525 }
526 #endif
527
528 #ifdef hashset_destroy
529 /**
530  * Destroys a hashset, freeing all used memory (except the memory for the
531  * HashSet struct itself).
532  */
533 void hashset_destroy(HashSet *self)
534 {
535 #ifdef ADDITIONAL_TERM
536         ADDITIONAL_TERM
537 #endif
538         Free(self->entries);
539 #ifndef NDEBUG
540         self->entries = NULL;
541 #endif
542 }
543 #endif
544
545 #ifdef hashset_init_size
546 /**
547  * Initializes a hashset expecting expected_element size.
548  */
549 void hashset_init_size(HashSet *self, size_t expected_elements)
550 {
551         size_t needed_size;
552         size_t po2size;
553
554         if (expected_elements >= UINT_MAX/2) {
555                 abort();
556         }
557
558         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
559         po2size     = ceil_po2(needed_size);
560         init_size(self, po2size);
561 }
562 #endif
563
564 #ifdef hashset_iterator_init
565 /**
566  * Initializes a hashset iterator. The memory for the allocator has to be
567  * already allocated.
568  * @note it is not allowed to remove or insert elements while iterating
569  */
570 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
571 {
572         self->current_bucket = hashset->entries - 1;
573         self->end            = hashset->entries + hashset->num_buckets;
574 #ifndef NDEBUG
575         self->set             = hashset;
576         self->entries_version = hashset->entries_version;
577 #endif
578 }
579 #endif
580
581 #ifdef hashset_iterator_next
582 /**
583  * Returns the next value in the iterator or NULL if no value is left
584  * in the hashset.
585  * @note it is not allowed to remove or insert elements while iterating
586  */
587 ValueType hashset_iterator_next(HashSetIterator *self)
588 {
589         HashSetEntry *current_bucket = self->current_bucket;
590         HashSetEntry *end            = self->end;
591
592         /* using hashset_insert or hashset_remove is not allowed while iterating */
593         assert(self->entries_version == self->set->entries_version);
594
595         do {
596                 current_bucket++;
597                 if (current_bucket >= end)
598                         return NullValue;
599         } while (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
600
601         self->current_bucket = current_bucket;
602         return EntryGetValue(*current_bucket);
603 }
604 #endif
605
606 #ifdef hashset_remove_iterator
607 /**
608  * Removes the element the iterator points to. Removing an element a second time
609  * has no result.
610  */
611 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
612 {
613         HashSetEntry *entry = iter->current_bucket;
614
615         /* iterator_next needs to have been called at least once */
616         assert(entry >= self->entries);
617         /* needs to be on a valid element */
618         assert(entry < self->entries + self->num_buckets);
619
620         if (EntryIsDeleted(*entry))
621                 return;
622
623         EntrySetDeleted(*entry);
624         self->num_deleted++;
625         self->consider_shrink = 1;
626 }
627 #endif
628
629 #endif