Fix typo in comment.
[libfirm] / ir / adt / hashset.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  *
27  *
28  * You have to specialize this file by defining:
29  *
30  * <ul>
31  *  <li><b>HashSet</b>         The name of the hashset type</li>
32  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
33  *  <li><b>ValueType</b>       The type of the stored data values</li>
34  *  <li><b>NullValue</b>       A special value representing no values</li>
35  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
36  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
37  * </ul>
38  *
39  * Note that by default it is assumed that the data values themselfes are used
40  * as keys. However you can change that with additional defines:
41  *
42  * <ul>
43  *  <li><b>KeyType</b>         The type of the keys identifying data values.
44  *                             Defining this implies, that a data value contains
45  *                             more than just the key.</li>
46  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
47  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
48  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
49  *                             them on demand from the datavalues. (useful if
50  *                             calculating the hash-values takes less time than
51  *                             a memory access)</li>
52  * </ul>
53  *
54  * You can further fine tune your hashset by defining the following:
55  *
56  * <ul>
57  *  <li><b>JUMP(num_probes)</b> The probing method</li>
58  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
59  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
60  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
61  *                                      the Null value</li>
62  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
63  * </ul>
64  */
65 #ifdef HashSet
66
67 #include <stdlib.h>
68 #include <string.h>
69 #include <assert.h>
70
71 #include "bitfiddle.h"
72 #include "util.h"
73
74 /* quadratic probing */
75 #ifndef JUMP
76 #define JUMP(num_probes)      (num_probes)
77 #endif /* JUMP */
78
79 #ifndef Hash
80 #define ID_HASH
81 #define Hash(self,key)        ((unsigned)(((char *)key) - (char *)0))
82 #endif /* Hash */
83
84 #ifdef DO_REHASH
85 #define HashSetEntry                   ValueType
86 #define EntrySetHash(entry,new_hash)   (void)0
87 #define EntryGetHash(self,entry)       Hash(self, GetKey(entry))
88 #define EntryGetValue(entry)           (entry)
89 #else /* ! DO_REHASH */
90 #define EntryGetHash(self,entry)       (entry).hash
91 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
92 #define EntryGetValue(entry)           (entry).data
93 #endif /* DO_REHASH */
94
95 #ifndef Alloc
96 #include "xmalloc.h"
97 #define Alloc(size) XMALLOCN(HashSetEntry, (size))
98 #define Free(ptr)      free(ptr)
99 #endif /* Alloc */
100
101 #ifdef ID_HASH
102 #define InsertReturnValue                 int
103 #define GetInsertReturnValue(entry,found) (found)
104 #define NullReturnValue                   0
105 #else /* ! ID_HASH */
106 #ifdef SCALAR_RETURN
107 #define InsertReturnValue                 ValueType
108 #define GetInsertReturnValue(entry,found) EntryGetValue(entry)
109 #define NullReturnValue                   NullValue
110 #else
111 #define InsertReturnValue                 ValueType*
112 #define GetInsertReturnValue(entry,found) & EntryGetValue(entry)
113 #define NullReturnValue                   & NullValue
114 #endif
115 #endif /* ID_HASH */
116
117 #ifndef KeyType
118 #define KeyType                  ValueType
119 #define GetKey(value)            (value)
120 #define InitData(self,value,key) (value) = (key)
121 #endif /* KeyType */
122
123 #ifndef ConstKeyType
124 #define ConstKeyType             const KeyType
125 #endif /* ConstKeyType */
126
127 #ifndef EntrySetEmpty
128 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
129 #endif /* EntrySetEmpty */
130 #ifndef EntrySetDeleted
131 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
132 #endif /* EntrySetDeleted */
133 #ifndef EntryIsEmpty
134 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
135 #endif /* EntryIsEmpty */
136 #ifndef EntryIsDeleted
137 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
138 #endif /* EntryIsDeleted */
139 #ifndef SetRangeEmpty
140 #define SetRangeEmpty(ptr,size)                \
141 {                                              \
142         size_t _i;                                 \
143         size_t _size = (size);                     \
144         HashSetEntry *entries = (ptr);             \
145         for (_i = 0; _i < _size; ++_i) {            \
146                 HashSetEntry *entry = & entries[_i];   \
147                 EntrySetEmpty(*entry);                 \
148         }                                          \
149 }
150 #endif /* SetRangeEmpty */
151
152 #ifndef HT_OCCUPANCY_FLT
153 /** how full before we double size */
154 #define HT_OCCUPANCY_FLT(x) ((x)/2)
155 #endif /* HT_OCCUPANCY_FLT */
156 #ifndef HT_1_DIV_OCCUPANCY_FLT
157 #define HT_1_DIV_OCCUPANCY_FLT 2
158 #endif
159
160 #ifndef HT_EMPTY_FLT
161 /** how empty before we half size */
162 #define HT_EMPTY_FLT(x)     ((x)/5)
163 #endif /* HT_EMPTY_FLT */
164
165 #ifndef HT_MIN_BUCKETS
166 /** default smallest bucket size */
167 #define HT_MIN_BUCKETS    32
168 #endif /* HT_MIN_BUCKETS */
169
170 #define ILLEGAL_POS       ((size_t)-1)
171
172 /* check that all needed functions are defined */
173 #ifndef hashset_init
174 #error You have to redefine hashset_init
175 #endif
176 #ifndef hashset_init_size
177 #error You have to redefine hashset_init_size
178 #endif
179 #ifndef hashset_destroy
180 #error You have to redefine hashset_destroy
181 #endif
182 #ifndef hashset_insert
183 #error You have to redefine hashset_insert
184 #endif
185 #ifndef hashset_remove
186 #error You have to redefine hashset_remove
187 #endif
188 #ifndef hashset_find
189 #error You have to redefine hashset_find
190 #endif
191 #ifndef hashset_size
192 #error You have to redefine hashset_size
193 #endif
194
195 #ifndef NO_ITERATOR
196 #ifndef hashset_iterator_init
197 #error You have to redefine hashset_iterator_init
198 #endif
199 #ifndef hashset_iterator_next
200 #error You have to redefine hashset_iterator_next
201 #endif
202 #ifndef hashset_remove_iterator
203 #error You have to redefine hashset_remove_iterator
204 #endif
205 #endif
206
207 /**
208  * Returns the number of elements in the hashset
209  */
210 size_t hashset_size(const HashSet *self)
211 {
212         return self->num_elements - self->num_deleted;
213 }
214
215 /**
216  * Inserts an element into a hashset without growing the set (you have to make
217  * sure there's enough room for that.
218  * @note also see comments for hashset_insert()
219  * @internal
220  */
221 static inline InsertReturnValue insert_nogrow(HashSet *self, KeyType key)
222 {
223         size_t   num_probes  = 0;
224         size_t   num_buckets = self->num_buckets;
225         size_t   hashmask    = num_buckets - 1;
226         unsigned hash        = Hash(self, key);
227         size_t   bucknum     = hash & hashmask;
228         size_t   insert_pos  = ILLEGAL_POS;
229
230         assert((num_buckets & (num_buckets - 1)) == 0);
231
232         for (;;) {
233                 HashSetEntry *entry = & self->entries[bucknum];
234
235                 if (EntryIsEmpty(*entry)) {
236                         size_t p;
237                         HashSetEntry *nentry;
238
239                         if (insert_pos != ILLEGAL_POS) {
240                                 p = insert_pos;
241                         } else {
242                                 p = bucknum;
243                         }
244
245                         nentry = &self->entries[p];
246                         InitData(self, EntryGetValue(*nentry), key);
247                         EntrySetHash(*nentry, hash);
248                         self->num_elements++;
249                         return GetInsertReturnValue(*nentry, 0);
250                 }
251                 if (EntryIsDeleted(*entry)) {
252                         if (insert_pos == ILLEGAL_POS)
253                                 insert_pos = bucknum;
254                 } else if (EntryGetHash(self, *entry) == hash) {
255                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
256                                 // Value already in the set, return it
257                                 return GetInsertReturnValue(*entry, 1);
258                         }
259                 }
260
261                 ++num_probes;
262                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
263                 assert(num_probes < num_buckets);
264         }
265 }
266
267 /**
268  * calculate shrink and enlarge limits
269  * @internal
270  */
271 static inline void reset_thresholds(HashSet *self)
272 {
273         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
274         self->shrink_threshold  = (size_t) HT_EMPTY_FLT(self->num_buckets);
275         self->consider_shrink   = 0;
276 }
277
278 #ifndef HAVE_OWN_RESIZE
279 /**
280  * Inserts an element into a hashset under the assumption that the hashset
281  * contains no deleted entries and the element doesn't exist in the hashset yet.
282  * @internal
283  */
284 static void insert_new(HashSet *self, unsigned hash, ValueType value)
285 {
286         size_t num_probes  = 0;
287         size_t num_buckets = self->num_buckets;
288         size_t hashmask    = num_buckets - 1;
289         size_t bucknum     = hash & hashmask;
290         size_t insert_pos  = ILLEGAL_POS;
291
292         //assert(value != NullValue);
293
294         for (;;) {
295                 HashSetEntry *entry = & self->entries[bucknum];
296
297                 if (EntryIsEmpty(*entry)) {
298                         size_t        p;
299                         HashSetEntry *nentry;
300
301                         if (insert_pos != ILLEGAL_POS) {
302                                 p = insert_pos;
303                         } else {
304                                 p = bucknum;
305                         }
306                         nentry = &self->entries[p];
307
308                         EntryGetValue(*nentry) = value;
309                         EntrySetHash(*nentry, hash);
310                         self->num_elements++;
311                         return;
312                 }
313                 assert(!EntryIsDeleted(*entry));
314
315                 ++num_probes;
316                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
317                 assert(num_probes < num_buckets);
318         }
319 }
320
321 /**
322  * Resize the hashset
323  * @internal
324  */
325 static inline void resize(HashSet *self, size_t new_size)
326 {
327         size_t num_buckets = self->num_buckets;
328         size_t i;
329         HashSetEntry *old_entries = self->entries;
330         HashSetEntry *new_entries;
331
332         /* allocate a new array with double size */
333         new_entries = Alloc(new_size);
334         SetRangeEmpty(new_entries, new_size);
335
336         /* use the new array */
337         self->entries      = new_entries;
338         self->num_buckets  = new_size;
339         self->num_elements = 0;
340         self->num_deleted  = 0;
341 #ifndef NDEBUG
342         self->entries_version++;
343 #endif
344         reset_thresholds(self);
345
346         /* reinsert all elements */
347         for (i = 0; i < num_buckets; ++i) {
348                 HashSetEntry *entry = & old_entries[i];
349                 if (EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
350                         continue;
351
352                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
353         }
354
355         /* now we can free the old array */
356         Free(old_entries);
357 }
358 #else
359
360 /* resize must be defined outside */
361 static inline void resize(HashSet *self, size_t new_size);
362
363 #endif
364
365 /**
366  * grow the hashset if adding 1 more elements would make it too crowded
367  * @internal
368  */
369 static inline void maybe_grow(HashSet *self)
370 {
371         size_t resize_to;
372
373         if (LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
374                 return;
375
376         /* double table size */
377         resize_to = self->num_buckets * 2;
378         resize(self, resize_to);
379 }
380
381 /**
382  * shrink the hashset if it is only sparsely filled
383  * @internal
384  */
385 static inline void maybe_shrink(HashSet *self)
386 {
387         size_t size;
388         size_t resize_to;
389
390         if (!self->consider_shrink)
391                 return;
392
393         self->consider_shrink = 0;
394         size                  = hashset_size(self);
395         if (size <= HT_MIN_BUCKETS)
396                 return;
397
398         if (LIKELY(size > self->shrink_threshold))
399                 return;
400
401         resize_to = ceil_po2(size);
402
403         if (resize_to < 4)
404                 resize_to = 4;
405
406         resize(self, resize_to);
407 }
408
409 /**
410  * Insert an element into the hashset. If no element with the given key exists yet,
411  * then a new one is created and initialized with the InitData function.
412  * Otherwise the existing element is returned (for hashs where key is equal to
413  * value, nothing is returned.)
414  *
415  * @param self   the hashset
416  * @param key    the key that identifies the data
417  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
418  */
419 InsertReturnValue hashset_insert(HashSet *self, KeyType key)
420 {
421 #ifndef NDEBUG
422         self->entries_version++;
423 #endif
424
425         maybe_shrink(self);
426         maybe_grow(self);
427         return insert_nogrow(self, key);
428 }
429
430 /**
431  * Searches for an element with key @p key.
432  *
433  * @param self      the hashset
434  * @param key       the key to search for
435  * @returns         the found value or NullValue if nothing was found
436  */
437 InsertReturnValue hashset_find(const HashSet *self, ConstKeyType key)
438 {
439         size_t   num_probes  = 0;
440         size_t   num_buckets = self->num_buckets;
441         size_t   hashmask    = num_buckets - 1;
442         unsigned hash        = Hash(self, key);
443         size_t   bucknum     = hash & hashmask;
444
445         for (;;) {
446                 HashSetEntry *entry = & self->entries[bucknum];
447
448                 if (EntryIsEmpty(*entry)) {
449                         return NullReturnValue;
450                 }
451                 if (EntryIsDeleted(*entry)) {
452                         // value is deleted
453                 } else if (EntryGetHash(self, *entry) == hash) {
454                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
455                                 // found the value
456                                 return GetInsertReturnValue(*entry, 1);
457                         }
458                 }
459
460                 ++num_probes;
461                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
462                 assert(num_probes < num_buckets);
463         }
464 }
465
466 /**
467  * Removes an element from a hashset. Does nothing if the set doesn't contain
468  * the element.
469  *
470  * @param self    the hashset
471  * @param key     key that identifies the data to remove
472  */
473 void hashset_remove(HashSet *self, ConstKeyType key)
474 {
475         size_t   num_probes  = 0;
476         size_t   num_buckets = self->num_buckets;
477         size_t   hashmask    = num_buckets - 1;
478         unsigned hash        = Hash(self, key);
479         size_t   bucknum     = hash & hashmask;
480
481 #ifndef NDEBUG
482         self->entries_version++;
483 #endif
484
485         for (;;) {
486                 HashSetEntry *entry = & self->entries[bucknum];
487
488                 if (EntryIsEmpty(*entry)) {
489                         return;
490                 }
491                 if (EntryIsDeleted(*entry)) {
492                         // entry is deleted
493                 } else if (EntryGetHash(self, *entry) == hash) {
494                         if (KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
495                                 EntrySetDeleted(*entry);
496                                 self->num_deleted++;
497                                 self->consider_shrink = 1;
498                                 return;
499                         }
500                 }
501
502                 ++num_probes;
503                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
504                 assert(num_probes < num_buckets);
505         }
506 }
507
508 /**
509  * Initializes hashset with a specific size
510  * @internal
511  */
512 static inline void init_size(HashSet *self, size_t initial_size)
513 {
514         if (initial_size < 4)
515                 initial_size = 4;
516
517         self->entries         = Alloc(initial_size);
518         SetRangeEmpty(self->entries, initial_size);
519         self->num_buckets     = initial_size;
520         self->consider_shrink = 0;
521         self->num_elements    = 0;
522         self->num_deleted     = 0;
523 #ifndef NDEBUG
524         self->entries_version = 0;
525 #endif
526 #ifdef ADDITIONAL_INIT
527         ADDITIONAL_INIT
528 #endif
529
530         reset_thresholds(self);
531 }
532
533 /**
534  * Initializes a hashset with the default size. The memory for the set has to
535  * already allocated.
536  */
537 void hashset_init(HashSet *self)
538 {
539         init_size(self, HT_MIN_BUCKETS);
540 }
541
542 /**
543  * Destroys a hashset, freeing all used memory (except the memory for the
544  * HashSet struct itself).
545  */
546 void hashset_destroy(HashSet *self)
547 {
548 #ifdef ADDITIONAL_TERM
549         ADDITIONAL_TERM
550 #endif
551         Free(self->entries);
552 #ifndef NDEBUG
553         self->entries = NULL;
554 #endif
555 }
556
557 /**
558  * Initializes a hashset expecting expected_element size.
559  */
560 void hashset_init_size(HashSet *self, size_t expected_elements)
561 {
562         size_t needed_size;
563         size_t po2size;
564
565         if (expected_elements >= UINT_MAX/2) {
566                 abort();
567         }
568
569         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
570         po2size     = ceil_po2(needed_size);
571         init_size(self, po2size);
572 }
573
574 #ifndef NO_ITERATOR
575 /**
576  * Initializes a hashset iterator. The memory for the allocator has to be
577  * already allocated.
578  * @note it is not allowed to remove or insert elements while iterating
579  */
580 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
581 {
582         self->current_bucket = hashset->entries - 1;
583         self->end            = hashset->entries + hashset->num_buckets;
584 #ifndef NDEBUG
585         self->set             = hashset;
586         self->entries_version = hashset->entries_version;
587 #endif
588 }
589
590 /**
591  * Returns the next value in the iterator or NULL if no value is left
592  * in the hashset.
593  * @note it is not allowed to remove or insert elements while iterating
594  */
595 ValueType hashset_iterator_next(HashSetIterator *self)
596 {
597         HashSetEntry *current_bucket = self->current_bucket;
598         HashSetEntry *end            = self->end;
599
600         /* using hashset_insert or hashset_remove is not allowed while iterating */
601         assert(self->entries_version == self->set->entries_version);
602
603         do {
604                 current_bucket++;
605                 if (current_bucket >= end)
606                         return NullValue;
607         } while (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
608
609         self->current_bucket = current_bucket;
610         return EntryGetValue(*current_bucket);
611 }
612
613 /**
614  * Removes the element the iterator points to. Removing an element a second time
615  * has no result.
616  */
617 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
618 {
619         HashSetEntry *entry = iter->current_bucket;
620
621         /* iterator_next needs to have been called at least once */
622         assert(entry >= self->entries);
623         /* needs to be on a valid element */
624         assert(entry < self->entries + self->num_buckets);
625
626         if (EntryIsDeleted(*entry))
627                 return;
628
629         EntrySetDeleted(*entry);
630         self->num_deleted++;
631         self->consider_shrink = 1;
632 }
633 #endif /* NO_ITERATOR */
634
635 #endif /* HashSet */