Adapted to stat events
[libfirm] / ir / adt / hashset.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Generic hashset implementation
23  * @author  Matthias Braun, inspiration from densehash from google sparsehash
24  *          package
25  * @date    17.03.2007
26  * @version $Id$
27  *
28  *
29  * You have to specialize this file by defining:
30  *
31  * <ul>
32  *  <li><b>HashSet</b>         The name of the hashset type</li>
33  *  <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
34  *  <li><b>ValueType</b>       The type of the stored data values</li>
35  *  <li><b>NullValue</b>       A special value representing no values</li>
36  *  <li><b>DeletedValue</b>    A special value representing deleted entries</li>
37  *  <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
38  * </ul>
39  *
40  * Note that by default it is assumed that the data values themselfes are used
41  * as keys. However you can change that with additional defines:
42  *
43  * <ul>
44  *  <li><b>KeyType</b>         The type of the keys identifying data values.
45  *                             Defining this implies, that a data value contains
46  *                             more than just the key.</li>
47  *  <li><b>GetKey(value)</b>   Extracts the key from a data value</li>
48  *  <li><b>KeysEqual(hashset,key1,key2)</b>  Tests wether 2 keys are equal</li>
49  *  <li><b>DO_REHASH</b>       Instead of storing the hash-values, recalculate
50  *                             them on demand from the datavalues. (usefull if
51  *                             calculating the hash-values takes less time than
52  *                             a memory access)</li>
53  * </ul>
54  *
55  * You can further fine tune your hashset by defining the following:
56  *
57  * <ul>
58  *  <li><b>JUMP(num_probes)</b> The probing method</li>
59  *  <li><b>Alloc(count)</b>     Allocates count hashset entries (NOT bytes)</li>
60  *  <li><b>Free(ptr)</b>        Frees a block of memory allocated by Alloc</li>
61  *  <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
62  *                                      the Null value</li>
63  *  <li><b>ADDITIONAL_DATA<b>   Additional fields appended to the hashset struct</li>
64  * </ul>
65  */
66 #ifdef HashSet
67
68 #include <stdlib.h>
69 #include <string.h>
70 #include <assert.h>
71
72 #include "bitfiddle.h"
73 #include "util.h"
74
75 /* quadratic probing */
76 #ifndef JUMP
77 #define JUMP(num_probes)      (num_probes)
78 #endif /* JUMP */
79
80 #ifndef Hash
81 #define ID_HASH
82 #define Hash(self,value)      ((unsigned)(value))
83 #endif /* Hash */
84
85 #ifdef DO_REHASH
86 #define HashSetEntry                   ValueType
87 #define EntrySetHash(entry,new_hash)
88 #define EntryGetHash(self,entry)       Hash(self,entry)
89 #define EntryGetValue(entry)           (entry)
90 #else /* ! DO_REHASH */
91 #define EntryGetHash(self,entry)       (entry).hash
92 #define EntrySetHash(entry,new_hash)   (entry).hash = (new_hash)
93 #define EntryGetValue(entry)           (entry).data
94 #endif /* DO_REHASH */
95
96 #ifndef Alloc
97 #include "xmalloc.h"
98 #define Alloc(size)    (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
99 #define Free(ptr)      free(ptr)
100 #endif /* Alloc */
101
102 #ifdef ID_HASH
103 #define InsertReturnValue               int
104 #define GetInsertReturnValue(entry,new) (new)
105 #else /* ! ID_HASH */
106 #define InsertReturnValue               ValueType
107 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
108 #endif /* ID_HASH */
109
110 #ifndef KeyType
111 #define KeyType                  ValueType
112 #define GetKey(value)            (value)
113 #define InitData(self,value,key) (value) = (key)
114 #endif /* KeyType */
115
116 #ifndef ConstKeyType
117 #define ConstKeyType             const KeyType
118 #endif /* ConstKeyType */
119
120 #ifndef EntrySetEmpty
121 #define EntrySetEmpty(entry)    EntryGetValue(entry) = NullValue
122 #endif /* EntrySetEmpty */
123 #ifndef EntrySetDeleted
124 #define EntrySetDeleted(entry)  EntryGetValue(entry) = DeletedValue
125 #endif /* EntrySetDeleted */
126 #ifndef EntryIsEmpty
127 #define EntryIsEmpty(entry)     (EntryGetValue(entry) == NullValue)
128 #endif /* EntryIsEmpty */
129 #ifndef EntryIsDeleted
130 #define EntryIsDeleted(entry)   (EntryGetValue(entry) == DeletedValue)
131 #endif /* EntryIsDeleted */
132 #ifndef SetRangeEmpty
133 #define SetRangeEmpty(ptr,size)                \
134 {                                              \
135         size_t _i;                                 \
136         size_t _size = (size);                     \
137         HashSetEntry *entries = (ptr);             \
138         for(_i = 0; _i < _size; ++_i) {            \
139                 HashSetEntry *entry = & entries[_i];   \
140                 EntrySetEmpty(*entry);                 \
141         }                                          \
142 }
143 #endif /* SetRangeEmpty */
144
145 #ifndef HT_OCCUPANCY_FLT
146 /** how full before we double size */
147 #define HT_OCCUPANCY_FLT(x) ((x)/2)
148 #endif /* HT_OCCUPANCY_FLT */
149 #ifndef HT_1_DIV_OCCUPANCY_FLT
150 #define HT_1_DIV_OCCUPANCY_FLT 2
151 #endif
152
153 #ifndef HT_EMPTY_FLT
154 /** how empty before we half size */
155 #define HT_EMPTY_FLT(x)     ((x)/5)
156 #endif /* HT_EMPTY_FLT */
157
158 #ifndef HT_MIN_BUCKETS
159 /** default smallest bucket size */
160 #define HT_MIN_BUCKETS    32
161 #endif /* HT_MIN_BUCKETS */
162
163 #define ILLEGAL_POS       ((size_t)-1)
164
165 /* check that all needed functions are defined */
166 #ifndef hashset_init
167 #error You have to redefine hashset_init
168 #endif
169 #ifndef hashset_init_size
170 #error You have to redefine hashset_init_size
171 #endif
172 #ifndef hashset_destroy
173 #error You have to redefine hashset_destroy
174 #endif
175 #ifndef hashset_insert
176 #error You have to redefine hashset_insert
177 #endif
178 #ifndef hashset_remove
179 #error You have to redefine hashset_remove
180 #endif
181 #ifndef hashset_find
182 #error You have to redefine hashset_find
183 #endif
184 #ifndef hashset_size
185 #error You have to redefine hashset_size
186 #endif
187 #ifndef hashset_iterator_init
188 #error You have to redefine hashset_iterator_init
189 #endif
190 #ifndef hashset_iterator_next
191 #error You have to redefine hashset_iterator_next
192 #endif
193 #ifndef hashset_remove_iterator
194 #error You have to redefine hashset_remove_iterator
195 #endif
196
197 /**
198  * Returns the number of elements in the hashset
199  */
200 size_t hashset_size(const HashSet *self)
201 {
202         return self->num_elements - self->num_deleted;
203 }
204
205 /**
206  * Inserts an element into a hashset without growing the set (you have to make
207  * sure there's enough room for that.
208  * @note also see comments for hashset_insert()
209  * @internal
210  */
211 static INLINE
212 InsertReturnValue insert_nogrow(HashSet *self, KeyType key)
213 {
214         size_t num_probes = 0;
215         size_t num_buckets = self->num_buckets;
216         size_t hashmask = num_buckets - 1;
217         unsigned hash = Hash(self, key);
218         size_t bucknum = hash & hashmask;
219         size_t insert_pos = ILLEGAL_POS;
220
221         assert((num_buckets & (num_buckets - 1)) == 0);
222
223         while(1) {
224                 HashSetEntry *entry = & self->entries[bucknum];
225
226                 if(EntryIsEmpty(*entry)) {
227                         size_t p;
228                         HashSetEntry *nentry;
229
230                         if(insert_pos != ILLEGAL_POS) {
231                                 p = insert_pos;
232                         } else {
233                                 p = bucknum;
234                         }
235
236                         nentry = &self->entries[p];
237                         InitData(self, EntryGetValue(*nentry), key);
238                         EntrySetHash(*nentry, hash);
239                         self->num_elements++;
240                         return GetInsertReturnValue(*nentry, 1);
241                 }
242                 if(EntryIsDeleted(*entry)) {
243                         if(insert_pos == ILLEGAL_POS)
244                                 insert_pos = bucknum;
245                 } else if(EntryGetHash(self, *entry) == hash) {
246                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
247                                 // Value already in the set, return it
248                                 return GetInsertReturnValue(*entry, 0);
249                         }
250                 }
251
252                 ++num_probes;
253                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
254                 assert(num_probes < num_buckets);
255         }
256 }
257
258 /**
259  * Inserts an element into a hashset under the assumption that the hashset
260  * contains no deleted entries and the element doesn't exist in the hashset yet.
261  * @internal
262  */
263 static
264 void insert_new(HashSet *self, unsigned hash, ValueType value)
265 {
266         size_t num_probes = 0;
267         size_t num_buckets = self->num_buckets;
268         size_t hashmask = num_buckets - 1;
269         size_t bucknum = hash & hashmask;
270         size_t insert_pos = ILLEGAL_POS;
271
272         assert(value != NullValue);
273
274         while(1) {
275                 HashSetEntry *entry = & self->entries[bucknum];
276
277                 if(EntryIsEmpty(*entry)) {
278                         size_t p;
279                         HashSetEntry *nentry;
280
281                         if(insert_pos != ILLEGAL_POS) {
282                                 p = insert_pos;
283                         } else {
284                                 p = bucknum;
285                         }
286                         nentry = &self->entries[p];
287
288                         EntryGetValue(*nentry) = value;
289                         EntrySetHash(*nentry, hash);
290                         self->num_elements++;
291                         return;
292                 }
293                 assert(!EntryIsDeleted(*entry));
294
295                 ++num_probes;
296                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
297                 assert(num_probes < num_buckets);
298         }
299 }
300
301 /**
302  * calculate shrink and enlarge limits
303  * @internal
304  */
305 static INLINE
306 void reset_thresholds(HashSet *self)
307 {
308         self->enlarge_threshold = (size_t) HT_OCCUPANCY_FLT(self->num_buckets);
309         self->shrink_threshold = (size_t) HT_EMPTY_FLT(self->num_buckets);
310         self->consider_shrink = 0;
311 }
312
313 /**
314  * Resize the hashset
315  * @internal
316  */
317 static INLINE
318 void resize(HashSet *self, size_t new_size)
319 {
320         size_t num_buckets = self->num_buckets;
321         size_t i;
322         HashSetEntry *old_entries = self->entries;
323         HashSetEntry *new_entries;
324
325         /* allocate a new array with double size */
326         new_entries = Alloc(new_size);
327         SetRangeEmpty(new_entries, new_size);
328
329         /* use the new array */
330         self->entries = new_entries;
331         self->num_buckets = new_size;
332         self->num_elements = 0;
333         self->num_deleted = 0;
334 #ifndef NDEBUG
335         self->entries_version++;
336 #endif
337         reset_thresholds(self);
338
339         /* reinsert all elements */
340         for(i = 0; i < num_buckets; ++i) {
341                 HashSetEntry *entry = & old_entries[i];
342                 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
343                         continue;
344
345                 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
346         }
347
348         /* now we can free the old array */
349         Free(old_entries);
350 }
351
352 /**
353  * grow the hashset if adding 1 more elements would make it too crowded
354  * @internal
355  */
356 static INLINE
357 void maybe_grow(HashSet *self)
358 {
359         size_t resize_to;
360
361         if(LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
362                 return;
363
364         /* double table size */
365         resize_to = self->num_buckets * 2;
366         resize(self, resize_to);
367 }
368
369 /**
370  * shrink the hashset if it is only sparsely filled
371  * @internal
372  */
373 static INLINE
374 void maybe_shrink(HashSet *self)
375 {
376         size_t size;
377         size_t resize_to;
378
379         if(!self->consider_shrink)
380                 return;
381
382         self->consider_shrink = 0;
383         size = hashset_size(self);
384         if(size <= HT_MIN_BUCKETS)
385                 return;
386
387         if(LIKELY(size > self->shrink_threshold))
388                 return;
389
390         resize_to = ceil_po2(size);
391
392         if(resize_to < 4)
393                 resize_to = 4;
394
395         resize(self, resize_to);
396 }
397
398 /**
399  * Insert an element into the hashset. If no element with key key exists yet,
400  * then a new one is created and initialized with the InitData function.
401  * Otherwise the exisiting element is returned (for hashs where key is equal to
402  * value, nothing is returned.)
403  *
404  * @param self   the hashset
405  * @param key    the key that identifies the data
406  * @returns      the existing or newly created data element (or nothing in case of hashs where keys are the while value)
407  */
408 InsertReturnValue hashset_insert(HashSet *self, KeyType key)
409 {
410 #ifndef NDEBUG
411         self->entries_version++;
412 #endif
413
414         maybe_shrink(self);
415         maybe_grow(self);
416         return insert_nogrow(self, key);
417 }
418
419 /**
420  * Searchs for an element with key @p key.
421  *
422  * @param self      the hashset
423  * @param key       the key to search for
424  * @returns         the found value or NullValue if nothing was found
425  */
426 ValueType hashset_find(const HashSet *self, ConstKeyType key)
427 {
428         size_t num_probes = 0;
429         size_t num_buckets = self->num_buckets;
430         size_t hashmask = num_buckets - 1;
431         unsigned hash = Hash(self, key);
432         size_t bucknum = hash & hashmask;
433
434         while(1) {
435                 HashSetEntry *entry = & self->entries[bucknum];
436
437                 if(EntryIsEmpty(*entry)) {
438                         return NullValue;
439                 }
440                 if(EntryIsDeleted(*entry)) {
441                         // value is deleted
442                 } else if(EntryGetHash(self, *entry) == hash) {
443                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
444                                 // found the value
445                                 return EntryGetValue(*entry);
446                         }
447                 }
448
449                 ++num_probes;
450                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
451                 assert(num_probes < num_buckets);
452         }
453 }
454
455 /**
456  * Removes an element from a hashset. Does nothing if the set doesn't contain
457  * the element.
458  *
459  * @param self    the hashset
460  * @param key     key that identifies the data to remove
461  */
462 void hashset_remove(HashSet *self, ConstKeyType key)
463 {
464         size_t num_probes = 0;
465         size_t num_buckets = self->num_buckets;
466         size_t hashmask = num_buckets - 1;
467         unsigned hash = Hash(self, key);
468         size_t bucknum = hash & hashmask;
469
470 #ifndef NDEBUG
471         self->entries_version++;
472 #endif
473
474         while(1) {
475                 HashSetEntry *entry = & self->entries[bucknum];
476
477                 if(EntryIsEmpty(*entry)) {
478                         return;
479                 }
480                 if(EntryIsDeleted(*entry)) {
481                         // entry is deleted
482                 } else if(EntryGetHash(self, *entry) == hash) {
483                         if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
484                                 EntrySetDeleted(*entry);
485                                 self->num_deleted++;
486                                 self->consider_shrink = 1;
487                                 return;
488                         }
489                 }
490
491                 ++num_probes;
492                 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
493                 assert(num_probes < num_buckets);
494         }
495 }
496
497 /**
498  * Initializes hashset with a specific size
499  * @internal
500  */
501 static INLINE
502 void init_size(HashSet *self, size_t initial_size)
503 {
504         if(initial_size < 4)
505                 initial_size = 4;
506
507         self->entries = Alloc(initial_size);
508         SetRangeEmpty(self->entries, initial_size);
509         self->num_buckets = initial_size;
510         self->consider_shrink = 0;
511         self->num_elements = 0;
512         self->num_deleted = 0;
513 #ifndef NDEBUG
514         self->entries_version = 0;
515 #endif
516
517         reset_thresholds(self);
518 }
519
520 /**
521  * Initialializes a hashset with the default size. The memory for the set has to
522  * already allocated.
523  */
524 void hashset_init(HashSet *self)
525 {
526         init_size(self, HT_MIN_BUCKETS);
527 }
528
529 /**
530  * Destroys a hashset, freeing all used memory (except the memory for the
531  * HashSet struct itself).
532  */
533 void hashset_destroy(HashSet *self)
534 {
535         Free(self->entries);
536 #ifndef NDEBUG
537         self->entries = NULL;
538 #endif
539 }
540
541 /**
542  * Initializes a hashset expecting expected_element size
543  */
544 void hashset_init_size(HashSet *self, size_t expected_elements)
545 {
546         size_t needed_size;
547         size_t po2size;
548
549         if(expected_elements >= UINT_MAX/2) {
550                 abort();
551         }
552
553         needed_size = expected_elements * HT_1_DIV_OCCUPANCY_FLT;
554         po2size = ceil_po2(needed_size);
555         init_size(self, po2size);
556 }
557
558 /**
559  * Initializes a hashset iterator. The memory for the allocator has to be
560  * already allocated.
561  * @note it is not allowed to remove or insert elements while iterating
562  */
563 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
564 {
565         self->current_bucket = hashset->entries - 1;
566         self->end = hashset->entries + hashset->num_buckets;
567 #ifndef NDEBUG
568         self->set = hashset;
569         self->entries_version = hashset->entries_version;
570 #endif
571 }
572
573 /**
574  * Returns the next value in the iterator or NULL if no value is left
575  * in the hashset.
576  * @note it is not allowed to remove or insert elements while iterating
577  */
578 ValueType hashset_iterator_next(HashSetIterator *self)
579 {
580         HashSetEntry *current_bucket = self->current_bucket;
581         HashSetEntry *end = self->end;
582
583         /* using hashset_insert or hashset_remove is not allowed while iterating */
584         assert(self->entries_version == self->set->entries_version);
585
586         do {
587                 current_bucket++;
588                 if(current_bucket >= end)
589                         return NullValue;
590         } while(EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket));
591
592         self->current_bucket = current_bucket;
593         return EntryGetValue(*current_bucket);
594 }
595
596 /**
597  * Removes the element the iterator points to. Removing an element a second time
598  * has no result.
599  */
600 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
601 {
602         HashSetEntry *entry = iter->current_bucket;
603
604         /* iterator_next needs to have been called at least once */
605         assert(entry >= self->entries);
606         /* needs to be on a valid element */
607         assert(entry < self->entries + self->num_buckets);
608
609         if(EntryIsDeleted(*entry))
610                 return;
611
612         EntrySetDeleted(*entry);
613         self->num_deleted++;
614         self->consider_shrink = 1;
615 }
616
617 #endif