2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Generic hashset implementation
23 * @author Matthias Braun, inspiration from densehash from google sparsehash
29 * You have to specialize this file by defining:
32 * <li><b>HashSet</b> The name of the hashset type</li>
33 * <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
34 * <li><b>ValueType</b> The type of the stored data values</li>
35 * <li><b>NullValue</b> A special value representing no values</li>
36 * <li><b>DeletedValue</b> A special value representing deleted entries</li>
37 * <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
40 * Note that by default it is assumed that the data values themselfes are used
41 * as keys. However you can change that with additional defines:
44 * <li><b>KeyType</b> The type of the keys identifying data values.
45 * Defining this implies, that a data value contains
46 * more than just the key.</li>
47 * <li><b>GetKey(value)</b> Extracts the key from a data value</li>
48 * <li><b>KeysEqual(hashset,key1,key2)</b> Tests wether 2 keys are equal</li>
49 * <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
50 * them on demand from the datavalues. (usefull if
51 * calculating the hash-values takes less time than
52 * a memory access)</li>
55 * You can further fine tune your hashset by defining the following:
58 * <li><b>JUMP(num_probes)</b> The probing method</li>
59 * <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
60 * <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
61 * <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
63 * <li><b>ADDITIONAL_DATA<b> Additional fields appended to the hashset struct</li>
72 #include "bitfiddle.h"
75 /* quadratic probing */
77 #define JUMP(num_probes) (num_probes)
82 #define Hash(self,value) ((unsigned)(value))
86 #define HashSetEntry ValueType
87 #define EntrySetHash(entry,new_hash)
88 #define EntryGetHash(self,entry) Hash(self,entry)
89 #define EntryGetValue(entry) (entry)
90 #else /* ! DO_REHASH */
91 #define EntryGetHash(self,entry) (entry).hash
92 #define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
93 #define EntryGetValue(entry) (entry).data
94 #endif /* DO_REHASH */
98 #define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
99 #define Free(ptr) free(ptr)
103 #define InsertReturnValue int
104 #define GetInsertReturnValue(entry,new) (new)
105 #else /* ! ID_HASH */
106 #define InsertReturnValue ValueType
107 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
111 #define KeyType ValueType
112 #define GetKey(value) (value)
113 #define InitData(self,value,key) (value) = (key)
117 #define ConstKeyType const KeyType
118 #endif /* ConstKeyType */
120 #ifndef EntrySetEmpty
121 #define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
122 #endif /* EntrySetEmpty */
123 #ifndef EntrySetDeleted
124 #define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
125 #endif /* EntrySetDeleted */
127 #define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
128 #endif /* EntryIsEmpty */
129 #ifndef EntryIsDeleted
130 #define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
131 #endif /* EntryIsDeleted */
132 #ifndef SetRangeEmpty
133 #define SetRangeEmpty(ptr,size) \
136 size_t _size = (size); \
137 HashSetEntry *entries = (ptr); \
138 for(_i = 0; _i < _size; ++_i) { \
139 HashSetEntry *entry = & entries[_i]; \
140 EntrySetEmpty(*entry); \
143 #endif /* SetRangeEmpty */
145 #ifndef HT_OCCUPANCY_FLT
146 /** how full before we double size */
147 #define HT_OCCUPANCY_FLT 0.5f
148 #endif /* HT_OCCUPANCY_FLT */
151 /** how empty before we half size */
152 #define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
153 #endif /* HT_EMPTY_FLT */
155 #ifndef HT_MIN_BUCKETS
156 /** default smallest bucket size */
157 #define HT_MIN_BUCKETS 32
158 #endif /* HT_MIN_BUCKETS */
160 #define ILLEGAL_POS ((size_t)-1)
162 /* check that all needed functions are defined */
164 #error You have to redefine hashset_init
166 #ifndef hashset_init_size
167 #error You have to redefine hashset_init_size
169 #ifndef hashset_destroy
170 #error You have to redefine hashset_destroy
172 #ifndef hashset_insert
173 #error You have to redefine hashset_insert
175 #ifndef hashset_remove
176 #error You have to redefine hashset_remove
179 #error You have to redefine hashset_find
182 #error You have to redefine hashset_size
184 #ifndef hashset_iterator_init
185 #error You have to redefine hashset_iterator_init
187 #ifndef hashset_iterator_next
188 #error You have to redefine hashset_iterator_next
190 #ifndef hashset_remove_iterator
191 #error You have to redefine hashset_remove_iterator
195 * Returns the number of elements in the hashset
197 size_t hashset_size(const HashSet *self)
199 return self->num_elements - self->num_deleted;
203 * Inserts an element into a hashset without growing the set (you have to make
204 * sure there's enough room for that.
205 * @note also see comments for hashset_insert()
209 InsertReturnValue insert_nogrow(HashSet *self, KeyType key)
211 size_t num_probes = 0;
212 size_t num_buckets = self->num_buckets;
213 size_t hashmask = num_buckets - 1;
214 unsigned hash = Hash(self, key);
215 size_t bucknum = hash & hashmask;
216 size_t insert_pos = ILLEGAL_POS;
218 assert((num_buckets & (num_buckets - 1)) == 0);
221 HashSetEntry *entry = & self->entries[bucknum];
223 if(EntryIsEmpty(*entry)) {
225 HashSetEntry *nentry;
227 if(insert_pos != ILLEGAL_POS) {
233 nentry = &self->entries[p];
234 InitData(self, EntryGetValue(*nentry), key);
235 EntrySetHash(*nentry, hash);
236 self->num_elements++;
237 return GetInsertReturnValue(*nentry, 1);
239 if(EntryIsDeleted(*entry)) {
240 if(insert_pos == ILLEGAL_POS)
241 insert_pos = bucknum;
242 } else if(EntryGetHash(self, *entry) == hash) {
243 if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
244 // Value already in the set, return it
245 return GetInsertReturnValue(*entry, 0);
250 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
251 assert(num_probes < num_buckets);
256 * Inserts an element into a hashset under the assumption that the hashset
257 * contains no deleted entries and the element doesn't exist in the hashset yet.
261 void insert_new(HashSet *self, unsigned hash, ValueType value)
263 size_t num_probes = 0;
264 size_t num_buckets = self->num_buckets;
265 size_t hashmask = num_buckets - 1;
266 size_t bucknum = hash & hashmask;
267 size_t insert_pos = ILLEGAL_POS;
269 assert(value != NullValue);
272 HashSetEntry *entry = & self->entries[bucknum];
274 if(EntryIsEmpty(*entry)) {
276 HashSetEntry *nentry;
278 if(insert_pos != ILLEGAL_POS) {
283 nentry = &self->entries[p];
285 EntryGetValue(*nentry) = value;
286 EntrySetHash(*nentry, hash);
287 self->num_elements++;
290 assert(!EntryIsDeleted(*entry));
293 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
294 assert(num_probes < num_buckets);
299 * calculate shrink and enlarge limits
303 void reset_thresholds(HashSet *self)
305 self->enlarge_threshold = (size_t) (self->num_buckets * HT_OCCUPANCY_FLT);
306 self->shrink_threshold = (size_t) (self->num_buckets * HT_EMPTY_FLT);
307 self->consider_shrink = 0;
315 void resize(HashSet *self, size_t new_size)
317 size_t num_buckets = self->num_buckets;
319 HashSetEntry *old_entries = self->entries;
320 HashSetEntry *new_entries;
322 /* allocate a new array with double size */
323 new_entries = Alloc(new_size);
324 SetRangeEmpty(new_entries, new_size);
326 /* use the new array */
327 self->entries = new_entries;
328 self->num_buckets = new_size;
329 self->num_elements = 0;
330 self->num_deleted = 0;
332 self->entries_version++;
334 reset_thresholds(self);
336 /* reinsert all elements */
337 for(i = 0; i < num_buckets; ++i) {
338 HashSetEntry *entry = & old_entries[i];
339 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
342 insert_new(self, EntryGetHash(self, *entry), EntryGetValue(*entry));
345 /* now we can free the old array */
350 * grow the hashset if adding 1 more elements would make it too crowded
354 void maybe_grow(HashSet *self)
358 if(LIKELY(self->num_elements + 1 <= self->enlarge_threshold))
361 /* double table size */
362 resize_to = self->num_buckets * 2;
363 resize(self, resize_to);
367 * shrink the hashset if it is only sparsely filled
371 void maybe_shrink(HashSet *self)
376 if(!self->consider_shrink)
379 self->consider_shrink = 0;
380 size = hashset_size(self);
381 if(LIKELY(size > self->shrink_threshold))
384 resize_to = ceil_po2(size);
389 resize(self, resize_to);
393 * Insert an element into the hashset. If no element with key key exists yet,
394 * then a new one is created and initialized with the InitData function.
395 * Otherwise the exisiting element is returned (for hashs where key is equal to
396 * value, nothing is returned.)
398 * @param self the hashset
399 * @param key the key that identifies the data
400 * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value)
402 InsertReturnValue hashset_insert(HashSet *self, KeyType key)
405 self->entries_version++;
410 return insert_nogrow(self, key);
414 * Searchs for an element with key @p key.
416 * @param self the hashset
417 * @param key the key to search for
418 * @returns the found value or NullValue if nothing was found
420 ValueType hashset_find(const HashSet *self, ConstKeyType key)
422 size_t num_probes = 0;
423 size_t num_buckets = self->num_buckets;
424 size_t hashmask = num_buckets - 1;
425 unsigned hash = Hash(self, key);
426 size_t bucknum = hash & hashmask;
429 HashSetEntry *entry = & self->entries[bucknum];
431 if(EntryIsEmpty(*entry)) {
434 if(EntryIsDeleted(*entry)) {
436 } else if(EntryGetHash(self, *entry) == hash) {
437 if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
439 return EntryGetValue(*entry);
444 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
445 assert(num_probes < num_buckets);
450 * Removes an element from a hashset. Does nothing if the set doesn't contain
453 * @param self the hashset
454 * @param key key that identifies the data to remove
456 void hashset_remove(HashSet *self, ConstKeyType key)
458 size_t num_probes = 0;
459 size_t num_buckets = self->num_buckets;
460 size_t hashmask = num_buckets - 1;
461 unsigned hash = Hash(self, key);
462 size_t bucknum = hash & hashmask;
465 self->entries_version++;
469 HashSetEntry *entry = & self->entries[bucknum];
471 if(EntryIsEmpty(*entry)) {
474 if(EntryIsDeleted(*entry)) {
476 } else if(EntryGetHash(self, *entry) == hash) {
477 if(KeysEqual(self, GetKey(EntryGetValue(*entry)), key)) {
478 EntrySetDeleted(*entry);
480 self->consider_shrink = 1;
486 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
487 assert(num_probes < num_buckets);
492 * Initializes hashset with a specific size
496 void init_size(HashSet *self, size_t initial_size)
501 self->entries = Alloc(initial_size);
502 SetRangeEmpty(self->entries, initial_size);
503 self->num_buckets = initial_size;
504 self->consider_shrink = 0;
505 self->num_elements = 0;
506 self->num_deleted = 0;
508 self->entries_version = 0;
511 reset_thresholds(self);
515 * Initialializes a hashset with the default size. The memory for the set has to
518 void hashset_init(HashSet *self)
520 init_size(self, HT_MIN_BUCKETS);
524 * Destroys a hashset, freeing all used memory (except the memory for the
525 * HashSet struct itself).
527 void hashset_destroy(HashSet *self)
531 self->entries = NULL;
536 * Initializes a hashset expecting expected_element size
538 void hashset_init_size(HashSet *self, size_t expected_elements)
543 if(expected_elements >= UINT_MAX/2) {
547 needed_size = expected_elements * (int)(1.0 / HT_OCCUPANCY_FLT);
548 po2size = ceil_po2(needed_size);
549 init_size(self, po2size);
553 * Initializes a hashset iterator. The memory for the allocator has to be
555 * @note it is not allowed to remove or insert elements while iterating
557 void hashset_iterator_init(HashSetIterator *self, const HashSet *hashset)
559 self->current_bucket = hashset->entries - 1;
560 self->end = hashset->entries + hashset->num_buckets;
563 self->entries_version = hashset->entries_version;
568 * Returns the next value in the iterator or NULL if no value is left
570 * @note it is not allowed to remove or insert elements while iterating
572 ValueType hashset_iterator_next(HashSetIterator *self)
574 HashSetEntry *current_bucket = self->current_bucket;
575 HashSetEntry *end = self->end;
577 if(current_bucket >= end)
580 /* using hashset_insert or hashset_remove is not allowed while iterating */
581 assert(self->entries_version == self->set->entries_version);
585 } while(current_bucket < end &&
586 (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
588 if(current_bucket >= end)
591 self->current_bucket = current_bucket;
592 return EntryGetValue(*current_bucket);
596 * Removes the element the iterator points to. Removing an element a second time
599 void hashset_remove_iterator(HashSet *self, const HashSetIterator *iter)
601 HashSetEntry *entry = iter->current_bucket;
603 /* iterator_next needs to have been called at least once */
604 assert(entry >= self->entries);
605 /* needs to be on a valid element */
606 assert(entry < self->entries + self->num_buckets);
608 if(EntryIsDeleted(*entry))
611 EntrySetDeleted(*entry);
613 self->consider_shrink = 1;