4 * @brief Geberic hashset implementation
5 * @author Matthias Braun, inspiration from densehash from google sparsehash
10 * You have to specialize this file by defining:
13 * <li><b>HashSet</b> The name of the hashset type</li>
14 * <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
15 * <li><b>ValueType</b> The type of the stored data values</li>
16 * <li><b>NullValue</b> A special value representing no values</li>
17 * <li><b>DeletedValue</b> A special value representing deleted entries</li>
18 * <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
21 * Note that by default it is assumed that the data values themselfes are used
22 * as keys. However you can change that with additional defines:
25 * <li><b>KeyType</b> The type of the keys identifying data values.
26 * Defining this implies, that a data value contains
27 * more than just the key.</li>
28 * <li><b>GetKey(value)</b> Extracts the key from a data value</li>
29 * <li><b>KeysEqual(hashset,key1,key2)</b> Tests wether 2 keys are equal</li>
30 * <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
31 * them on demand from the datavalues. (usefull if
32 * calculating the hash-values takes less time than
33 * a memory access)</li>
36 * You can further fine tune your hashset by defining the following:
39 * <li><b>JUMP(num_probes)</b> The probing method</li>
40 * <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
41 * <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
42 * <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
44 * <li><b>ADDITIONAL_DATA<b> Additional fields appended to the hashset struct</li>
53 #include "bitfiddle.h"
56 /* quadratic probing */
58 #define JUMP(num_probes) (num_probes)
63 #define Hash(this,value) ((unsigned)(value))
67 #define HashSetEntry ValueType
68 #define EntrySetHash(entry,new_hash)
69 #define EntryGetHash(this,entry) Hash(this,entry)
70 #define EntryGetValue(entry) (entry)
71 #else /* ! DO_REHASH */
72 #define EntryGetHash(this,entry) (entry).hash
73 #define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
74 #define EntryGetValue(entry) (entry).data
75 #endif /* DO_REHASH */
79 #define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
80 #define Free(ptr) free(ptr)
84 #define InsertReturnValue int
85 #define GetInsertReturnValue(entry,new) (new)
87 #define InsertReturnValue ValueType
88 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
92 #define KeyType ValueType
93 #define GetKey(value) (value)
94 #define InitData(this,value,key) (value) = (key)
98 #define ConstKeyType const KeyType
99 #endif /* ConstKeyType */
101 #ifndef EntrySetEmpty
102 #define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
103 #endif /* EntrySetEmpty */
104 #ifndef EntrySetDeleted
105 #define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
106 #endif /* EntrySetDeleted */
108 #define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
109 #endif /* EntryIsEmpty */
110 #ifndef EntryIsDeleted
111 #define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
112 #endif /* EntryIsDeleted */
113 #ifndef SetRangeEmpty
114 #define SetRangeEmpty(ptr,size) \
117 size_t _size = (size); \
118 HashSetEntry *entries = (ptr); \
119 for(_i = 0; _i < _size; ++_i) { \
120 HashSetEntry *entry = & entries[_i]; \
121 EntrySetEmpty(*entry); \
124 #endif /* SetRangeEmpty */
126 #ifndef HT_OCCUPANCY_FLT
127 /** how full before we double size */
128 #define HT_OCCUPANCY_FLT 0.5f
129 #endif /* HT_OCCUPANCY_FLT */
132 /** how empty before we half size */
133 #define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
134 #endif /* HT_EMPTY_FLT */
136 #ifndef HT_MIN_BUCKETS
137 /** default smallest bucket size */
138 #define HT_MIN_BUCKETS 32
139 #endif /* HT_MIN_BUCKETS */
141 #define ILLEGAL_POS ((size_t)-1)
143 /* check that all needed functions are defined */
145 #error You have to redefine hashset_init
147 #ifndef hashset_init_size
148 #error You have to redefine hashset_init_size
150 #ifndef hashset_destroy
151 #error You have to redefine hashset_destroy
153 #ifndef hashset_insert
154 #error You have to redefine hashset_insert
156 #ifndef hashset_remove
157 #error You have to redefine hashset_remove
160 #error You have to redefine hashset_find
163 #error You have to redefine hashset_size
165 #ifndef hashset_iterator_init
166 #error You have to redefine hashset_iterator_init
168 #ifndef hashset_iterator_next
169 #error You have to redefine hashset_iterator_next
171 #ifndef hashset_remove_iterator
172 #error You have to redefine hashset_remove_iterator
176 * Returns the number of elements in the hashset
178 size_t hashset_size(const HashSet *this)
180 return this->num_elements - this->num_deleted;
184 * Inserts an element into a hashset without growing the set (you have to make
185 * sure there's enough room for that.
186 * @note also see comments for hashset_insert()
190 InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
192 size_t num_probes = 0;
193 size_t num_buckets = this->num_buckets;
194 size_t hashmask = num_buckets - 1;
195 unsigned hash = Hash(this, key);
196 size_t bucknum = hash & hashmask;
197 size_t insert_pos = ILLEGAL_POS;
199 assert((num_buckets & (num_buckets - 1)) == 0);
202 HashSetEntry *entry = & this->entries[bucknum];
204 if(EntryIsEmpty(*entry)) {
206 HashSetEntry *nentry;
208 if(insert_pos != ILLEGAL_POS) {
214 nentry = &this->entries[p];
215 InitData(this, EntryGetValue(*nentry), key);
216 EntrySetHash(*nentry, hash);
217 this->num_elements++;
218 return GetInsertReturnValue(*nentry, 1);
220 if(EntryIsDeleted(*entry)) {
221 if(insert_pos == ILLEGAL_POS)
222 insert_pos = bucknum;
223 } else if(EntryGetHash(this, *entry) == hash) {
224 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
225 // Value already in the set, return it
226 return GetInsertReturnValue(*entry, 0);
231 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
232 assert(num_probes < num_buckets);
237 * Inserts an element into a hashset under the assumption that the hashset
238 * contains no deleted entries and the element doesn't exist in the hashset yet.
242 void insert_new(HashSet *this, unsigned hash, ValueType value)
244 size_t num_probes = 0;
245 size_t num_buckets = this->num_buckets;
246 size_t hashmask = num_buckets - 1;
247 size_t bucknum = hash & hashmask;
248 size_t insert_pos = ILLEGAL_POS;
250 assert(value != NullValue);
253 HashSetEntry *entry = & this->entries[bucknum];
255 if(EntryIsEmpty(*entry)) {
257 HashSetEntry *nentry;
259 if(insert_pos != ILLEGAL_POS) {
264 nentry = &this->entries[p];
266 EntryGetValue(*nentry) = value;
267 EntrySetHash(*nentry, hash);
268 this->num_elements++;
271 assert(!EntryIsDeleted(*entry));
274 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
275 assert(num_probes < num_buckets);
280 * calculate shrink and enlarge limits
284 void reset_thresholds(HashSet *this)
286 this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
287 this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
288 this->consider_shrink = 0;
296 void resize(HashSet *this, size_t new_size)
298 size_t num_buckets = this->num_buckets;
300 HashSetEntry *old_entries = this->entries;
301 HashSetEntry *new_entries;
303 /* allocate a new array with double size */
304 new_entries = Alloc(new_size);
305 SetRangeEmpty(new_entries, new_size);
307 /* use the new array */
308 this->entries = new_entries;
309 this->num_buckets = new_size;
310 this->num_elements = 0;
311 this->num_deleted = 0;
313 this->entries_version++;
315 reset_thresholds(this);
317 /* reinsert all elements */
318 for(i = 0; i < num_buckets; ++i) {
319 HashSetEntry *entry = & old_entries[i];
320 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
323 insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
326 /* now we can free the old array */
331 * grow the hashset if adding 1 more elements would make it too crowded
335 void maybe_grow(HashSet *this)
339 if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
342 /* double table size */
343 resize_to = this->num_buckets * 2;
344 resize(this, resize_to);
348 * shrink the hashset if it is only sparsely filled
352 void maybe_shrink(HashSet *this)
357 if(!this->consider_shrink)
360 this->consider_shrink = 0;
361 size = hashset_size(this);
362 if(LIKELY(size > this->shrink_threshold))
365 resize_to = ceil_po2(size);
370 resize(this, resize_to);
374 * Insert an element into the hashset. If no element with key key exists yet,
375 * then a new one is created and initialized with the InitData function.
376 * Otherwise the exisiting element is returned (for hashs where key is equal to
377 * value, nothing is returned.)
379 * @param this the hashset
380 * @param key the key that identifies the data
381 * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value)
383 InsertReturnValue hashset_insert(HashSet *this, KeyType key)
386 this->entries_version++;
391 return insert_nogrow(this, key);
395 * Searchs for an element with key @p key.
397 * @param this the hashset
398 * @param key the key to search for
399 * @returns the found value or NullValue if nothing was found
401 ValueType hashset_find(const HashSet *this, ConstKeyType key)
403 size_t num_probes = 0;
404 size_t num_buckets = this->num_buckets;
405 size_t hashmask = num_buckets - 1;
406 unsigned hash = Hash(this, key);
407 size_t bucknum = hash & hashmask;
410 HashSetEntry *entry = & this->entries[bucknum];
412 if(EntryIsEmpty(*entry)) {
415 if(EntryIsDeleted(*entry)) {
417 } else if(EntryGetHash(this, *entry) == hash) {
418 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
420 return EntryGetValue(*entry);
425 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
426 assert(num_probes < num_buckets);
431 * Removes an element from a hashset. Does nothing if the set doesn't contain
434 * @param this the hashset
435 * @param key key that identifies the data to remove
437 void hashset_remove(HashSet *this, ConstKeyType key)
439 size_t num_probes = 0;
440 size_t num_buckets = this->num_buckets;
441 size_t hashmask = num_buckets - 1;
442 unsigned hash = Hash(this, key);
443 size_t bucknum = hash & hashmask;
446 this->entries_version++;
450 HashSetEntry *entry = & this->entries[bucknum];
452 if(EntryIsEmpty(*entry)) {
455 if(EntryIsDeleted(*entry)) {
457 } else if(EntryGetHash(this, *entry) == hash) {
458 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
459 EntrySetDeleted(*entry);
461 this->consider_shrink = 1;
467 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
468 assert(num_probes < num_buckets);
473 * Initializes hashset with a specific size
477 void init_size(HashSet *this, size_t initial_size)
482 this->entries = Alloc(initial_size);
483 SetRangeEmpty(this->entries, initial_size);
484 this->num_buckets = initial_size;
485 this->consider_shrink = 0;
486 this->num_elements = 0;
487 this->num_deleted = 0;
489 this->entries_version = 0;
492 reset_thresholds(this);
496 * Initialializes a hashset with the default size. The memory for the set has to
499 void hashset_init(HashSet *this)
501 init_size(this, HT_MIN_BUCKETS);
505 * Destroys a hashset, freeing all used memory (except the memory for the
506 * HashSet struct itself).
508 void hashset_destroy(HashSet *this)
512 this->entries = NULL;
517 * Initializes a hashset expecting expected_element size
519 void hashset_init_size(HashSet *this, size_t expected_elements)
524 if(expected_elements >= UINT_MAX/2) {
528 needed_size = expected_elements * (1.0 / HT_OCCUPANCY_FLT);
529 po2size = ceil_po2(needed_size);
530 init_size(this, po2size);
534 * Initializes a hashset iterator. The memory for the allocator has to be
536 * @note it is not allowed to remove or insert elements while iterating
538 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
540 this->current_bucket = hashset->entries - 1;
541 this->end = hashset->entries + hashset->num_buckets;
544 this->entries_version = hashset->entries_version;
549 * Returns the next value in the iterator or NULL if no value is left
551 * @note it is not allowed to remove or insert elements while iterating
553 ValueType hashset_iterator_next(HashSetIterator *this)
555 HashSetEntry *current_bucket = this->current_bucket;
556 HashSetEntry *end = this->end;
558 if(current_bucket >= end)
561 /* using hashset_insert or hashset_remove is not allowed while iterating */
562 assert(this->entries_version == this->set->entries_version);
566 } while(current_bucket < end &&
567 (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
569 if(current_bucket >= end)
572 this->current_bucket = current_bucket;
573 return EntryGetValue(*current_bucket);
577 * Removes the element the iterator points to. Removing an element a second time
580 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
582 HashSetEntry *entry = iter->current_bucket;
584 /* iterator_next needs to have been called at least once */
585 assert(entry >= this->entries);
586 /* needs to be on a valid element */
587 assert(entry < this->entries + this->num_buckets);
589 if(EntryIsDeleted(*entry))
592 EntrySetDeleted(*entry);
594 this->consider_shrink = 1;