4 * @brief Geberic hashset implementation
5 * @author Matthias Braun, inspiration from densehash from google sparsehash
10 * You have to specialize this file by defining:
13 * <li><b>HashSet</b> The name of the hashset type</li>
14 * <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
15 * <li><b>ValueType</b> The type of the stored data values</li>
16 * <li><b>NullValue</b> A special value representing no values</li>
17 * <li><b>DeletedValue</b> A special value representing deleted entries</li>
18 * <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
21 * Note that by default it is assumed that the data values themselfes are used
22 * as keys. However you can change that with additional defines:
25 * <li><b>KeyType</b> The type of the keys identifying data values.
26 * Defining this implies, that a data value contains
27 * more than just the key.</li>
28 * <li><b>GetKey(value)</b> Extracts the key from a data value</li>
29 * <li><b>KeysEqual(hashset,key1,key2)</b> Tests wether 2 keys are equal</li>
30 * <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
31 * them on demand from the datavalues. (usefull if
32 * calculating the hash-values takes less time than
33 * a memory access)</li>
36 * You can further fine tune your hashset by defining the following:
39 * <li><b>JUMP(num_probes)</b> The probing method</li>
40 * <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
41 * <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
42 * <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
44 * <li><b>ADDITIONAL_DATA<b> Additional fields appended to the hashset struct</li>
53 #include "bitfiddle.h"
56 /* quadratic probing */
58 #define JUMP(num_probes) (num_probes)
63 #define Hash(this,value) ((unsigned)(value))
67 #define HashSetEntry ValueType
68 #define EntrySetHash(entry,new_hash)
69 #define EntryGetHash(this,entry) Hash(this,entry)
70 #define EntryGetValue(entry) (entry)
72 #define EntryGetHash(this,entry) (entry).hash
73 #define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
74 #define EntryGetValue(entry) (entry).data
79 #define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
80 #define Free(ptr) free(ptr)
84 #define InsertReturnValue int
85 #define GetInsertReturnValue(entry,new) (new)
87 #define InsertReturnValue ValueType
88 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
92 #define KeyType ValueType
93 #define GetKey(value) (value)
94 #define InitData(this,value,key) (value) = (key)
98 #define ConstKeyType const KeyType
101 #ifndef EntrySetEmpty
102 #define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
104 #ifndef EntrySetDeleted
105 #define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
108 #define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
110 #ifndef EntryIsDeleted
111 #define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
113 #ifndef SetRangeEmpty
114 #define SetRangeEmpty(ptr,size) \
117 size_t _size = (size); \
118 HashSetEntry *entries = (ptr); \
119 for(_i = 0; _i < _size; ++_i) { \
120 HashSetEntry *entry = & entries[_i]; \
121 EntrySetEmpty(*entry); \
126 #ifndef HT_OCCUPANCY_FLT
127 /** how full before we double size */
128 #define HT_OCCUPANCY_FLT 0.5f
132 /** how empty before we half size */
133 #define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
136 #ifndef HT_MIN_BUCKETS
137 /** default smallest bucket size */
138 #define HT_MIN_BUCKETS 32
141 #define ILLEGAL_POS ((size_t)-1)
144 #error You have to redefine hashset_init
146 #ifndef hashset_init_size
147 #error You have to redefine hashset_init_size
149 #ifndef hashset_destroy
150 #error You have to redefine hashset_destroy
152 #ifndef hashset_insert
153 #error You have to redefine hashset_insert
155 #ifndef hashset_remove
156 #error You have to redefine hashset_remove
159 #error You have to redefine hashset_find
162 #error You have to redefine hashset_size
164 #ifndef hashset_iterator_init
165 #error You have to redefine hashset_iterator_init
167 #ifndef hashset_iterator_next
168 #error You have to redefine hashset_iterator_next
170 #ifndef hashset_remove_iterator
171 #error You have to redefine hashset_remove_iterator
174 /* prototypes to silence warnings */
175 size_t hashset_size(const HashSet *this);
176 void hashset_init(HashSet *this);
177 void hashset_init_size(HashSet *this, size_t size);
178 void hashset_destroy(HashSet *this);
179 InsertReturnValue hashset_insert(HashSet *this, KeyType key);
180 ValueType hashset_find(const HashSet *this, ConstKeyType key);
181 void hashset_remove(HashSet *this, ConstKeyType key);
182 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset);
183 ValueType hashset_iterator_next(HashSetIterator *this);
184 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter);
187 * Returns the number of elements in the hashset
189 size_t hashset_size(const HashSet *this)
191 return this->num_elements - this->num_deleted;
195 * Inserts an element into a hashset without growing the set (you have to make
196 * sure there's enough room for that.
197 * @note also see comments for hashset_insert()
201 InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
203 size_t num_probes = 0;
204 size_t num_buckets = this->num_buckets;
205 size_t hashmask = num_buckets - 1;
206 unsigned hash = Hash(this, key);
207 size_t bucknum = hash & hashmask;
208 size_t insert_pos = ILLEGAL_POS;
210 assert((num_buckets & (num_buckets - 1)) == 0);
213 HashSetEntry *entry = & this->entries[bucknum];
215 if(EntryIsEmpty(*entry)) {
217 HashSetEntry *nentry;
219 if(insert_pos != ILLEGAL_POS) {
225 nentry = &this->entries[p];
226 InitData(this, EntryGetValue(*nentry), key);
227 EntrySetHash(*nentry, hash);
228 this->num_elements++;
229 return GetInsertReturnValue(*nentry, 1);
231 if(EntryIsDeleted(*entry)) {
232 if(insert_pos == ILLEGAL_POS)
233 insert_pos = bucknum;
234 } else if(EntryGetHash(this, *entry) == hash) {
235 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
236 // Value already in the set, return it
237 return GetInsertReturnValue(*entry, 0);
242 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
243 assert(num_probes < num_buckets);
248 * Inserts an element into a hashset under the assumption that the hashset
249 * contains no deleted entries and the element doesn't exist in the hashset yet.
253 void insert_new(HashSet *this, unsigned hash, ValueType value)
255 size_t num_probes = 0;
256 size_t num_buckets = this->num_buckets;
257 size_t hashmask = num_buckets - 1;
258 size_t bucknum = hash & hashmask;
259 size_t insert_pos = ILLEGAL_POS;
261 assert(value != NullValue);
264 HashSetEntry *entry = & this->entries[bucknum];
266 if(EntryIsEmpty(*entry)) {
268 HashSetEntry *nentry;
270 if(insert_pos != ILLEGAL_POS) {
275 nentry = &this->entries[p];
277 EntryGetValue(*nentry) = value;
278 EntrySetHash(*nentry, hash);
279 this->num_elements++;
282 assert(!EntryIsDeleted(*entry));
285 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
286 assert(num_probes < num_buckets);
291 * calculate shrink and enlarge limits
295 void reset_thresholds(HashSet *this)
297 this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
298 this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
299 this->consider_shrink = 0;
307 void resize(HashSet *this, size_t new_size)
309 size_t num_buckets = this->num_buckets;
311 HashSetEntry *old_entries = this->entries;
312 HashSetEntry *new_entries;
314 /* allocate a new array with double size */
315 new_entries = Alloc(new_size);
316 SetRangeEmpty(new_entries, new_size);
318 /* use the new array */
319 this->entries = new_entries;
320 this->num_buckets = new_size;
321 this->num_elements = 0;
322 this->num_deleted = 0;
324 this->entries_version++;
326 reset_thresholds(this);
328 /* reinsert all elements */
329 for(i = 0; i < num_buckets; ++i) {
330 HashSetEntry *entry = & old_entries[i];
331 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
334 insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
337 /* now we can free the old array */
342 * grow the hashset if adding 1 more elements would make it too crowded
346 void maybe_grow(HashSet *this)
350 if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
353 /* double table size */
354 resize_to = this->num_buckets * 2;
355 resize(this, resize_to);
359 * shrink the hashset if it is only sparsely filled
363 void maybe_shrink(HashSet *this)
368 if(!this->consider_shrink)
371 this->consider_shrink = 0;
372 size = hashset_size(this);
373 if(LIKELY(size > this->shrink_threshold))
376 resize_to = ceil_po2(size);
381 resize(this, resize_to);
385 * Insert an element into the hashset. If no element with key key exists yet,
386 * then a new one is created and initialized with the InitData function.
387 * Otherwise the exisiting element is returned (for hashs where key is equal to
388 * value, nothing is returned.)
390 * @param this the hashset
391 * @param key the key that identifies the data
392 * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value)
394 InsertReturnValue hashset_insert(HashSet *this, KeyType key)
397 this->entries_version++;
402 return insert_nogrow(this, key);
406 * Searchs for an element with key @p key.
408 * @param this the hashset
409 * @param key the key to search for
410 * @returns the found value or NullValue if nothing was found
412 ValueType hashset_find(const HashSet *this, ConstKeyType key)
414 size_t num_probes = 0;
415 size_t num_buckets = this->num_buckets;
416 size_t hashmask = num_buckets - 1;
417 unsigned hash = Hash(this, key);
418 size_t bucknum = hash & hashmask;
421 HashSetEntry *entry = & this->entries[bucknum];
423 if(EntryIsEmpty(*entry)) {
426 if(EntryIsDeleted(*entry)) {
428 } else if(EntryGetHash(this, *entry) == hash) {
429 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
431 return EntryGetValue(*entry);
436 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
437 assert(num_probes < num_buckets);
442 * Removes an element from a hashset. Does nothing if the set doesn't contain
445 * @param this the hashset
446 * @param key key that identifies the data to remove
448 void hashset_remove(HashSet *this, ConstKeyType key)
450 size_t num_probes = 0;
451 size_t num_buckets = this->num_buckets;
452 size_t hashmask = num_buckets - 1;
453 unsigned hash = Hash(this, key);
454 size_t bucknum = hash & hashmask;
457 this->entries_version++;
461 HashSetEntry *entry = & this->entries[bucknum];
463 if(EntryIsEmpty(*entry)) {
466 if(EntryIsDeleted(*entry)) {
468 } else if(EntryGetHash(this, *entry) == hash) {
469 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
470 EntrySetDeleted(*entry);
472 this->consider_shrink = 1;
478 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
479 assert(num_probes < num_buckets);
484 * Initializes hashset with a specific size
488 void init_size(HashSet *this, size_t initial_size)
493 this->entries = Alloc(initial_size);
494 SetRangeEmpty(this->entries, initial_size);
495 this->num_buckets = initial_size;
496 this->consider_shrink = 0;
497 this->num_elements = 0;
498 this->num_deleted = 0;
500 this->entries_version = 0;
503 reset_thresholds(this);
507 * Initialializes a hashset with the default size. The memory for the set has to
510 void hashset_init(HashSet *this)
512 init_size(this, HT_MIN_BUCKETS);
516 * Destroys a hashset, freeing all used memory (except the memory for the
517 * HashSet struct itself).
519 void hashset_destroy(HashSet *this)
523 this->entries = NULL;
528 * Initializes a hashset expecting expected_element size
530 void hashset_init_size(HashSet *this, size_t expected_elements)
535 if(expected_elements >= UINT_MAX/2) {
539 needed_size = (size_t) (expected_elements * (1.0 / HT_OCCUPANCY_FLT));
540 po2size = ceil_po2(needed_size);
541 init_size(this, po2size);
545 * Initializes a hashset iterator. The memory for the allocator has to be
547 * @note it is not allowed to remove or insert elements while iterating
549 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
551 this->current_bucket = hashset->entries - 1;
552 this->end = hashset->entries + hashset->num_buckets;
555 this->entries_version = hashset->entries_version;
560 * Returns the next value in the iterator or NULL if no value is left
562 * @note it is not allowed to remove or insert elements while iterating
564 ValueType hashset_iterator_next(HashSetIterator *this)
566 HashSetEntry *current_bucket = this->current_bucket;
567 HashSetEntry *end = this->end;
569 if(current_bucket >= end)
572 /* using hashset_insert or hashset_remove is not allowed while iterating */
573 assert(this->entries_version == this->set->entries_version);
577 } while(current_bucket < end &&
578 (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
580 if(current_bucket >= end)
583 this->current_bucket = current_bucket;
584 return EntryGetValue(*current_bucket);
588 * Removes the element the iterator points to. Removing an element a second time
591 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
593 HashSetEntry *entry = iter->current_bucket;
595 /* iterator_next needs to have been called at least once */
596 assert(entry >= this->entries);
597 /* needs to be on a valid element */
598 assert(entry < this->entries + this->num_buckets);
600 if(EntryIsDeleted(*entry))
603 EntrySetDeleted(*entry);
605 this->consider_shrink = 1;
609 __attribute__((unused)) static int dummy;