2 * This file is part of cparser.
3 * Copyright (C) 2007-2008 Matthias Braun <matze@braunis.de>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 * @brief Geberic hashset implementation
25 * @author Matthias Braun, inspiration from densehash from google sparsehash
30 * You have to specialize this file by defining:
33 * <li><b>HashSet</b> The name of the hashset type</li>
34 * <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
35 * <li><b>ValueType</b> The type of the stored data values</li>
36 * <li><b>NullValue</b> A special value representing no values</li>
37 * <li><b>DeletedValue</b> A special value representing deleted entries</li>
38 * <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
41 * Note that by default it is assumed that the data values themselfes are used
42 * as keys. However you can change that with additional defines:
45 * <li><b>KeyType</b> The type of the keys identifying data values.
46 * Defining this implies, that a data value contains
47 * more than just the key.</li>
48 * <li><b>GetKey(value)</b> Extracts the key from a data value</li>
49 * <li><b>KeysEqual(hashset,key1,key2)</b> Tests wether 2 keys are equal</li>
50 * <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
51 * them on demand from the datavalues. (usefull if
52 * calculating the hash-values takes less time than
53 * a memory access)</li>
56 * You can further fine tune your hashset by defining the following:
59 * <li><b>JUMP(num_probes)</b> The probing method</li>
60 * <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
61 * <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
62 * <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
64 * <li><b>ADDITIONAL_DATA<b> Additional fields appended to the hashset struct</li>
73 #include "bitfiddle.h"
76 /* quadratic probing */
78 #define JUMP(num_probes) (num_probes)
83 #define Hash(this,value) ((unsigned)(value))
87 #define HashSetEntry ValueType
88 #define EntrySetHash(entry,new_hash)
89 #define EntryGetHash(this,entry) Hash(this,entry)
90 #define EntryGetValue(entry) (entry)
92 #define EntryGetHash(this,entry) (entry).hash
93 #define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
94 #define EntryGetValue(entry) (entry).data
99 #define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
100 #define Free(ptr) free(ptr)
104 #define InsertReturnValue int
105 #define GetInsertReturnValue(entry,new) (new)
107 #define InsertReturnValue ValueType
108 #define GetInsertReturnValue(entry,new) EntryGetValue(entry)
112 #define KeyType ValueType
113 #define GetKey(value) (value)
114 #define InitData(this,value,key) (value) = (key)
118 #define ConstKeyType const KeyType
121 #ifndef EntrySetEmpty
122 #define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
124 #ifndef EntrySetDeleted
125 #define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
128 #define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
130 #ifndef EntryIsDeleted
131 #define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
133 #ifndef SetRangeEmpty
134 #define SetRangeEmpty(ptr,size) \
137 size_t _size = (size); \
138 HashSetEntry *entries = (ptr); \
139 for(_i = 0; _i < _size; ++_i) { \
140 HashSetEntry *entry = & entries[_i]; \
141 EntrySetEmpty(*entry); \
146 #ifndef HT_OCCUPANCY_FLT
147 /** how full before we double size */
148 #define HT_OCCUPANCY_FLT 0.5f
152 /** how empty before we half size */
153 #define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
156 #ifndef HT_MIN_BUCKETS
157 /** default smallest bucket size */
158 #define HT_MIN_BUCKETS 32
161 #define ILLEGAL_POS ((size_t)-1)
164 #error You have to redefine hashset_init
166 #ifndef hashset_init_size
167 #error You have to redefine hashset_init_size
169 #ifndef hashset_destroy
170 #error You have to redefine hashset_destroy
172 #ifndef hashset_insert
173 #error You have to redefine hashset_insert
175 #ifndef hashset_remove
176 #error You have to redefine hashset_remove
179 #error You have to redefine hashset_find
182 #error You have to redefine hashset_size
184 #ifndef hashset_iterator_init
185 #error You have to redefine hashset_iterator_init
187 #ifndef hashset_iterator_next
188 #error You have to redefine hashset_iterator_next
190 #ifndef hashset_remove_iterator
191 #error You have to redefine hashset_remove_iterator
194 /* prototypes to silence warnings */
195 size_t hashset_size(const HashSet *this);
196 void hashset_init(HashSet *this);
197 void hashset_init_size(HashSet *this, size_t size);
198 void hashset_destroy(HashSet *this);
199 InsertReturnValue hashset_insert(HashSet *this, KeyType key);
200 ValueType hashset_find(const HashSet *this, ConstKeyType key);
201 void hashset_remove(HashSet *this, ConstKeyType key);
202 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset);
203 ValueType hashset_iterator_next(HashSetIterator *this);
204 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter);
207 * Returns the number of elements in the hashset
209 size_t hashset_size(const HashSet *this)
211 return this->num_elements - this->num_deleted;
215 * Inserts an element into a hashset without growing the set (you have to make
216 * sure there's enough room for that.
217 * @note also see comments for hashset_insert()
221 InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
223 size_t num_probes = 0;
224 size_t num_buckets = this->num_buckets;
225 size_t hashmask = num_buckets - 1;
226 unsigned hash = Hash(this, key);
227 size_t bucknum = hash & hashmask;
228 size_t insert_pos = ILLEGAL_POS;
230 assert((num_buckets & (num_buckets - 1)) == 0);
233 HashSetEntry *entry = & this->entries[bucknum];
235 if(EntryIsEmpty(*entry)) {
237 HashSetEntry *nentry;
239 if(insert_pos != ILLEGAL_POS) {
245 nentry = &this->entries[p];
246 InitData(this, EntryGetValue(*nentry), key);
247 EntrySetHash(*nentry, hash);
248 this->num_elements++;
249 return GetInsertReturnValue(*nentry, 1);
251 if(EntryIsDeleted(*entry)) {
252 if(insert_pos == ILLEGAL_POS)
253 insert_pos = bucknum;
254 } else if(EntryGetHash(this, *entry) == hash) {
255 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
256 // Value already in the set, return it
257 return GetInsertReturnValue(*entry, 0);
262 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
263 assert(num_probes < num_buckets);
268 * Inserts an element into a hashset under the assumption that the hashset
269 * contains no deleted entries and the element doesn't exist in the hashset yet.
273 void insert_new(HashSet *this, unsigned hash, ValueType value)
275 size_t num_probes = 0;
276 size_t num_buckets = this->num_buckets;
277 size_t hashmask = num_buckets - 1;
278 size_t bucknum = hash & hashmask;
279 size_t insert_pos = ILLEGAL_POS;
281 assert(value != NullValue);
284 HashSetEntry *entry = & this->entries[bucknum];
286 if(EntryIsEmpty(*entry)) {
288 HashSetEntry *nentry;
290 if(insert_pos != ILLEGAL_POS) {
295 nentry = &this->entries[p];
297 EntryGetValue(*nentry) = value;
298 EntrySetHash(*nentry, hash);
299 this->num_elements++;
302 assert(!EntryIsDeleted(*entry));
305 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
306 assert(num_probes < num_buckets);
311 * calculate shrink and enlarge limits
315 void reset_thresholds(HashSet *this)
317 this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
318 this->shrink_threshold = (size_t) (this->num_buckets * HT_EMPTY_FLT);
319 this->consider_shrink = 0;
327 void resize(HashSet *this, size_t new_size)
329 size_t num_buckets = this->num_buckets;
331 HashSetEntry *old_entries = this->entries;
332 HashSetEntry *new_entries;
334 /* allocate a new array with double size */
335 new_entries = Alloc(new_size);
336 SetRangeEmpty(new_entries, new_size);
338 /* use the new array */
339 this->entries = new_entries;
340 this->num_buckets = new_size;
341 this->num_elements = 0;
342 this->num_deleted = 0;
344 this->entries_version++;
346 reset_thresholds(this);
348 /* reinsert all elements */
349 for(i = 0; i < num_buckets; ++i) {
350 HashSetEntry *entry = & old_entries[i];
351 if(EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
354 insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
357 /* now we can free the old array */
362 * grow the hashset if adding 1 more elements would make it too crowded
366 void maybe_grow(HashSet *this)
370 if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
373 /* double table size */
374 resize_to = this->num_buckets * 2;
375 resize(this, resize_to);
379 * shrink the hashset if it is only sparsely filled
383 void maybe_shrink(HashSet *this)
388 if(!this->consider_shrink)
391 this->consider_shrink = 0;
392 size = hashset_size(this);
393 if(LIKELY(size > this->shrink_threshold))
396 resize_to = ceil_po2(size);
401 resize(this, resize_to);
405 * Insert an element into the hashset. If no element with key key exists yet,
406 * then a new one is created and initialized with the InitData function.
407 * Otherwise the exisiting element is returned (for hashs where key is equal to
408 * value, nothing is returned.)
410 * @param this the hashset
411 * @param key the key that identifies the data
412 * @returns the existing or newly created data element (or nothing in case of hashs where keys are the while value)
414 InsertReturnValue hashset_insert(HashSet *this, KeyType key)
417 this->entries_version++;
422 return insert_nogrow(this, key);
426 * Searchs for an element with key @p key.
428 * @param this the hashset
429 * @param key the key to search for
430 * @returns the found value or NullValue if nothing was found
432 ValueType hashset_find(const HashSet *this, ConstKeyType key)
434 size_t num_probes = 0;
435 size_t num_buckets = this->num_buckets;
436 size_t hashmask = num_buckets - 1;
437 unsigned hash = Hash(this, key);
438 size_t bucknum = hash & hashmask;
441 HashSetEntry *entry = & this->entries[bucknum];
443 if(EntryIsEmpty(*entry)) {
446 if(EntryIsDeleted(*entry)) {
448 } else if(EntryGetHash(this, *entry) == hash) {
449 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
451 return EntryGetValue(*entry);
456 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
457 assert(num_probes < num_buckets);
462 * Removes an element from a hashset. Does nothing if the set doesn't contain
465 * @param this the hashset
466 * @param key key that identifies the data to remove
468 void hashset_remove(HashSet *this, ConstKeyType key)
470 size_t num_probes = 0;
471 size_t num_buckets = this->num_buckets;
472 size_t hashmask = num_buckets - 1;
473 unsigned hash = Hash(this, key);
474 size_t bucknum = hash & hashmask;
477 this->entries_version++;
481 HashSetEntry *entry = & this->entries[bucknum];
483 if(EntryIsEmpty(*entry)) {
486 if(EntryIsDeleted(*entry)) {
488 } else if(EntryGetHash(this, *entry) == hash) {
489 if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
490 EntrySetDeleted(*entry);
492 this->consider_shrink = 1;
498 bucknum = (bucknum + JUMP(num_probes)) & hashmask;
499 assert(num_probes < num_buckets);
504 * Initializes hashset with a specific size
508 void init_size(HashSet *this, size_t initial_size)
513 this->entries = Alloc(initial_size);
514 SetRangeEmpty(this->entries, initial_size);
515 this->num_buckets = initial_size;
516 this->consider_shrink = 0;
517 this->num_elements = 0;
518 this->num_deleted = 0;
520 this->entries_version = 0;
523 reset_thresholds(this);
527 * Initialializes a hashset with the default size. The memory for the set has to
530 void hashset_init(HashSet *this)
532 init_size(this, HT_MIN_BUCKETS);
536 * Destroys a hashset, freeing all used memory (except the memory for the
537 * HashSet struct itself).
539 void hashset_destroy(HashSet *this)
543 this->entries = NULL;
548 * Initializes a hashset expecting expected_element size
550 void hashset_init_size(HashSet *this, size_t expected_elements)
555 if(expected_elements >= UINT_MAX/2) {
559 needed_size = (size_t) (expected_elements * (1.0 / HT_OCCUPANCY_FLT));
560 po2size = ceil_po2(needed_size);
561 init_size(this, po2size);
565 * Initializes a hashset iterator. The memory for the allocator has to be
567 * @note it is not allowed to remove or insert elements while iterating
569 void hashset_iterator_init(HashSetIterator *this, const HashSet *hashset)
571 this->current_bucket = hashset->entries - 1;
572 this->end = hashset->entries + hashset->num_buckets;
575 this->entries_version = hashset->entries_version;
580 * Returns the next value in the iterator or NULL if no value is left
582 * @note it is not allowed to remove or insert elements while iterating
584 ValueType hashset_iterator_next(HashSetIterator *this)
586 HashSetEntry *current_bucket = this->current_bucket;
587 HashSetEntry *end = this->end;
589 if(current_bucket >= end)
592 /* using hashset_insert or hashset_remove is not allowed while iterating */
593 assert(this->entries_version == this->set->entries_version);
597 } while(current_bucket < end &&
598 (EntryIsEmpty(*current_bucket) || EntryIsDeleted(*current_bucket)));
600 if(current_bucket >= end)
603 this->current_bucket = current_bucket;
604 return EntryGetValue(*current_bucket);
608 * Removes the element the iterator points to. Removing an element a second time
611 void hashset_remove_iterator(HashSet *this, const HashSetIterator *iter)
613 HashSetEntry *entry = iter->current_bucket;
615 /* iterator_next needs to have been called at least once */
616 assert(entry >= this->entries);
617 /* needs to be on a valid element */
618 assert(entry < this->entries + this->num_buckets);
620 if(EntryIsDeleted(*entry))
623 EntrySetDeleted(*entry);
625 this->consider_shrink = 1;
629 __attribute__((unused)) static int dummy;