1 /* Set --- collection of entries that are unique wrt to a key.
2 Copyright (C) 1995, 1996 Markus Armbruster */
4 /* This code is derived from:
6 From: ejp@ausmelb.oz.AU (Esmond Pitt)
7 Date: Tue, 7 Mar 1989 22:06:26 GMT
8 Subject: v06i042: dynamic hashing version of hsearch(3)
9 Message-ID: <1821@basser.oz>
10 Newsgroups: comp.sources.misc
11 Sender: msgs@basser.oz
13 Posting-number: Volume 6, Issue 42
14 Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
15 Archive-name: dynamic-hash
17 ** Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
18 ** Coded into C, with minor code improvements, and with hsearch(3) interface,
19 ** by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
21 TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
30 # define PMANGLE(pre) pre##_pset
31 # define MANGLEP(post) pset_##post
32 # define MANGLE(pre, post) pre##pset##post
33 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
36 # define PMANGLE(pre) pre##_set
37 # define MANGLEP(post) set_##post
38 # define MANGLE(pre, post) pre##set##post
39 # define EQUAL(cmp, elt, key, siz) \
40 (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
54 #define TOBSTACK_ID MANGLEP(tag)
58 #define SEGMENT_SIZE_SHIFT 8
59 #define SEGMENT_SIZE (1 << SEGMENT_SIZE_SHIFT)
60 #define DIRECTORY_SIZE_SHIFT 8
61 #define DIRECTORY_SIZE (1 << DIRECTORY_SIZE_SHIFT)
62 #define MAX_LOAD_FACTOR 4
65 typedef struct element {
66 struct element *chain;
67 MANGLEP (entry) entry;
72 short p; /* Next bucket to be split */
73 short maxp; /* upper bound on p during expansion */
74 int nkey; /* current # keys */
75 short nseg; /* current # segments */
76 Segment *dir[DIRECTORY_SIZE];
77 MANGLEP(cmp_fun) cmp; /* function comparing entries */
79 Element *iter_tail; /* non-NULL while iterating over elts */
85 int naccess, ncollision, ndups;
97 MANGLEP(stats) (SET *table)
101 Element *q = table->free_list;
102 while (q) { q = q->chain; ++nfree; }
104 printf (" accesses collisions keys duplicates longest wasted\n%12d%12d%12d%12d%12d%12d\n",
105 table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
109 stat_chain_len (SET *table, int chain_len)
111 table->ncollision += chain_len;
112 if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
115 # define stat_access(table) (++(table)->naccess)
116 # define stat_dup(table) (++(table)->ndups)
120 # define stat_chain_len(table, chain_len) ((void)0)
121 # define stat_access(table) ((void)0)
122 # define stat_dup(table) ((void)0)
128 const char *MANGLEP(tag);
132 MANGLEP(describe) (SET *table)
138 printf ("p=%d maxp=%d nkey=%d nseg=%d\n",
139 table->p, table->maxp, table->nkey, table->nseg);
140 for (i = 0; i < table->nseg; i++) {
142 for (j = 0; j < SEGMENT_SIZE; j++) {
146 if (collide) printf ("<%3d>", collide);
147 else printf ("table");
148 printf ("[%d][%3d]: %u %p\n", i, j, ptr->entry.hash, ptr->entry.dptr);
160 (PMANGLE(new)) (MANGLEP(cmp_fun) cmp, int nslots)
163 SET *table = xmalloc (sizeof (SET));
165 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
166 assert (nslots >= 0);
167 for (i = SEGMENT_SIZE; i < nslots; i <<= 1) assert (i < (i << 1));
168 nslots = i >> SEGMENT_SIZE_SHIFT;
170 table->nseg = table->p = table->nkey = 0;
171 table->maxp = nslots << SEGMENT_SIZE_SHIFT;
173 table->iter_tail = NULL;
175 table->free_list = NULL;
177 obstack_init (&table->obst);
180 for (i = 0; i < nslots; ++i) {
181 table->dir[i] = (Segment *)obstack_alloc (&table->obst,
182 sizeof (Segment) * SEGMENT_SIZE);
184 memset (table->dir[i], 0, sizeof (Segment) * SEGMENT_SIZE);
189 table->naccess = table->ncollision = table->ndups = 0;
190 table->max_chain_len = 0;
193 table->tag = MANGLEP(tag);
200 PMANGLE(del) (SET *table)
203 MANGLEP(tag) = table->tag;
205 obstack_free (&table->obst, NULL);
211 iter_step (SET *table)
213 if (++table->iter_j >= SEGMENT_SIZE) {
215 if (++table->iter_i >= table->nseg) {
225 MANGLEP(first) (SET *table)
227 assert (!table->iter_tail);
230 while (!table->dir[table->iter_i][table->iter_j]) {
231 if (!iter_step (table)) return NULL;
233 table->iter_tail = table->dir[table->iter_i][table->iter_j];
234 assert (table->iter_tail->entry.dptr);
235 return table->iter_tail->entry.dptr;
240 MANGLEP(next) (SET *table)
242 assert (table->iter_tail);
243 table->iter_tail = table->iter_tail->chain;
244 if (!table->iter_tail) {
246 if (!iter_step (table)) return NULL;
247 } while (!table->dir[table->iter_i][table->iter_j]);
248 table->iter_tail = table->dir[table->iter_i][table->iter_j];
250 assert (table->iter_tail->entry.dptr);
251 return table->iter_tail->entry.dptr;
255 MANGLEP(break) (SET *table)
257 assert (table->iter_tail);
258 table->iter_tail = NULL;
262 static inline unsigned
263 Hash (SET *table, unsigned h)
267 address = h & (table->maxp - 1);
268 if (address < (unsigned)table->p)
269 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
277 return ( ++table->nkey
278 > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
283 expand_table (SET *table)
286 int OldSegmentIndex, NewSegmentIndex;
287 int OldSegmentDir, NewSegmentDir;
294 if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
295 /* Locate the bucket to be split */
296 OldSegmentDir = table->p >> SEGMENT_SIZE_SHIFT;
297 OldSegment = table->dir[OldSegmentDir];
298 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
300 /* Expand address space; if necessary create a new segment */
301 NewAddress = table->maxp + table->p;
302 NewSegmentDir = NewAddress >> SEGMENT_SIZE_SHIFT;
303 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
304 if (NewSegmentIndex == 0) {
305 table->dir[NewSegmentDir] =
306 (Segment *)obstack_alloc (&table->obst,
307 sizeof(Segment) * SEGMENT_SIZE);
309 NewSegment = table->dir[NewSegmentDir];
311 /* Adjust state variables */
313 if (table->p == table->maxp) {
314 table->maxp <<= 1; /* table->maxp *= 2 */
319 /* Relocate records to the new bucket */
320 Previous = &OldSegment[OldSegmentIndex];
322 LastOfNew = &NewSegment[NewSegmentIndex];
324 while (Current != NULL) {
325 if (Hash (table, Current->entry.hash) == NewAddress) {
326 /* move to new chain */
327 *LastOfNew = Current;
328 *Previous = Current->chain;
329 LastOfNew = &Current->chain;
330 Current = Current->chain;
333 /* leave on old chain */
334 Previous = &Current->chain;
335 Current = Current->chain;
343 MANGLE(_,_search) (SET *table,
349 MANGLE(_,_action) action)
352 Segment *CurrentSegment;
354 MANGLEP(cmp_fun) cmp = table->cmp;
359 assert (!table->iter_tail);
362 MANGLEP(tag) = table->tag;
366 /* Find collision chain */
367 h = Hash (table, hash);
368 SegmentIndex = h & (SEGMENT_SIZE-1);
369 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
370 assert (CurrentSegment != NULL);
371 q = CurrentSegment[SegmentIndex];
373 /* Follow collision chain */
374 while (q && !EQUAL (cmp, q, key, size)) {
379 stat_chain_len (table, chain_len);
381 if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
382 if (CurrentSegment[SegmentIndex]) stat_dup (table);
385 if (table->free_list) {
386 q = table->free_list;
387 table->free_list = table->free_list->chain;
389 q = obstack_alloc (&table->obst, sizeof (Element));
391 q->entry.dptr = (void *)key;
393 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
394 obstack_grow (&table->obst, key, size);
395 q = obstack_finish (&table->obst);
396 q->entry.size = size;
398 q->chain = CurrentSegment[SegmentIndex];
399 q->entry.hash = hash;
400 CurrentSegment[SegmentIndex] = q;
402 if (loaded (table)) {
403 expand_table(table); /* doesn't affect q */
408 if (action == MANGLE(_,_hinsert)) return &q->entry;
409 return q->entry.dptr;
416 pset_remove (SET *table, const void *key, unsigned hash)
419 Segment *CurrentSegment;
421 pset_cmp_fun cmp = table->cmp;
426 assert (table && !table->iter_tail);
429 /* Find collision chain */
430 h = Hash (table, hash);
431 SegmentIndex = h & (SEGMENT_SIZE-1);
432 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
433 assert (CurrentSegment != NULL);
434 p = &CurrentSegment[SegmentIndex];
436 /* Follow collision chain */
437 while (!EQUAL (cmp, *p, key, size)) {
443 stat_chain_len (table, chain_len);
447 q->chain = table->free_list;
448 table->free_list = q;
450 return q->entry.dptr;
455 (pset_find) (SET *se, const void *key, unsigned hash)
457 return pset_find (se, key, hash);
462 (pset_insert) (SET *se, const void *key, unsigned hash)
464 return pset_insert (se, key, hash);
469 (pset_hinsert) (SET *se, const void *key, unsigned hash)
471 return pset_hinsert (se, key, hash);
477 (set_find) (set *se, const void *key, size_t size, unsigned hash)
479 return set_find (se, key, size, hash);
484 (set_insert) (set *se, const void *key, size_t size, unsigned hash)
486 return set_insert (se, key, size, hash);
491 (set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
493 return set_hinsert (se, key, size, hash);