1 /* Set --- collection of entries that are unique wrt to a key.
2 Copyright (C) 1995, 1996 Markus Armbruster */
4 /* This code is derived from:
6 From: ejp@ausmelb.oz.AU (Esmond Pitt)
7 Date: Tue, 7 Mar 1989 22:06:26 GMT
8 Subject: v06i042: dynamic hashing version of hsearch(3)
9 Message-ID: <1821@basser.oz>
10 Newsgroups: comp.sources.misc
11 Sender: msgs@basser.oz
13 Posting-number: Volume 6, Issue 42
14 Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
15 Archive-name: dynamic-hash
17 ** Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
18 ** Coded into C, with minor code improvements, and with hsearch(3) interface,
19 ** by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
21 TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
36 /* bcopy is not ISO C */
37 #define bcopy(X, Y, Z) memcpy((Y), (X), (Z))
41 # define PMANGLE(pre) pre##_pset
42 # define MANGLEP(post) pset_##post
43 # define MANGLE(pre, post) pre##pset##post
44 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
47 # define PMANGLE(pre) pre##_set
48 # define MANGLEP(post) set_##post
49 # define MANGLE(pre, post) pre##set##post
50 # define EQUAL(cmp, elt, key, siz) \
51 (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
65 #define TOBSTACK_ID MANGLEP(tag)
69 #define SEGMENT_SIZE_SHIFT 8
70 #define SEGMENT_SIZE (1 << SEGMENT_SIZE_SHIFT)
71 #define DIRECTORY_SIZE_SHIFT 8
72 #define DIRECTORY_SIZE (1 << DIRECTORY_SIZE_SHIFT)
73 #define MAX_LOAD_FACTOR 4
76 typedef struct element {
77 struct element *chain;
78 MANGLEP (entry) entry;
83 short p; /* Next bucket to be split */
84 short maxp; /* upper bound on p during expansion */
85 int nkey; /* current # keys */
86 short nseg; /* current # segments */
87 Segment *dir[DIRECTORY_SIZE];
88 MANGLEP(cmp_fun) cmp; /* function comparing entries */
90 Element *iter_tail; /* non-NULL while iterating over elts */
96 int naccess, ncollision, ndups;
108 MANGLEP(stats) (SET *table)
112 Element *q = table->free_list;
113 while (q) { q = q->chain; ++nfree; }
115 printf (" accesses collisions keys duplicates longest wasted\n%12d%12d%12d%12d%12d%12d\n",
116 table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
120 stat_chain_len (SET *table, int chain_len)
122 table->ncollision += chain_len;
123 if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
126 # define stat_access(table) (++(table)->naccess)
127 # define stat_dup(table) (++(table)->ndups)
131 # define stat_chain_len(table, chain_len) ((void)0)
132 # define stat_access(table) ((void)0)
133 # define stat_dup(table) ((void)0)
139 const char *MANGLEP(tag);
143 MANGLEP(describe) (SET *table)
149 printf ("p=%d maxp=%d nkey=%d nseg=%d\n",
150 table->p, table->maxp, table->nkey, table->nseg);
151 for (i = 0; i < table->nseg; i++) {
153 for (j = 0; j < SEGMENT_SIZE; j++) {
157 if (collide) printf ("<%3d>", collide);
158 else printf ("table");
159 printf ("[%d][%3d]: %u %p\n", i, j, ptr->entry.hash, ptr->entry.dptr);
171 (PMANGLE(new)) (MANGLEP(cmp_fun) cmp, int nslots)
174 SET *table = xmalloc (sizeof (SET));
176 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
177 assert (nslots >= 0);
178 for (i = SEGMENT_SIZE; i < nslots; i <<= 1) assert (i < (i << 1));
179 nslots = i >> SEGMENT_SIZE_SHIFT;
181 table->nseg = table->p = table->nkey = 0;
182 table->maxp = nslots << SEGMENT_SIZE_SHIFT;
184 table->iter_tail = NULL;
186 table->free_list = NULL;
188 obstack_init (&table->obst);
191 for (i = 0; i < nslots; ++i) {
192 table->dir[i] = (Segment *)obstack_alloc (&table->obst,
193 sizeof (Segment) * SEGMENT_SIZE);
195 memset (table->dir[i], 0, sizeof (Segment) * SEGMENT_SIZE);
200 table->naccess = table->ncollision = table->ndups = 0;
201 table->max_chain_len = 0;
204 table->tag = MANGLEP(tag);
211 PMANGLE(del) (SET *table)
214 MANGLEP(tag) = table->tag;
216 obstack_free (&table->obst, NULL);
222 iter_step (SET *table)
224 if (++table->iter_j >= SEGMENT_SIZE) {
226 if (++table->iter_i >= table->nseg) {
236 MANGLEP(first) (SET *table)
238 assert (!table->iter_tail);
241 while (!table->dir[table->iter_i][table->iter_j]) {
242 if (!iter_step (table)) return NULL;
244 table->iter_tail = table->dir[table->iter_i][table->iter_j];
245 assert (table->iter_tail->entry.dptr);
246 return table->iter_tail->entry.dptr;
251 MANGLEP(next) (SET *table)
253 assert (table->iter_tail);
254 table->iter_tail = table->iter_tail->chain;
255 if (!table->iter_tail) {
257 if (!iter_step (table)) return NULL;
258 } while (!table->dir[table->iter_i][table->iter_j]);
259 table->iter_tail = table->dir[table->iter_i][table->iter_j];
261 assert (table->iter_tail->entry.dptr);
262 return table->iter_tail->entry.dptr;
266 MANGLEP(break) (SET *table)
268 assert (table->iter_tail);
269 table->iter_tail = NULL;
273 static INLINE unsigned
274 Hash (SET *table, unsigned h)
278 address = h & (table->maxp - 1);
279 if (address < (unsigned)table->p)
280 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
288 return ( ++table->nkey
289 > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
294 expand_table (SET *table)
297 int OldSegmentIndex, NewSegmentIndex;
298 int OldSegmentDir, NewSegmentDir;
305 if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
306 /* Locate the bucket to be split */
307 OldSegmentDir = table->p >> SEGMENT_SIZE_SHIFT;
308 OldSegment = table->dir[OldSegmentDir];
309 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
311 /* Expand address space; if necessary create a new segment */
312 NewAddress = table->maxp + table->p;
313 NewSegmentDir = NewAddress >> SEGMENT_SIZE_SHIFT;
314 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
315 if (NewSegmentIndex == 0) {
316 table->dir[NewSegmentDir] =
317 (Segment *)obstack_alloc (&table->obst,
318 sizeof(Segment) * SEGMENT_SIZE);
320 NewSegment = table->dir[NewSegmentDir];
322 /* Adjust state variables */
324 if (table->p == table->maxp) {
325 table->maxp <<= 1; /* table->maxp *= 2 */
330 /* Relocate records to the new bucket */
331 Previous = &OldSegment[OldSegmentIndex];
333 LastOfNew = &NewSegment[NewSegmentIndex];
335 while (Current != NULL) {
336 if (Hash (table, Current->entry.hash) == NewAddress) {
337 /* move to new chain */
338 *LastOfNew = Current;
339 *Previous = Current->chain;
340 LastOfNew = &Current->chain;
341 Current = Current->chain;
344 /* leave on old chain */
345 Previous = &Current->chain;
346 Current = Current->chain;
354 MANGLE(_,_search) (SET *table,
360 MANGLE(_,_action) action)
363 Segment *CurrentSegment;
365 MANGLEP(cmp_fun) cmp = table->cmp;
370 assert (!table->iter_tail);
373 MANGLEP(tag) = table->tag;
377 /* Find collision chain */
378 h = Hash (table, hash);
379 SegmentIndex = h & (SEGMENT_SIZE-1);
380 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
381 assert (CurrentSegment != NULL);
382 q = CurrentSegment[SegmentIndex];
384 /* Follow collision chain */
385 while (q && !EQUAL (cmp, q, key, size)) {
390 stat_chain_len (table, chain_len);
392 if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
393 if (CurrentSegment[SegmentIndex]) stat_dup (table);
396 if (table->free_list) {
397 q = table->free_list;
398 table->free_list = table->free_list->chain;
400 q = obstack_alloc (&table->obst, sizeof (Element));
402 q->entry.dptr = (void *)key;
404 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
405 obstack_grow (&table->obst, key, size);
406 q = obstack_finish (&table->obst);
407 q->entry.size = size;
409 q->chain = CurrentSegment[SegmentIndex];
410 q->entry.hash = hash;
411 CurrentSegment[SegmentIndex] = q;
413 if (loaded (table)) {
414 expand_table(table); /* doesn't affect q */
419 if (action == MANGLE(_,_hinsert)) return &q->entry;
420 return q->entry.dptr;
427 pset_remove (SET *table, const void *key, unsigned hash)
430 Segment *CurrentSegment;
432 pset_cmp_fun cmp = table->cmp;
437 assert (table && !table->iter_tail);
440 /* Find collision chain */
441 h = Hash (table, hash);
442 SegmentIndex = h & (SEGMENT_SIZE-1);
443 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
444 assert (CurrentSegment != NULL);
445 p = &CurrentSegment[SegmentIndex];
447 /* Follow collision chain */
448 while (!EQUAL (cmp, *p, key, size)) {
454 stat_chain_len (table, chain_len);
458 q->chain = table->free_list;
459 table->free_list = q;
461 return q->entry.dptr;
466 (pset_find) (SET *se, const void *key, unsigned hash)
468 return pset_find (se, key, hash);
473 (pset_insert) (SET *se, const void *key, unsigned hash)
475 return pset_insert (se, key, hash);
480 (pset_hinsert) (SET *se, const void *key, unsigned hash)
482 return pset_hinsert (se, key, hash);
488 (set_find) (set *se, const void *key, size_t size, unsigned hash)
490 return set_find (se, key, size, hash);
495 (set_insert) (set *se, const void *key, size_t size, unsigned hash)
497 return set_insert (se, key, size, hash);
502 (set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
504 return set_hinsert (se, key, size, hash);