3 * File name: ir/adt/set.c
4 * Purpose: Set --- collection of entries that are unique wrt to a key.
5 * Author: Markus Armbruster
7 * Created: 1999 by getting from fiasco
9 * Copyright: (c) 1995, 1996 Markus Armbruster
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
13 /* This code is derived from:
15 From: ejp@ausmelb.oz.AU (Esmond Pitt)
16 Date: Tue, 7 Mar 1989 22:06:26 GMT
17 Subject: v06i042: dynamic hashing version of hsearch(3)
18 Message-ID: <1821@basser.oz>
19 Newsgroups: comp.sources.misc
20 Sender: msgs@basser.oz
22 Posting-number: Volume 6, Issue 42
23 Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
24 Archive-name: dynamic-hash
26 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
27 * Coded into C, with minor code improvements, and with hsearch(3) interface,
28 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
30 TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
45 /* bcopy is not ISO C *
46 #define bcopy(X, Y, Z) memcpy((Y), (X), (Z))
51 # define PMANGLE(pre) pre##_pset
52 # define MANGLEP(post) pset_##post
53 # define MANGLE(pre, post) pre##pset##post
54 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
57 # define PMANGLE(pre) pre##_set
58 # define MANGLEP(post) set_##post
59 # define MANGLE(pre, post) pre##set##post
60 # define EQUAL(cmp, elt, key, siz) \
61 (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
75 #define TOBSTACK_ID MANGLEP(tag)
79 #define SEGMENT_SIZE_SHIFT 8
80 #define SEGMENT_SIZE (1 << SEGMENT_SIZE_SHIFT)
81 #define DIRECTORY_SIZE_SHIFT 8
82 #define DIRECTORY_SIZE (1 << DIRECTORY_SIZE_SHIFT)
83 #define MAX_LOAD_FACTOR 4
86 typedef struct element {
87 struct element *chain;
88 MANGLEP (entry) entry;
93 short p; /* Next bucket to be split */
94 short maxp; /* upper bound on p during expansion */
95 int nkey; /* current # keys */
96 short nseg; /* current # segments */
97 Segment *dir[DIRECTORY_SIZE];
98 MANGLEP(cmp_fun) cmp; /* function comparing entries */
100 Element *iter_tail; /* non-NULL while iterating over elts */
106 int naccess, ncollision, ndups;
118 MANGLEP(stats) (SET *table)
122 Element *q = table->free_list;
123 while (q) { q = q->chain; ++nfree; }
125 printf (" accesses collisions keys duplicates longest wasted\n%12d%12d%12d%12d%12d%12d\n",
126 table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
130 stat_chain_len (SET *table, int chain_len)
132 table->ncollision += chain_len;
133 if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
136 # define stat_access(table) (++(table)->naccess)
137 # define stat_dup(table) (++(table)->ndups)
141 # define stat_chain_len(table, chain_len) ((void)0)
142 # define stat_access(table) ((void)0)
143 # define stat_dup(table) ((void)0)
149 const char *MANGLEP(tag);
153 MANGLEP(describe) (SET *table)
159 printf ("p=%d maxp=%d nkey=%d nseg=%d\n",
160 table->p, table->maxp, table->nkey, table->nseg);
161 for (i = 0; i < table->nseg; i++) {
163 for (j = 0; j < SEGMENT_SIZE; j++) {
167 if (collide) printf ("<%3d>", collide);
168 else printf ("table");
169 printf ("[%d][%3d]: %u %p\n", i, j, ptr->entry.hash, ptr->entry.dptr);
181 (PMANGLE(new)) (MANGLEP(cmp_fun) cmp, int nslots)
184 SET *table = xmalloc (sizeof (SET));
186 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
187 assert (nslots >= 0);
188 for (i = SEGMENT_SIZE; i < nslots; i <<= 1) assert (i < (i << 1));
189 nslots = i >> SEGMENT_SIZE_SHIFT;
191 table->nseg = table->p = table->nkey = 0;
192 table->maxp = nslots << SEGMENT_SIZE_SHIFT;
194 table->iter_tail = NULL;
196 table->free_list = NULL;
198 obstack_init (&table->obst);
201 for (i = 0; i < nslots; ++i) {
202 table->dir[i] = (Segment *)obstack_alloc (&table->obst,
203 sizeof (Segment) * SEGMENT_SIZE);
205 memset (table->dir[i], 0, sizeof (Segment) * SEGMENT_SIZE);
210 table->naccess = table->ncollision = table->ndups = 0;
211 table->max_chain_len = 0;
214 table->tag = MANGLEP(tag);
221 PMANGLE(del) (SET *table)
224 MANGLEP(tag) = table->tag;
226 obstack_free (&table->obst, NULL);
232 iter_step (SET *table)
234 if (++table->iter_j >= SEGMENT_SIZE) {
236 if (++table->iter_i >= table->nseg) {
246 MANGLEP(first) (SET *table)
248 assert (!table->iter_tail);
251 while (!table->dir[table->iter_i][table->iter_j]) {
252 if (!iter_step (table)) return NULL;
254 table->iter_tail = table->dir[table->iter_i][table->iter_j];
255 assert (table->iter_tail->entry.dptr);
256 return table->iter_tail->entry.dptr;
261 MANGLEP(next) (SET *table)
263 assert (table->iter_tail);
264 table->iter_tail = table->iter_tail->chain;
265 if (!table->iter_tail) {
267 if (!iter_step (table)) return NULL;
268 } while (!table->dir[table->iter_i][table->iter_j]);
269 table->iter_tail = table->dir[table->iter_i][table->iter_j];
271 assert (table->iter_tail->entry.dptr);
272 return table->iter_tail->entry.dptr;
276 MANGLEP(break) (SET *table)
278 assert (table->iter_tail);
279 table->iter_tail = NULL;
283 static INLINE unsigned
284 Hash (SET *table, unsigned h)
288 address = h & (table->maxp - 1);
289 if (address < (unsigned)table->p)
290 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
298 return ( ++table->nkey
299 > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
304 expand_table (SET *table)
307 int OldSegmentIndex, NewSegmentIndex;
308 int OldSegmentDir, NewSegmentDir;
315 if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
316 /* Locate the bucket to be split */
317 OldSegmentDir = table->p >> SEGMENT_SIZE_SHIFT;
318 OldSegment = table->dir[OldSegmentDir];
319 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
321 /* Expand address space; if necessary create a new segment */
322 NewAddress = table->maxp + table->p;
323 NewSegmentDir = NewAddress >> SEGMENT_SIZE_SHIFT;
324 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
325 if (NewSegmentIndex == 0) {
326 table->dir[NewSegmentDir] =
327 (Segment *)obstack_alloc (&table->obst,
328 sizeof(Segment) * SEGMENT_SIZE);
330 NewSegment = table->dir[NewSegmentDir];
332 /* Adjust state variables */
334 if (table->p == table->maxp) {
335 table->maxp <<= 1; /* table->maxp *= 2 */
340 /* Relocate records to the new bucket */
341 Previous = &OldSegment[OldSegmentIndex];
343 LastOfNew = &NewSegment[NewSegmentIndex];
345 while (Current != NULL) {
346 if (Hash (table, Current->entry.hash) == NewAddress) {
347 /* move to new chain */
348 *LastOfNew = Current;
349 *Previous = Current->chain;
350 LastOfNew = &Current->chain;
351 Current = Current->chain;
354 /* leave on old chain */
355 Previous = &Current->chain;
356 Current = Current->chain;
364 MANGLE(_,_search) (SET *table,
370 MANGLE(_,_action) action)
373 Segment *CurrentSegment;
375 MANGLEP(cmp_fun) cmp = table->cmp;
380 assert (!table->iter_tail);
383 MANGLEP(tag) = table->tag;
387 /* Find collision chain */
388 h = Hash (table, hash);
389 SegmentIndex = h & (SEGMENT_SIZE-1);
390 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
391 assert (CurrentSegment != NULL);
392 q = CurrentSegment[SegmentIndex];
394 /* Follow collision chain */
395 while (q && !EQUAL (cmp, q, key, size)) {
400 stat_chain_len (table, chain_len);
402 if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
403 if (CurrentSegment[SegmentIndex]) stat_dup (table);
406 if (table->free_list) {
407 q = table->free_list;
408 table->free_list = table->free_list->chain;
410 q = obstack_alloc (&table->obst, sizeof (Element));
412 q->entry.dptr = (void *)key;
414 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
415 if (action == _set_hinsert0)
416 obstack_grow0 (&table->obst, key, size);
418 obstack_grow (&table->obst, key, size);
419 q = obstack_finish (&table->obst);
420 q->entry.size = size;
422 q->chain = CurrentSegment[SegmentIndex];
423 q->entry.hash = hash;
424 CurrentSegment[SegmentIndex] = q;
426 if (loaded (table)) {
427 expand_table(table); /* doesn't affect q */
433 if (action == _pset_hinsert) return &q->entry;
435 if (action == _set_hinsert || action == _set_hinsert0) return &q->entry;
437 return q->entry.dptr;
444 pset_remove (SET *table, const void *key, unsigned hash)
447 Segment *CurrentSegment;
449 pset_cmp_fun cmp = table->cmp;
454 assert (table && !table->iter_tail);
457 /* Find collision chain */
458 h = Hash (table, hash);
459 SegmentIndex = h & (SEGMENT_SIZE-1);
460 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
461 assert (CurrentSegment != NULL);
462 p = &CurrentSegment[SegmentIndex];
464 /* Follow collision chain */
465 while (!EQUAL (cmp, *p, key, size)) {
471 stat_chain_len (table, chain_len);
475 q->chain = table->free_list;
476 table->free_list = q;
478 return q->entry.dptr;
483 (pset_find) (SET *se, const void *key, unsigned hash)
485 return pset_find (se, key, hash);
490 (pset_insert) (SET *se, const void *key, unsigned hash)
492 return pset_insert (se, key, hash);
497 (pset_hinsert) (SET *se, const void *key, unsigned hash)
499 return pset_hinsert (se, key, hash);
505 (set_find) (set *se, const void *key, size_t size, unsigned hash)
507 return set_find (se, key, size, hash);
512 (set_insert) (set *se, const void *key, size_t size, unsigned hash)
514 return set_insert (se, key, size, hash);
519 (set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
521 return set_hinsert (se, key, size, hash);