3 * File name: ir/adt/set.c
4 * Purpose: Set --- collection of entries that are unique wrt to a key.
5 * Author: Markus Armbruster
7 * Created: 1999 by getting from fiasco
9 * Copyright: (c) 1995, 1996 Markus Armbruster
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
13 /* This code is derived from:
15 From: ejp@ausmelb.oz.AU (Esmond Pitt)
16 Date: Tue, 7 Mar 1989 22:06:26 GMT
17 Subject: v06i042: dynamic hashing version of hsearch(3)
18 Message-ID: <1821@basser.oz>
19 Newsgroups: comp.sources.misc
20 Sender: msgs@basser.oz
22 Posting-number: Volume 6, Issue 42
23 Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
24 Archive-name: dynamic-hash
26 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
27 * Coded into C, with minor code improvements, and with hsearch(3) interface,
28 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
30 TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
39 /* bcopy is not ISO C *
40 #define bcopy(X, Y, Z) memcpy((Y), (X), (Z))
45 # define PMANGLE(pre) pre##_pset
46 # define MANGLEP(post) pset_##post
47 # define MANGLE(pre, post) pre##pset##post
48 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
51 # define PMANGLE(pre) pre##_set
52 # define MANGLEP(post) set_##post
53 # define MANGLE(pre, post) pre##set##post
54 # define EQUAL(cmp, elt, key, siz) \
55 (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
70 #define TOBSTACK_ID MANGLEP(tag)
74 #define SEGMENT_SIZE_SHIFT 8
75 #define SEGMENT_SIZE (1 << SEGMENT_SIZE_SHIFT)
76 #define DIRECTORY_SIZE_SHIFT 8
77 #define DIRECTORY_SIZE (1 << DIRECTORY_SIZE_SHIFT)
78 #define MAX_LOAD_FACTOR 4
81 typedef struct element {
82 struct element *chain; /**< for chaining Elements */
83 MANGLEP (entry) entry;
88 unsigned p; /**< Next bucket to be split */
89 unsigned maxp; /**< upper bound on p during expansion */
90 unsigned nkey; /**< current # keys */
91 unsigned nseg; /**< current # segments */
92 Segment *dir[DIRECTORY_SIZE];
93 MANGLEP(cmp_fun) cmp; /**< function comparing entries */
94 unsigned iter_i, iter_j;
95 Element *iter_tail; /**< non-NULL while iterating over elts */
97 Element *free_list; /**< list of free Elements */
99 struct obstack obst; /**< obstack for allocation all data */
101 int naccess, ncollision, ndups;
105 const char *tag; /**< an optionally tag for distinguishing sets */
113 MANGLEP(stats) (SET *table)
117 Element *q = table->free_list;
118 while (q) { q = q->chain; ++nfree; }
120 printf (" accesses collisions keys duplicates longest wasted\n%12d%12d%12d%12d%12d%12d\n",
121 table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
125 stat_chain_len (SET *table, int chain_len)
127 table->ncollision += chain_len;
128 if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
131 # define stat_access(table) (++(table)->naccess)
132 # define stat_dup(table) (++(table)->ndups)
136 # define stat_chain_len(table, chain_len) ((void)0)
137 # define stat_access(table) ((void)0)
138 # define stat_dup(table) ((void)0)
144 const char *MANGLEP(tag);
148 MANGLEP(describe) (SET *table)
150 unsigned i, j, collide;
154 printf ("p=%u maxp=%u nkey=%u nseg=%u\n",
155 table->p, table->maxp, table->nkey, table->nseg);
156 for (i = 0; i < table->nseg; i++) {
158 for (j = 0; j < SEGMENT_SIZE; j++) {
162 if (collide) printf ("<%3d>", collide);
163 else printf ("table");
164 printf ("[%d][%3d]: %u %p\n", i, j, ptr->entry.hash, (void *)ptr->entry.dptr);
171 MANGLEP(stats)(table);
179 (PMANGLE(new)) (MANGLEP(cmp_fun) cmp, int nslots)
182 SET *table = xmalloc(sizeof(*table));
184 if (nslots > SEGMENT_SIZE * DIRECTORY_SIZE)
185 nslots = DIRECTORY_SIZE;
187 assert (nslots >= 0);
188 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
189 for (i = SEGMENT_SIZE; i < nslots; i <<= 1);
190 nslots = i >> SEGMENT_SIZE_SHIFT;
193 table->nseg = table->p = table->nkey = 0;
194 table->maxp = nslots << SEGMENT_SIZE_SHIFT;
196 table->iter_tail = NULL;
198 table->free_list = NULL;
200 obstack_init (&table->obst);
203 for (i = 0; i < nslots; ++i) {
204 table->dir[i] = (Segment *)obstack_alloc (&table->obst,
205 sizeof (Segment) * SEGMENT_SIZE);
207 memset(table->dir[i], 0, sizeof (Segment) * SEGMENT_SIZE);
212 table->naccess = table->ncollision = table->ndups = 0;
213 table->max_chain_len = 0;
216 table->tag = MANGLEP(tag);
223 PMANGLE(del) (SET *table)
226 MANGLEP(tag) = table->tag;
228 obstack_free (&table->obst, NULL);
233 MANGLEP(count) (SET *table)
239 * do one iteration step, return 1
240 * if still data in the set, 0 else
243 iter_step (SET *table)
245 if (++table->iter_j >= SEGMENT_SIZE) {
247 if (++table->iter_i >= table->nseg) {
256 * finds the first entry in the table
259 MANGLEP(first) (SET *table)
261 assert (!table->iter_tail);
264 while (!table->dir[table->iter_i][table->iter_j]) {
265 if (!iter_step (table)) return NULL;
267 table->iter_tail = table->dir[table->iter_i][table->iter_j];
268 assert (table->iter_tail->entry.dptr);
269 return table->iter_tail->entry.dptr;
273 * returns next entry in the table
276 MANGLEP(next) (SET *table)
278 if (!table->iter_tail)
281 /* follow collision chain */
282 table->iter_tail = table->iter_tail->chain;
283 if (!table->iter_tail) {
284 /* go to next segment */
286 if (!iter_step (table)) return NULL;
287 } while (!table->dir[table->iter_i][table->iter_j]);
288 table->iter_tail = table->dir[table->iter_i][table->iter_j];
290 assert (table->iter_tail->entry.dptr);
291 return table->iter_tail->entry.dptr;
295 MANGLEP(break) (SET *table)
297 table->iter_tail = NULL;
301 * limit the hash value
303 static INLINE unsigned
304 Hash (SET *table, unsigned h)
307 address = h & (table->maxp - 1); /* h % table->maxp */
308 if (address < (unsigned)table->p)
309 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
314 * returns non-zero if the number of elements in
315 * the set is greater then number of segments * MAX_LOAD_FACTOR
320 return ( ++table->nkey
321 > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
325 * expand the hash-table: the algorithm is split, so on every
326 * insert, only ONE segment is rehashed!
328 * table->p contains the current segment to split
329 * after all segments were split, table->p is set to zero and
330 * table->maxp is duplicated.
333 expand_table (SET *table)
336 int OldSegmentIndex, NewSegmentIndex;
337 int OldSegmentDir, NewSegmentDir;
344 if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
345 /* Locate the bucket to be split */
346 OldSegmentDir = table->p >> SEGMENT_SIZE_SHIFT;
347 OldSegment = table->dir[OldSegmentDir];
348 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
350 /* Expand address space; if necessary create a new segment */
351 NewAddress = table->maxp + table->p;
352 NewSegmentDir = NewAddress >> SEGMENT_SIZE_SHIFT;
353 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
354 if (NewSegmentIndex == 0) {
355 table->dir[NewSegmentDir] =
356 (Segment *)obstack_alloc (&table->obst,
357 sizeof(Segment) * SEGMENT_SIZE);
358 memset(table->dir[NewSegmentDir], 0, sizeof(Segment) * SEGMENT_SIZE);
361 NewSegment = table->dir[NewSegmentDir];
363 /* Adjust state variables */
365 if (table->p == table->maxp) {
366 table->maxp <<= 1; /* table->maxp *= 2 */
370 /* Relocate records to the new bucket */
371 Previous = &OldSegment[OldSegmentIndex];
373 LastOfNew = &NewSegment[NewSegmentIndex];
375 while (Current != NULL) {
376 if (Hash (table, Current->entry.hash) == NewAddress) {
377 /* move to new chain */
378 *LastOfNew = Current;
379 *Previous = Current->chain;
380 LastOfNew = &Current->chain;
381 Current = Current->chain;
384 /* leave on old chain */
385 Previous = &Current->chain;
386 Current = Current->chain;
394 MANGLE(_,_search) (SET *table,
400 MANGLE(_,_action) action)
403 Segment *CurrentSegment;
405 MANGLEP(cmp_fun) cmp = table->cmp;
412 MANGLEP(tag) = table->tag;
416 /* Find collision chain */
417 h = Hash (table, hash);
418 SegmentIndex = h & (SEGMENT_SIZE-1);
419 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
420 assert (CurrentSegment != NULL);
421 q = CurrentSegment[SegmentIndex];
423 /* Follow collision chain */
424 while (q && !EQUAL (cmp, q, key, size)) {
429 stat_chain_len (table, chain_len);
431 if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
432 assert (!table->iter_tail && "insert an element into a set that is iterated");
434 if (CurrentSegment[SegmentIndex]) stat_dup (table);
437 if (table->free_list) {
438 q = table->free_list;
439 table->free_list = table->free_list->chain;
441 q = obstack_alloc (&table->obst, sizeof (Element));
443 q->entry.dptr = (void *)key;
445 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
446 if (action == _set_hinsert0)
447 obstack_grow0 (&table->obst, key, size);
449 obstack_grow (&table->obst, key, size);
450 q = obstack_finish (&table->obst);
451 q->entry.size = size;
453 q->chain = CurrentSegment[SegmentIndex];
454 q->entry.hash = hash;
455 CurrentSegment[SegmentIndex] = q;
457 if (loaded (table)) {
458 expand_table(table); /* doesn't affect q */
464 if (action == _pset_hinsert) return &q->entry;
466 if (action == _set_hinsert || action == _set_hinsert0) return &q->entry;
468 return q->entry.dptr;
474 int pset_default_ptr_cmp(const void *x, const void *y)
480 pset_remove (SET *table, const void *key, unsigned hash)
483 Segment *CurrentSegment;
485 pset_cmp_fun cmp = table->cmp;
490 assert (table && !table->iter_tail);
493 /* Find collision chain */
494 h = Hash (table, hash);
495 SegmentIndex = h & (SEGMENT_SIZE-1);
496 CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
497 assert (CurrentSegment != NULL);
498 p = &CurrentSegment[SegmentIndex];
500 /* Follow collision chain */
501 while (!EQUAL (cmp, *p, key, size)) {
507 stat_chain_len (table, chain_len);
511 if (q == table->iter_tail) {
512 /* removing current element */
513 table->iter_tail = q->chain;
514 if (!table->iter_tail) {
515 /* go to next segment */
517 if (!iter_step (table))
519 } while (!table->dir[table->iter_i][table->iter_j]);
520 table->iter_tail = table->dir[table->iter_i][table->iter_j];
525 q->chain = table->free_list;
526 table->free_list = q;
529 return q->entry.dptr;
534 (pset_find) (SET *se, const void *key, unsigned hash)
536 return pset_find (se, key, hash);
541 (pset_insert) (SET *se, const void *key, unsigned hash)
543 return pset_insert (se, key, hash);
548 (pset_hinsert) (SET *se, const void *key, unsigned hash)
550 return pset_hinsert (se, key, hash);
553 void pset_insert_pset_ptr(pset *target, pset *src) {
555 for (elt = pset_first(src); elt; elt = pset_next(src)) {
556 pset_insert_ptr(target, elt);
563 (set_find) (set *se, const void *key, size_t size, unsigned hash)
565 return set_find (se, key, size, hash);
570 (set_insert) (set *se, const void *key, size_t size, unsigned hash)
572 return set_insert (se, key, size, hash);
577 (set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
579 return set_hinsert (se, key, size, hash);