1806437fe0100d8d63ddc8a877b78ea847abb7ab
[libfirm] / ir / adt / set.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       implementation of set
23  * @author      Markus Armbruster
24  * @version     $Id$
25  */
26
27 /*  This code is derived from:
28
29     From: ejp@ausmelb.oz.AU (Esmond Pitt)
30     Date: Tue, 7 Mar 1989 22:06:26 GMT
31     Subject: v06i042: dynamic hashing version of hsearch(3)
32     Message-ID: <1821@basser.oz>
33     Newsgroups: comp.sources.misc
34     Sender: msgs@basser.oz
35
36     Posting-number: Volume 6, Issue 42
37     Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
38     Archive-name: dynamic-hash
39
40     * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
41     * Coded into C, with minor code improvements, and with hsearch(3) interface,
42     * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
43
44     TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
45  */
46 #include "config.h"
47
48 #ifdef PSET
49 # define SET pset
50 # define PMANGLE(pre) pre##_pset
51 # define MANGLEP(post) pset_##post
52 # define MANGLE(pre, post) pre##pset##post
53 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
54 #else
55 # define SET set
56 # define PMANGLE(pre) pre##_set
57 # define MANGLEP(post) set_##post
58 # define MANGLE(pre, post) pre##set##post
59 # define EQUAL(cmp, elt, key, siz) \
60     (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
61 #endif
62
63 #include <assert.h>
64 #include <stdlib.h>
65 #include <stdio.h>
66 #include <string.h>
67 #include "xmalloc.h"
68 #include "lc_printf.h"
69 #ifdef PSET
70 # include "pset.h"
71 #else
72 # include "set.h"
73 #endif
74
75
76 #define TOBSTACK_ID MANGLEP(tag)
77 #include "obst.h"
78
79
80 #define SEGMENT_SIZE_SHIFT   8
81 #define SEGMENT_SIZE         (1 << SEGMENT_SIZE_SHIFT)
82 #define DIRECTORY_SIZE_SHIFT 8
83 #define DIRECTORY_SIZE       (1 << DIRECTORY_SIZE_SHIFT)
84 #define MAX_LOAD_FACTOR      4
85
86
87 typedef struct element {
88         struct element *chain;    /**< for chaining Elements */
89         MANGLEP (entry) entry;
90 } Element, *Segment;
91
92
93 struct SET {
94         size_t p;             /**< Next bucket to be split */
95         size_t maxp;          /**< upper bound on p during expansion */
96         size_t nkey;          /**< current # keys */
97         size_t nseg;          /**< current # segments */
98         Segment *dir[DIRECTORY_SIZE];
99         MANGLEP(cmp_fun) cmp;     /**< function comparing entries */
100         unsigned iter_i, iter_j;
101         Element *iter_tail;       /**< non-NULL while iterating over elts */
102 #ifdef PSET
103         Element *free_list;       /**< list of free Elements */
104 #endif
105         struct obstack obst;      /**< obstack for allocation all data */
106 #ifdef STATS
107         size_t naccess, ncollision, ndups;
108         size_t max_chain_len;
109 #endif
110 #ifdef DEBUG
111         const char *tag;          /**< an optionally tag for distinguishing sets */
112 #endif
113 };
114
115
116 #ifdef STATS
117
118 void MANGLEP(stats) (SET *table)
119 {
120         size_t nfree = 0;
121 #ifdef PSET
122         Element *q = table->free_list;
123         while (q) { q = q->chain; ++nfree; }
124 #endif
125         lc_printf("     accesses  collisions        keys  duplicates     longest      wasted\n%12zu%12zu%12zu%12zu%12zu%12zu\n",
126                         table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
127 }
128
129 static inline void stat_chain_len(SET *table, size_t chain_len)
130 {
131         table->ncollision += chain_len;
132         if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
133 }
134
135 # define stat_access(table) (++(table)->naccess)
136 # define stat_dup(table) (++(table)->ndups)
137
138 #else /* !STATS */
139
140 # define stat_chain_len(table, chain_len) ((void)chain_len)
141 # define stat_access(table) ((void)0)
142 # define stat_dup(table) ((void)0)
143
144 #endif /* !STATS */
145
146 #ifdef DEBUG
147
148 const char *MANGLEP(tag);
149
150
151 void MANGLEP(describe) (SET *table)
152 {
153         size_t i, j, collide;
154         Element *ptr;
155         Segment *seg;
156
157         lc_printf("p=%zu maxp=%zu nkey=%zu nseg=%zu\n",
158                         table->p, table->maxp, table->nkey, table->nseg);
159         for (i = 0;  i < table->nseg;  i++) {
160                 seg = table->dir[i];
161                 for (j = 0;  j < SEGMENT_SIZE;  j++) {
162                         collide = 0;
163                         ptr = seg[j];
164                         while (ptr) {
165                                 if (collide) lc_printf("<%3zu>", collide);
166                                 else printf ("table");
167                                 lc_printf("[%zd][%3zd]: %u %p\n", i, j, ptr->entry.hash, (void *)ptr->entry.dptr);
168                                 ptr = ptr->chain;
169                                 ++collide;
170                         }
171                 }
172         }
173 #ifdef STATS
174         MANGLEP(stats)(table);
175 #endif
176 }
177
178 #endif /* !DEBUG */
179
180
181 SET *(PMANGLE(new)) (MANGLEP(cmp_fun) cmp, size_t nslots)
182 {
183         int i;
184         SET *table = XMALLOC(SET);
185
186         if (nslots > SEGMENT_SIZE * DIRECTORY_SIZE)
187                 nslots = DIRECTORY_SIZE;
188         else {
189                 assert (nslots >= 0);
190                 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
191                 for (i = SEGMENT_SIZE;  i < nslots;  i <<= 1) {
192                 }
193                 nslots = i >> SEGMENT_SIZE_SHIFT;
194         }
195
196         table->nseg = table->p = table->nkey = 0;
197         table->maxp = nslots << SEGMENT_SIZE_SHIFT;
198         table->cmp = cmp;
199         table->iter_tail = NULL;
200 #ifdef PSET
201         table->free_list = NULL;
202 #endif
203         obstack_init (&table->obst);
204
205         /* Make segments */
206         for (i = 0;  i < nslots;  ++i) {
207                 table->dir[i] = OALLOCNZ(&table->obst, Segment, SEGMENT_SIZE);
208                 table->nseg++;
209         }
210
211 #ifdef STATS
212         table->naccess = table->ncollision = table->ndups = 0;
213         table->max_chain_len = 0;
214 #endif
215 #ifdef DEBUG
216         table->tag = MANGLEP(tag);
217 #endif
218         return table;
219 }
220
221
222 void PMANGLE(del) (SET *table)
223 {
224 #ifdef DEBUG
225         MANGLEP(tag) = table->tag;
226 #endif
227         obstack_free (&table->obst, NULL);
228         xfree (table);
229 }
230
231 size_t MANGLEP(count) (SET *table)
232 {
233         return table->nkey;
234 }
235
236 /*
237  * do one iteration step, return 1
238  * if still data in the set, 0 else
239  */
240 static inline int iter_step(SET *table)
241 {
242         if (++table->iter_j >= SEGMENT_SIZE) {
243                 table->iter_j = 0;
244                 if (++table->iter_i >= table->nseg) {
245                         table->iter_i = 0;
246                         return 0;
247                 }
248         }
249         return 1;
250 }
251
252 /*
253  * finds the first entry in the table
254  */
255 void * MANGLEP(first) (SET *table)
256 {
257         assert (!table->iter_tail);
258         table->iter_i = 0;
259         table->iter_j = 0;
260         while (!table->dir[table->iter_i][table->iter_j]) {
261                 if (!iter_step (table)) return NULL;
262         }
263         table->iter_tail = table->dir[table->iter_i][table->iter_j];
264         assert (table->iter_tail->entry.dptr);
265         return table->iter_tail->entry.dptr;
266 }
267
268 /*
269  * returns next entry in the table
270  */
271 void *MANGLEP(next) (SET *table)
272 {
273         if (!table->iter_tail)
274                 return NULL;
275
276         /* follow collision chain */
277         table->iter_tail = table->iter_tail->chain;
278         if (!table->iter_tail) {
279                 /* go to next segment */
280                 do {
281                         if (!iter_step (table)) return NULL;
282                 } while (!table->dir[table->iter_i][table->iter_j]);
283                 table->iter_tail = table->dir[table->iter_i][table->iter_j];
284         }
285         assert (table->iter_tail->entry.dptr);
286         return table->iter_tail->entry.dptr;
287 }
288
289 void MANGLEP(break) (SET *table)
290 {
291         table->iter_tail = NULL;
292 }
293
294 /*
295  * limit the hash value
296  */
297 static inline unsigned Hash(SET *table, unsigned h)
298 {
299         unsigned address;
300         address = h & (table->maxp - 1);          /* h % table->maxp */
301         if (address < (unsigned)table->p)
302                 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
303         return address;
304 }
305
306 /*
307  * returns non-zero if the number of elements in
308  * the set is greater then number of segments * MAX_LOAD_FACTOR
309  */
310 static inline int loaded(SET *table)
311 {
312         return (  ++table->nkey
313                         > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
314 }
315
316 /*
317  * expand the hash-table: the algorithm is split, so on every
318  * insert, only ONE segment is rehashed!
319  *
320  * table->p contains the current segment to split
321  * after all segments were split, table->p is set to zero and
322  * table->maxp is duplicated.
323  */
324 static void expand_table(SET *table)
325 {
326         size_t NewAddress;
327         size_t OldSegmentIndex, NewSegmentIndex;
328         size_t OldSegmentDir, NewSegmentDir;
329         Segment *OldSegment;
330         Segment *NewSegment;
331         Element *Current;
332         Element **Previous;
333         Element **LastOfNew;
334
335         if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
336                 /* Locate the bucket to be split */
337                 OldSegmentDir   = table->p >> SEGMENT_SIZE_SHIFT;
338                 OldSegment      = table->dir[OldSegmentDir];
339                 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
340
341                 /* Expand address space; if necessary create a new segment */
342                 NewAddress      = table->maxp + table->p;
343                 NewSegmentDir   = NewAddress >> SEGMENT_SIZE_SHIFT;
344                 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
345                 if (NewSegmentIndex == 0) {
346                         table->dir[NewSegmentDir] = OALLOCNZ(&table->obst, Segment, SEGMENT_SIZE);
347                         table->nseg++;
348                 }
349                 NewSegment = table->dir[NewSegmentDir];
350
351                 /* Adjust state variables */
352                 table->p++;
353                 if (table->p == table->maxp) {
354                         table->maxp <<= 1;  /* table->maxp *= 2 */
355                         table->p = 0;
356                 }
357
358                 /* Relocate records to the new bucket */
359                 Previous = &OldSegment[OldSegmentIndex];
360                 Current = *Previous;
361                 LastOfNew = &NewSegment[NewSegmentIndex];
362                 *LastOfNew = NULL;
363                 while (Current != NULL) {
364                         if (Hash (table, Current->entry.hash) == NewAddress) {
365                                 /* move to new chain */
366                                 *LastOfNew = Current;
367                                 *Previous  = Current->chain;
368                                 LastOfNew  = &Current->chain;
369                                 Current    = Current->chain;
370                                 *LastOfNew = NULL;
371                         } else {
372                                 /* leave on old chain */
373                                 Previous = &Current->chain;
374                                 Current = Current->chain;
375                         }
376                 }
377         }
378 }
379
380
381 void * MANGLE(_,_search) (SET *table,
382                 const void *key,
383 #ifndef PSET
384                 size_t size,
385 #endif
386                 unsigned hash,
387                 MANGLE(_,_action) action)
388 {
389         unsigned h;
390         Segment *CurrentSegment;
391         int SegmentIndex;
392         MANGLEP(cmp_fun) cmp = table->cmp;
393         Segment q;
394         size_t chain_len = 0;
395
396         assert (table);
397         assert (key);
398 #ifdef DEBUG
399         MANGLEP(tag) = table->tag;
400 #endif
401         stat_access (table);
402
403         /* Find collision chain */
404         h = Hash (table, hash);
405         SegmentIndex   = h & (SEGMENT_SIZE-1);
406         CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
407         assert (CurrentSegment != NULL);
408         q = CurrentSegment[SegmentIndex];
409
410         /* Follow collision chain */
411         while (q && !EQUAL (cmp, q, key, size)) {
412                 q = q->chain;
413                 ++chain_len;
414         }
415
416         stat_chain_len(table, chain_len);
417
418         if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
419                 assert (!table->iter_tail && "insert an element into a set that is iterated");
420
421                 if (CurrentSegment[SegmentIndex]) stat_dup (table);
422
423 #ifdef PSET
424                 if (table->free_list) {
425                         q = table->free_list;
426                         table->free_list = table->free_list->chain;
427                 } else {
428                         q = OALLOC(&table->obst, Element);
429                 }
430                 q->entry.dptr = (void *)key;
431 #else
432                 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
433                 if (action == _set_hinsert0)
434                         obstack_grow0 (&table->obst, key, size);
435                 else
436                         obstack_grow (&table->obst, key, size);
437                 q = (Segment) obstack_finish (&table->obst);
438                 q->entry.size = size;
439 #endif
440                 q->chain = CurrentSegment[SegmentIndex];
441                 q->entry.hash = hash;
442                 CurrentSegment[SegmentIndex] = q;
443
444                 if (loaded (table)) {
445                         expand_table(table);    /* doesn't affect q */
446                 }
447         }
448
449         if (!q) return NULL;
450 #ifdef PSET
451         if (action == _pset_hinsert) return &q->entry;
452 #else
453         if (action == _set_hinsert || action == _set_hinsert0) return &q->entry;
454 #endif
455         return q->entry.dptr;
456 }
457
458
459 #ifdef PSET
460
461 int pset_default_ptr_cmp(const void *x, const void *y)
462 {
463         return x != y;
464 }
465
466 void *pset_remove(SET *table, const void *key, unsigned hash)
467 {
468         unsigned h;
469         Segment *CurrentSegment;
470         int SegmentIndex;
471         pset_cmp_fun cmp = table->cmp;
472         Segment *p;
473         Segment q;
474         int chain_len = 0;
475
476         assert (table && !table->iter_tail);
477         stat_access (table);
478
479         /* Find collision chain */
480         h = Hash (table, hash);
481         SegmentIndex = h & (SEGMENT_SIZE-1);
482         CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
483         assert (CurrentSegment != NULL);
484         p = &CurrentSegment[SegmentIndex];
485
486         /* Follow collision chain */
487         while (!EQUAL (cmp, *p, key, size)) {
488                 p = &(*p)->chain;
489                 assert (*p);
490                 ++chain_len;
491         }
492
493         stat_chain_len (table, chain_len);
494
495         q = *p;
496
497         if (q == table->iter_tail) {
498                 /* removing current element */
499                 table->iter_tail = q->chain;
500                 if (!table->iter_tail) {
501                         /* go to next segment */
502                         do {
503                                 if (!iter_step (table))
504                                         break;
505                         } while (!table->dir[table->iter_i][table->iter_j]);
506                         table->iter_tail = table->dir[table->iter_i][table->iter_j];
507                 }
508         }
509
510         *p = (*p)->chain;
511         q->chain = table->free_list;
512         table->free_list = q;
513         --table->nkey;
514
515         return q->entry.dptr;
516 }
517
518
519 void *(pset_find) (SET *se, const void *key, unsigned hash)
520 {
521         return pset_find (se, key, hash);
522 }
523
524
525 void *(pset_insert) (SET *se, const void *key, unsigned hash)
526 {
527         return pset_insert (se, key, hash);
528 }
529
530
531         MANGLEP(entry) *
532 (pset_hinsert) (SET *se, const void *key, unsigned hash)
533 {
534         return pset_hinsert (se, key, hash);
535 }
536
537 void pset_insert_pset_ptr(pset *target, pset *src)
538 {
539         void *elt;
540         for (elt = pset_first(src); elt; elt = pset_next(src)) {
541                 pset_insert_ptr(target, elt);
542         }
543 }
544
545 #else /* !PSET */
546
547 void *(set_find) (set *se, const void *key, size_t size, unsigned hash)
548 {
549         return set_find (se, key, size, hash);
550 }
551
552
553 void *(set_insert) (set *se, const void *key, size_t size, unsigned hash)
554 {
555         return set_insert (se, key, size, hash);
556 }
557
558
559 set_entry *(set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
560 {
561         return set_hinsert (se, key, size, hash);
562 }
563
564 #endif /* !PSET */