remove $Id$, it doesn't work with git anyway
[libfirm] / ir / adt / set.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       implementation of set
23  * @author      Markus Armbruster
24  */
25
26 /*  This code is derived from:
27
28     From: ejp@ausmelb.oz.AU (Esmond Pitt)
29     Date: Tue, 7 Mar 1989 22:06:26 GMT
30     Subject: v06i042: dynamic hashing version of hsearch(3)
31     Message-ID: <1821@basser.oz>
32     Newsgroups: comp.sources.misc
33     Sender: msgs@basser.oz
34
35     Posting-number: Volume 6, Issue 42
36     Submitted-By: Esmond Pitt <ejp@ausmelb.oz.AU>
37     Archive-name: dynamic-hash
38
39     * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
40     * Coded into C, with minor code improvements, and with hsearch(3) interface,
41     * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
42
43     TODO: Fix Esmond's ugly MixedCapsIdentifiers ;->
44  */
45 #include "config.h"
46
47 #ifdef PSET
48 # define SET pset
49 # define PMANGLE(pre) pre##_pset
50 # define MANGLEP(post) pset_##post
51 # define MANGLE(pre, post) pre##pset##post
52 # define EQUAL(cmp, elt, key, siz) (!(cmp) ((elt)->entry.dptr, (key)))
53 #else
54 # define SET set
55 # define PMANGLE(pre) pre##_set
56 # define MANGLEP(post) set_##post
57 # define MANGLE(pre, post) pre##set##post
58 # define EQUAL(cmp, elt, key, siz) \
59     (((elt)->entry.size == (siz)) && !(cmp) ((elt)->entry.dptr, (key), (siz)))
60 #endif
61
62 #include <assert.h>
63 #include <stdlib.h>
64 #include <stdio.h>
65 #include <string.h>
66 #include "xmalloc.h"
67 #include "lc_printf.h"
68 #ifdef PSET
69 # include "pset.h"
70 #else
71 # include "set.h"
72 #endif
73
74
75 #define TOBSTACK_ID MANGLEP(tag)
76 #include "obst.h"
77
78
79 #define SEGMENT_SIZE_SHIFT   8
80 #define SEGMENT_SIZE         (1 << SEGMENT_SIZE_SHIFT)
81 #define DIRECTORY_SIZE_SHIFT 8
82 #define DIRECTORY_SIZE       (1 << DIRECTORY_SIZE_SHIFT)
83 #define MAX_LOAD_FACTOR      4
84
85
86 typedef struct element {
87         struct element *chain;    /**< for chaining Elements */
88         MANGLEP (entry) entry;
89 } Element, *Segment;
90
91
92 struct SET {
93         size_t p;             /**< Next bucket to be split */
94         size_t maxp;          /**< upper bound on p during expansion */
95         size_t nkey;          /**< current # keys */
96         size_t nseg;          /**< current # segments */
97         Segment *dir[DIRECTORY_SIZE];
98         MANGLEP(cmp_fun) cmp;     /**< function comparing entries */
99         unsigned iter_i, iter_j;
100         Element *iter_tail;       /**< non-NULL while iterating over elts */
101 #ifdef PSET
102         Element *free_list;       /**< list of free Elements */
103 #endif
104         struct obstack obst;      /**< obstack for allocation all data */
105 #ifdef STATS
106         size_t naccess, ncollision, ndups;
107         size_t max_chain_len;
108 #endif
109 #ifdef DEBUG
110         const char *tag;          /**< an optionally tag for distinguishing sets */
111 #endif
112 };
113
114
115 #ifdef STATS
116
117 void MANGLEP(stats) (SET *table)
118 {
119         size_t nfree = 0;
120 #ifdef PSET
121         Element *q = table->free_list;
122         while (q) { q = q->chain; ++nfree; }
123 #endif
124         lc_printf("     accesses  collisions        keys  duplicates     longest      wasted\n%12zu%12zu%12zu%12zu%12zu%12zu\n",
125                         table->naccess, table->ncollision, table->nkey, table->ndups, table->max_chain_len, nfree);
126 }
127
128 static inline void stat_chain_len(SET *table, size_t chain_len)
129 {
130         table->ncollision += chain_len;
131         if (table->max_chain_len < chain_len) table->max_chain_len = chain_len;
132 }
133
134 # define stat_access(table) (++(table)->naccess)
135 # define stat_dup(table) (++(table)->ndups)
136
137 #else /* !STATS */
138
139 # define stat_chain_len(table, chain_len) ((void)chain_len)
140 # define stat_access(table) ((void)0)
141 # define stat_dup(table) ((void)0)
142
143 #endif /* !STATS */
144
145 #ifdef DEBUG
146
147 const char *MANGLEP(tag);
148
149
150 void MANGLEP(describe) (SET *table)
151 {
152         size_t i, j, collide;
153         Element *ptr;
154         Segment *seg;
155
156         lc_printf("p=%zu maxp=%zu nkey=%zu nseg=%zu\n",
157                         table->p, table->maxp, table->nkey, table->nseg);
158         for (i = 0;  i < table->nseg;  i++) {
159                 seg = table->dir[i];
160                 for (j = 0;  j < SEGMENT_SIZE;  j++) {
161                         collide = 0;
162                         ptr = seg[j];
163                         while (ptr) {
164                                 if (collide) lc_printf("<%3zu>", collide);
165                                 else printf ("table");
166                                 lc_printf("[%zd][%3zd]: %u %p\n", i, j, ptr->entry.hash, (void *)ptr->entry.dptr);
167                                 ptr = ptr->chain;
168                                 ++collide;
169                         }
170                 }
171         }
172 #ifdef STATS
173         MANGLEP(stats)(table);
174 #endif
175 }
176
177 #endif /* !DEBUG */
178
179
180 SET *(PMANGLE(new)) (MANGLEP(cmp_fun) cmp, size_t nslots)
181 {
182         SET   *table = XMALLOC(SET);
183         size_t i;
184
185         if (nslots > SEGMENT_SIZE * DIRECTORY_SIZE)
186                 nslots = DIRECTORY_SIZE;
187         else {
188                 /* Adjust nslots up to next power of 2, minimum SEGMENT_SIZE */
189                 for (i = SEGMENT_SIZE;  i < nslots;  i <<= 1) {
190                 }
191                 nslots = i >> SEGMENT_SIZE_SHIFT;
192         }
193
194         table->nseg = table->p = table->nkey = 0;
195         table->maxp = nslots << SEGMENT_SIZE_SHIFT;
196         table->cmp = cmp;
197         table->iter_tail = NULL;
198 #ifdef PSET
199         table->free_list = NULL;
200 #endif
201         obstack_init (&table->obst);
202
203         /* Make segments */
204         for (i = 0;  i < nslots;  ++i) {
205                 table->dir[i] = OALLOCNZ(&table->obst, Segment, SEGMENT_SIZE);
206                 table->nseg++;
207         }
208
209 #ifdef STATS
210         table->naccess = table->ncollision = table->ndups = 0;
211         table->max_chain_len = 0;
212 #endif
213 #ifdef DEBUG
214         table->tag = MANGLEP(tag);
215 #endif
216         return table;
217 }
218
219
220 void PMANGLE(del) (SET *table)
221 {
222 #ifdef DEBUG
223         MANGLEP(tag) = table->tag;
224 #endif
225         obstack_free (&table->obst, NULL);
226         xfree (table);
227 }
228
229 size_t MANGLEP(count) (SET *table)
230 {
231         return table->nkey;
232 }
233
234 /*
235  * do one iteration step, return 1
236  * if still data in the set, 0 else
237  */
238 static inline int iter_step(SET *table)
239 {
240         if (++table->iter_j >= SEGMENT_SIZE) {
241                 table->iter_j = 0;
242                 if (++table->iter_i >= table->nseg) {
243                         table->iter_i = 0;
244                         return 0;
245                 }
246         }
247         return 1;
248 }
249
250 /*
251  * finds the first entry in the table
252  */
253 void * MANGLEP(first) (SET *table)
254 {
255         assert (!table->iter_tail);
256         table->iter_i = 0;
257         table->iter_j = 0;
258         while (!table->dir[table->iter_i][table->iter_j]) {
259                 if (!iter_step (table)) return NULL;
260         }
261         table->iter_tail = table->dir[table->iter_i][table->iter_j];
262         assert (table->iter_tail->entry.dptr);
263         return table->iter_tail->entry.dptr;
264 }
265
266 /*
267  * returns next entry in the table
268  */
269 void *MANGLEP(next) (SET *table)
270 {
271         if (!table->iter_tail)
272                 return NULL;
273
274         /* follow collision chain */
275         table->iter_tail = table->iter_tail->chain;
276         if (!table->iter_tail) {
277                 /* go to next segment */
278                 do {
279                         if (!iter_step (table)) return NULL;
280                 } while (!table->dir[table->iter_i][table->iter_j]);
281                 table->iter_tail = table->dir[table->iter_i][table->iter_j];
282         }
283         assert (table->iter_tail->entry.dptr);
284         return table->iter_tail->entry.dptr;
285 }
286
287 void MANGLEP(break) (SET *table)
288 {
289         table->iter_tail = NULL;
290 }
291
292 /*
293  * limit the hash value
294  */
295 static inline unsigned Hash(SET *table, unsigned h)
296 {
297         unsigned address;
298         address = h & (table->maxp - 1);          /* h % table->maxp */
299         if (address < (unsigned)table->p)
300                 address = h & ((table->maxp << 1) - 1); /* h % (2*table->maxp) */
301         return address;
302 }
303
304 /*
305  * returns non-zero if the number of elements in
306  * the set is greater then number of segments * MAX_LOAD_FACTOR
307  */
308 static inline int loaded(SET *table)
309 {
310         return (  ++table->nkey
311                         > (table->nseg << SEGMENT_SIZE_SHIFT) * MAX_LOAD_FACTOR);
312 }
313
314 /*
315  * expand the hash-table: the algorithm is split, so on every
316  * insert, only ONE segment is rehashed!
317  *
318  * table->p contains the current segment to split
319  * after all segments were split, table->p is set to zero and
320  * table->maxp is duplicated.
321  */
322 static void expand_table(SET *table)
323 {
324         size_t NewAddress;
325         size_t OldSegmentIndex, NewSegmentIndex;
326         size_t OldSegmentDir, NewSegmentDir;
327         Segment *OldSegment;
328         Segment *NewSegment;
329         Element *Current;
330         Element **Previous;
331         Element **LastOfNew;
332
333         if (table->maxp + table->p < (DIRECTORY_SIZE << SEGMENT_SIZE_SHIFT)) {
334                 /* Locate the bucket to be split */
335                 OldSegmentDir   = table->p >> SEGMENT_SIZE_SHIFT;
336                 OldSegment      = table->dir[OldSegmentDir];
337                 OldSegmentIndex = table->p & (SEGMENT_SIZE-1);
338
339                 /* Expand address space; if necessary create a new segment */
340                 NewAddress      = table->maxp + table->p;
341                 NewSegmentDir   = NewAddress >> SEGMENT_SIZE_SHIFT;
342                 NewSegmentIndex = NewAddress & (SEGMENT_SIZE-1);
343                 if (NewSegmentIndex == 0) {
344                         table->dir[NewSegmentDir] = OALLOCNZ(&table->obst, Segment, SEGMENT_SIZE);
345                         table->nseg++;
346                 }
347                 NewSegment = table->dir[NewSegmentDir];
348
349                 /* Adjust state variables */
350                 table->p++;
351                 if (table->p == table->maxp) {
352                         table->maxp <<= 1;  /* table->maxp *= 2 */
353                         table->p = 0;
354                 }
355
356                 /* Relocate records to the new bucket */
357                 Previous = &OldSegment[OldSegmentIndex];
358                 Current = *Previous;
359                 LastOfNew = &NewSegment[NewSegmentIndex];
360                 *LastOfNew = NULL;
361                 while (Current != NULL) {
362                         if (Hash (table, Current->entry.hash) == NewAddress) {
363                                 /* move to new chain */
364                                 *LastOfNew = Current;
365                                 *Previous  = Current->chain;
366                                 LastOfNew  = &Current->chain;
367                                 Current    = Current->chain;
368                                 *LastOfNew = NULL;
369                         } else {
370                                 /* leave on old chain */
371                                 Previous = &Current->chain;
372                                 Current = Current->chain;
373                         }
374                 }
375         }
376 }
377
378
379 void * MANGLE(_,_search) (SET *table,
380                 const void *key,
381 #ifndef PSET
382                 size_t size,
383 #endif
384                 unsigned hash,
385                 MANGLE(_,_action) action)
386 {
387         unsigned h;
388         Segment *CurrentSegment;
389         int SegmentIndex;
390         MANGLEP(cmp_fun) cmp = table->cmp;
391         Segment q;
392         size_t chain_len = 0;
393
394         assert (table);
395         assert (key);
396 #ifdef DEBUG
397         MANGLEP(tag) = table->tag;
398 #endif
399         stat_access (table);
400
401         /* Find collision chain */
402         h = Hash (table, hash);
403         SegmentIndex   = h & (SEGMENT_SIZE-1);
404         CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
405         assert (CurrentSegment != NULL);
406         q = CurrentSegment[SegmentIndex];
407
408         /* Follow collision chain */
409         while (q && !EQUAL (cmp, q, key, size)) {
410                 q = q->chain;
411                 ++chain_len;
412         }
413
414         stat_chain_len(table, chain_len);
415
416         if (!q && (action != MANGLE(_,_find))) { /* not found, insert */
417                 assert (!table->iter_tail && "insert an element into a set that is iterated");
418
419                 if (CurrentSegment[SegmentIndex]) stat_dup (table);
420
421 #ifdef PSET
422                 if (table->free_list) {
423                         q = table->free_list;
424                         table->free_list = table->free_list->chain;
425                 } else {
426                         q = OALLOC(&table->obst, Element);
427                 }
428                 q->entry.dptr = (void *)key;
429 #else
430                 obstack_blank (&table->obst, offsetof (Element, entry.dptr));
431                 if (action == _set_hinsert0)
432                         obstack_grow0 (&table->obst, key, size);
433                 else
434                         obstack_grow (&table->obst, key, size);
435                 q = (Segment) obstack_finish (&table->obst);
436                 q->entry.size = size;
437 #endif
438                 q->chain = CurrentSegment[SegmentIndex];
439                 q->entry.hash = hash;
440                 CurrentSegment[SegmentIndex] = q;
441
442                 if (loaded (table)) {
443                         expand_table(table);    /* doesn't affect q */
444                 }
445         }
446
447         if (!q) return NULL;
448 #ifdef PSET
449         if (action == _pset_hinsert) return &q->entry;
450 #else
451         if (action == _set_hinsert || action == _set_hinsert0) return &q->entry;
452 #endif
453         return q->entry.dptr;
454 }
455
456
457 #ifdef PSET
458
459 int pset_default_ptr_cmp(const void *x, const void *y)
460 {
461         return x != y;
462 }
463
464 void *pset_remove(SET *table, const void *key, unsigned hash)
465 {
466         unsigned h;
467         Segment *CurrentSegment;
468         int SegmentIndex;
469         pset_cmp_fun cmp = table->cmp;
470         Segment *p;
471         Segment q;
472         int chain_len = 0;
473
474         assert (table && !table->iter_tail);
475         stat_access (table);
476
477         /* Find collision chain */
478         h = Hash (table, hash);
479         SegmentIndex = h & (SEGMENT_SIZE-1);
480         CurrentSegment = table->dir[h >> SEGMENT_SIZE_SHIFT];
481         assert (CurrentSegment != NULL);
482         p = &CurrentSegment[SegmentIndex];
483
484         /* Follow collision chain */
485         while (!EQUAL (cmp, *p, key, size)) {
486                 p = &(*p)->chain;
487                 assert (*p);
488                 ++chain_len;
489         }
490
491         stat_chain_len (table, chain_len);
492
493         q = *p;
494
495         if (q == table->iter_tail) {
496                 /* removing current element */
497                 table->iter_tail = q->chain;
498                 if (!table->iter_tail) {
499                         /* go to next segment */
500                         do {
501                                 if (!iter_step (table))
502                                         break;
503                         } while (!table->dir[table->iter_i][table->iter_j]);
504                         table->iter_tail = table->dir[table->iter_i][table->iter_j];
505                 }
506         }
507
508         *p = (*p)->chain;
509         q->chain = table->free_list;
510         table->free_list = q;
511         --table->nkey;
512
513         return q->entry.dptr;
514 }
515
516
517 void *(pset_find) (SET *se, const void *key, unsigned hash)
518 {
519         return pset_find (se, key, hash);
520 }
521
522
523 void *(pset_insert) (SET *se, const void *key, unsigned hash)
524 {
525         return pset_insert (se, key, hash);
526 }
527
528
529         MANGLEP(entry) *
530 (pset_hinsert) (SET *se, const void *key, unsigned hash)
531 {
532         return pset_hinsert (se, key, hash);
533 }
534
535 void pset_insert_pset_ptr(pset *target, pset *src)
536 {
537         void *elt;
538         for (elt = pset_first(src); elt; elt = pset_next(src)) {
539                 pset_insert_ptr(target, elt);
540         }
541 }
542
543 #else /* !PSET */
544
545 void *(set_find) (set *se, const void *key, size_t size, unsigned hash)
546 {
547         return set_find (se, key, size, hash);
548 }
549
550
551 void *(set_insert) (set *se, const void *key, size_t size, unsigned hash)
552 {
553         return set_insert (se, key, size, hash);
554 }
555
556
557 set_entry *(set_hinsert) (set *se, const void *key, size_t size, unsigned hash)
558 {
559         return set_hinsert (se, key, size, hash);
560 }
561
562 #endif /* !PSET */