2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief Statistics for Firm. Pattern history.
25 #include "pattern_dmp.h"
28 #include "lc_printf.h"
31 * just to make some things clear :-), the
/* Pastes the element type name onto "pset_" to form a per-type alias name. */
34 #define HASH_MAP(type) pset_##type
/* Makes HASH_MAP(pattern_entry_t) name a real type: a plain pset. */
36 typedef pset pset_pattern_entry_t;
/* Raw byte type used for all encoded pattern data. */
38 typedef unsigned char BYTE;
40 /** Maximum size of the pattern store. */
/* Used as the size of the on-stack scratch buffers for encoding and for reading patterns back. */
41 #define PATTERN_STORE_SIZE 2048
/* Byte buffer with a cursor: `next` is compared against `end` before each
   access and `next - start` is the current content length. */
47 typedef struct code_buf_t {
48 BYTE *next; /**< Next byte address to be written. */
49 BYTE *end; /**< End address of the buffer. */
50 BYTE *start; /**< Start address of the buffer. */
51 unsigned hash; /**< The hash value for the buffer content. */
52 unsigned overrun; /**< flag set if the buffer was overrun */
/* Lead-byte markers of the variable-length code. The marker occupies the top
   bits of the first byte; the remaining bits of that byte carry payload. */
59 VLC_7BIT = 0x00, /**< 8 bit code, carrying 7 bits payload */
60 VLC_14BIT = 0x80, /**< 16 bit code, carrying 14 bits payload */
61 VLC_21BIT = 0xC0, /**< 24 bit code, carrying 21 bits payload */
62 VLC_28BIT = 0xE0, /**< 32 bit code, carrying 28 bits payload */
63 VLC_32BIT = 0xF0, /**< 40 bit code, carrying 32 bits payload */
/* Byte values from VLC_TAG_FIRST upward are tags, not value codes. */
65 VLC_TAG_FIRST = 0xF1, /**< First possible tag value. */
66 VLC_TAG_ICONST = 0xFB, /**< Encodes an integer constant. */
67 VLC_TAG_EMPTY = 0xFC, /**< Encodes an empty entity. */
68 VLC_TAG_OPTION = 0xFD, /**< Options exists. */
69 VLC_TAG_REF = 0xFE, /**< Special tag, next code is an ID. */
70 VLC_TAG_END = 0xFF, /**< End tag. */
74 * An entry for holding one pattern.
/* `buf` is declared with one element but entries carry `len` payload bytes;
   code that sizes an entry uses offsetof(pattern_entry_t, buf) + len. */
76 typedef struct pattern_entry_t {
77 counter_t count; /**< Amount of pattern occurance. */
78 size_t len; /**< The length of the VLC encoded buffer. */
79 BYTE buf[1]; /**< The buffer containing the VLC encoded pattern. */
83 * Current options for the pattern matcher.
/* Bit flags; they are OR-ed together into an `unsigned options` mask. */
86 OPT_WITH_MODE = 0x00000001, /**< use modes */
87 OPT_ENC_DAG = 0x00000002, /**< encode DAGs, not terms */
88 OPT_WITH_ICONST = 0x00000004, /**< encode integer constants */
89 OPT_PERSIST_PATTERN = 0x00000008, /**< persistent pattern hash */
/* All state of the pattern-history module, kept in one record. */
96 typedef struct pattern_info_t {
97 int enable; /**< If non-zero, this module is enabled. */
98 struct obstack obst; /**< An obstack containing the counters. */
99 HASH_MAP(pattern_entry_t) *pattern_hash; /**< A hash map containing the pattern. */
100 unsigned bound; /**< Lowest value for pattern output. */
101 unsigned options; /**< Current option mask. */
/* min_depth..max_depth (inclusive) is the range of depths walked per graph. */
102 unsigned min_depth; /**< Minimum pattern depth. */
103 unsigned max_depth; /**< Maximum pattern depth. */
/* The single module-wide state object; the functions in this file reach it through `status`. */
109 static pattern_info_t _status, *status = &_status;
112 * Compare two pattern for its occurance counter.
114 static int pattern_count_cmp(const void *elt, const void *key)
118 pattern_entry_t **e1 = (pattern_entry_t **)elt;
119 pattern_entry_t **e2 = (pattern_entry_t **)key;
121 /* we want it sorted in descending order */
122 cmp = cnt_cmp(&(*e2)->count, &(*e1)->count);
128 * Compare two pattern for its pattern hash.
130 static int pattern_cmp(const void *elt, const void *key)
132 const pattern_entry_t *e1 = (const pattern_entry_t*)elt;
133 const pattern_entry_t *e2 = (const pattern_entry_t*)key;
135 if (e1->len == e2->len)
136 return memcmp(e1->buf, e2->buf, e1->len);
138 return e1->len < e2->len ? -1 : +1;
142 * Initialize a code buffer.
144 * @param buf the code buffer
145 * @param data a buffer address
146 * @param len the length of the data buffer
/* Binds buf to the caller-provided storage data[0..len) and seeds the hash.
   NOTE(review): the assignments to start, next and overrun are not shown
   here - confirm they are initialised as well. */
148 static void init_buf(CODE_BUFFER *buf, BYTE *data, size_t len)
/* end points one past the last usable byte */
152 buf->end = data + len;
153 buf->hash = 0x2BAD4; /* An arbitrary seed. */
158 * Put a byte into the buffer.
160 * @param buf the code buffer
161 * @param byte the byte to write
163 * The hash value for the buffer content is updated.
165 static inline void put_byte(CODE_BUFFER *buf, BYTE byte)
/* only acts while the cursor is still inside the buffer */
167 if (buf->next < buf->end) {
/* rolling hash: previous hash times 9, XOR-ed with the new byte */
169 buf->hash = (buf->hash * 9) ^ byte;
/* NOTE(review): the store of the byte and the handling of a full buffer
   (presumably setting the overrun flag) are not shown here - confirm. */
176 * Returns the current length of a buffer.
178 * @param buf the code buffer
180 * @return the length of the buffer content
182 static size_t buf_lenght(const CODE_BUFFER *buf)
184 return buf->next - buf->start;
188 * Returns the current content of a buffer.
190 * @param buf the code buffer
192 * @return the start address of the buffer content
/* Read-only accessor. The caller in pattern_get_entry() copies buf_lenght()
   bytes starting at the returned address. */
194 static const BYTE *buf_content(const CODE_BUFFER *buf)
200 * Returns the hash value of a buffer.
202 * @param buf the code buffer
204 * @return the hash value of the buffer content
/* Read-only accessor. The value is used as the hash for the pset lookup and
   insert of the pattern entry built from this buffer. */
206 static unsigned buf_hash(const CODE_BUFFER *buf)
212 * Returns non-zero if a buffer overrun has occurred.
214 * @param buf the code buffer
/* Read-only accessor. It is checked after a pattern has been encoded; an
   overrun is reported on stderr. */
216 static unsigned buf_overrun(const CODE_BUFFER *buf)
222 * Returns the next byte from the buffer WITHOUT dropping.
224 * @param buf the code buffer
226 * @return the next byte from the code buffer
/* Peek: takes the buffer non-const but, per the description above, leaves
   the cursor where it is. */
228 static inline BYTE look_byte(CODE_BUFFER *buf)
/* a byte is only available while the cursor is before end;
   NOTE(review): the value returned for an exhausted buffer is not shown
   here - confirm. */
230 if (buf->next < buf->end)
236 * Returns the next byte from the buffer WITH dropping.
238 * @param buf the code buffer
240 * @return the next byte from the code buffer
/* Consuming counterpart of look_byte(). */
242 static inline BYTE get_byte(CODE_BUFFER *buf)
/* a byte is only available while the cursor is before end;
   NOTE(review): the value returned for an exhausted buffer is not shown
   here - confirm. */
244 if (buf->next < buf->end)
249 #define BITS(n) (1 << (n))
252 * Put a 32bit value into the buffer.
254 * @param buf the code buffer
255 * @param code the code to be written into the buffer
257 static void put_code(CODE_BUFFER *buf, unsigned code)
259 if (code < BITS(7)) {
260 put_byte(buf, VLC_7BIT | code);
261 } else if (code < BITS(6 + 8)) {
262 put_byte(buf, VLC_14BIT | (code >> 8));
264 } else if (code < BITS(5 + 8 + 8)) {
265 put_byte(buf, VLC_21BIT | (code >> 16));
266 put_byte(buf, code >> 8);
268 } else if (code < BITS(4 + 8 + 8 + 8)) {
269 put_byte(buf, VLC_28BIT | (code >> 24));
270 put_byte(buf, code >> 16);
271 put_byte(buf, code >> 8);
274 put_byte(buf, VLC_32BIT);
275 put_byte(buf, code >> 24);
276 put_byte(buf, code >> 16);
277 put_byte(buf, code >> 8);
282 #define BIT_MASK(n) ((1 << (n)) - 1)
285 * Get 32 bit from the buffer.
287 * @param buf the code buffer
289 * @return next 32bit value from the code buffer
291 static unsigned get_code(CODE_BUFFER *buf)
293 unsigned code = get_byte(buf);
295 if (code < VLC_14BIT)
297 if (code < VLC_21BIT)
298 return ((code & BIT_MASK(6)) << 8) | get_byte(buf);
299 if (code < VLC_28BIT) {
300 code = ((code & BIT_MASK(5)) << 16) | (get_byte(buf) << 8);
301 code |= get_byte(buf);
304 if (code < VLC_32BIT) {
305 code = ((code & BIT_MASK(4)) << 24) | (get_byte(buf) << 16);
306 code |= get_byte(buf) << 8;
307 code |= get_byte(buf);
310 if (code == VLC_32BIT) {
311 code = get_byte(buf) << 24;
312 code |= get_byte(buf) << 16;
313 code |= get_byte(buf) << 8;
314 code |= get_byte(buf);
317 /* should not happen */
318 panic("Wrong code in buffer");
322 * Put a tag into the buffer.
324 * @param buf the code buffer
325 * @param tag the tag to write to the code buffer
327 static void put_tag(CODE_BUFFER *buf, BYTE tag)
/* tags occupy the byte values from VLC_TAG_FIRST upward; anything lower
   would be read back as a value code, so it is rejected here */
329 assert(tag >= VLC_TAG_FIRST && "invalid tag");
335 * Returns the next tag or zero if the next code isn't a tag.
337 * @param buf the code buffer
339 * @return the next tag in the code buffer
341 static BYTE next_tag(CODE_BUFFER *buf)
343 BYTE b = look_byte(buf);
345 if (b >= VLC_TAG_FIRST)
346 return get_byte(buf);
351 * An Environment for the pattern encoder.
/* The encoder and decoder workers take this state as a codec_env_t pointer
   (presumably the typedef name of this struct - confirm). */
353 typedef struct codec_enc_t {
354 CODE_BUFFER *buf; /**< The current code buffer. */
355 set *id_set; /**< A set containing all already seen Firm nodes. */
356 unsigned curr_id; /**< The current node id. */
357 unsigned options; /**< The encoding options. */
358 pattern_dumper_t *dmp; /**< The dumper for the decoder. */
/* Element type of the encoder's ID set: maps an address to the ID assigned to it. */
364 typedef struct addr_entry_t {
365 void *addr; /**< the address */
366 unsigned id; /**< associated ID */
370 * Compare two addresses.
372 static int addr_cmp(const void *p1, const void *p2, size_t size)
374 const addr_entry_t *e1 = (const addr_entry_t*)p1;
375 const addr_entry_t *e2 = (const addr_entry_t*)p2;
378 return e1->addr != e2->addr;
382 * Return the index of a (existing) mode.
/* Linear search over the registered modes, comparing by pointer identity. */
384 static size_t find_mode_index(const ir_mode *mode)
386 size_t i, n = ir_get_n_modes();
388 for (i = 0; i < n; ++i) {
389 if (ir_get_mode(i) == mode)
392 /* should really not happen */
/* reaching this point means the mode is not among the registered ones;
   NOTE(review): the value returned in builds without assertions is not
   shown here - confirm. */
393 assert(!"Cound not find index of mode in find_mode_index()");
398 * Encodes an IR-node, recursive worker.
400 * @return reached depth
/* Emits one node into env->buf and then recurses into its predecessors.
   A node already present in env->id_set is emitted as a REF tag plus its
   earlier ID. Otherwise the opcode is written, optionally followed by mode
   and integer-constant information (depending on env->options), then the
   predecessor count and the predecessors. */
402 static int _encode_node(ir_node *node, int max_depth, codec_env_t *env)
404 addr_entry_t entry, *r_entry;
409 unsigned code = get_irn_opcode(node);
411 /* insert the node into our ID map */
413 entry.id = env->curr_id;
/* hinsert yields the stored entry: the one just offered, or an existing
   entry with the same address */
415 s_entry = set_hinsert(env->id_set, &entry, sizeof(entry), hash_ptr(node));
416 r_entry = (addr_entry_t *)s_entry->dptr;
/* a stored ID different from the one just offered means the node was
   already seen */
418 if (r_entry->id != env->curr_id) {
419 /* already in the map, add an REF */
420 put_tag(env->buf, VLC_TAG_REF);
421 put_code(env->buf, r_entry->id);
425 /* a new entry, proceed */
429 put_code(env->buf, (unsigned)code);
431 /* do we need the mode ? */
432 if (env->options & OPT_WITH_MODE) {
433 ir_mode *mode = get_irn_mode(node);
/* a mode is written as its index; EMPTY marks "no mode written" */
436 put_code(env->buf, find_mode_index(mode));
438 put_tag(env->buf, VLC_TAG_EMPTY);
441 /* do we need integer constants */
442 if (env->options & OPT_WITH_ICONST) {
443 if (code == iro_Const) {
444 ir_tarval *tv = get_Const_tarval(node);
446 if (tarval_is_long(tv)) {
447 long v = get_tarval_long(tv);
449 put_tag(env->buf, VLC_TAG_ICONST);
/* note: put_code() takes an unsigned, so the long is converted here */
450 put_code(env->buf, v);
/* depth budget exhausted: a predecessor count of 0 is written */
457 if (max_depth <= 0) {
458 put_code(env->buf, 0);
462 preds = get_irn_arity(node);
463 put_code(env->buf, preds);
466 if (is_op_commutative(get_irn_op(node))) {
467 ir_node *l = get_binop_left(node);
468 ir_node *r = get_binop_right(node);
/* operands are compared by opcode; NOTE(review): presumably they are
   swapped into a canonical order so both operand orders encode alike -
   the swap itself is not shown here, confirm. */
469 int opcode_diff = (int)get_irn_opcode(l) - (int)get_irn_opcode(r);
471 if (opcode_diff > 0) {
475 } else if (opcode_diff == 0 && l != r) {
476 /* Both nodes have the same opcode, but are different.
477 Need a better method here to decide which goes to the left side. */
480 /* special handling for commutative operators */
481 depth = _encode_node(l, max_depth, env);
484 depth = _encode_node(r, max_depth, env);
/* general case: every predecessor is encoded in operand order */
488 for (i = 0; i < preds; ++i) {
489 ir_node *n = get_irn_n(node, i);
491 depth = _encode_node(n, max_depth, env);
500 * Encode a DAG starting by the IR-node node.
502 * @param node The root node of the graph
503 * @param buf The code buffer to store the bitstring in
504 * @param max_depth The maximum depth for descending
506 * @return The depth of the encoded graph (without cycles)
/* Sets up a fresh encoder environment from the module options, writes the
   option header and runs the recursive worker on the root. */
508 static int encode_node(ir_node *node, CODE_BUFFER *buf, int max_depth)
513 /* initialize the encoder environment */
515 env.curr_id = 1; /* 0 is used for special purpose */
516 env.options = status->options;
/* the set of already seen nodes is only created for DAG encoding */
519 if (env.options & OPT_ENC_DAG)
520 env.id_set = new_set(addr_cmp, 32);
524 /* encode options if any for the decoder */
526 put_tag(buf, VLC_TAG_OPTION);
527 put_code(buf, env.options);
530 res = _encode_node(node, max_depth, &env);
532 if (env.id_set != NULL)
/* the worker's result is subtracted from the starting budget to give the
   depth actually reached */
535 return max_depth - res;
539 * Decode an IR-node, recursive walker.
/* Reads one encoded node from env->buf and reports it to the dumper
   env->dmp: either a reference to an earlier node, or a new node followed
   by its children. `parent` and `position` identify the edge this node
   hangs on. */
541 static void _decode_node(unsigned parent, int position, codec_env_t *env)
545 unsigned mode_code = 0;
549 code = next_tag(env->buf);
550 if (code == VLC_TAG_REF) { /* it's a REF */
/* the code following a REF tag is the ID of the referenced node */
551 code = get_code(env->buf);
557 * the mode of a Firm edge can be either computed from its target or
558 * from its source and position. We must take the second approach because
559 * we don't know the target here, it's a ref.
561 pattern_dump_edge(env->dmp, code, parent, position, edge_mode);
564 /* dump the node ref */
565 pattern_dump_ref(env->dmp, code);
571 op_code = get_code(env->buf);
573 /* get the mode if encoded */
574 if (env->options & OPT_WITH_MODE) {
/* an EMPTY tag stands for "no mode"; mode_code then keeps its initial 0 */
575 if (next_tag(env->buf) != VLC_TAG_EMPTY) {
576 mode_code = get_code(env->buf);
580 /* check, if a ICONST attribute is given */
581 if (next_tag(env->buf) == VLC_TAG_ICONST) {
582 iconst = get_code(env->buf);
591 * the mode of a Firm edge can be either computed from its target or
592 * from its source and position. We take the second approach because
593 * we need it anyway for ref's.
595 pattern_dump_edge(env->dmp, env->curr_id, parent, position, edge_mode);
/* from here on `parent` names the node just decoded, so the children
   below attach to it */
599 parent = env->curr_id;
600 pattern_dump_node(env->dmp, parent, op_code, mode_code, attr);
602 /* ok, we have a new ID */
605 code = next_tag(env->buf);
606 if (code != VLC_TAG_END) {
607 /* more info, do recursion */
610 preds = get_code(env->buf);
612 pattern_start_children(env->dmp, parent);
613 for (i = 0; i < preds; ++i) {
614 _decode_node(parent, i, env);
616 pattern_finish_children(env->dmp, parent);
/* Decodes the pattern stored in b[0..len) and feeds it to the dumper `dump`.
   The options are taken from the pattern's own option header if it has
   one, otherwise they stay 0. */
624 static void decode_node(BYTE *b, size_t len, pattern_dumper_t *dump)
628 unsigned code, options = 0;
630 init_buf(&buf, b, len);
633 env.curr_id = 1; /* 0 is used for special purpose */
637 code = next_tag(&buf);
638 if (code == VLC_TAG_OPTION) {
639 options = get_code(&buf);
641 env.options = options;
/* the root has no incoming edge: parent 0, position 0 */
643 _decode_node(0, 0, &env);
647 * The environment for the pattern calculation.
/* Passed as the walker context to calc_nodes_pattern(). */
649 typedef struct pattern_env {
650 int max_depth; /**< maximum depth for pattern generation. */
654 * Returns the associates pattern_entry_t for a CODE_BUF.
656 * @param buf the code buffer
657 * @param set the hash table containing all pattern entries
659 * @return the associated pattern_entry_t for the given code buffer
661 * If the code content was never seen before, a new pattern_entry is created
/* Find-or-insert: a candidate entry is built on the module obstack first,
   then looked up; an existing match makes the candidate redundant. */
664 static pattern_entry_t *pattern_get_entry(CODE_BUFFER *buf, pset *set)
666 pattern_entry_t *key, *elem;
667 size_t len = buf_lenght(buf);
/* allocate the entry together with len bytes for its trailing buf array */
670 key = OALLOCF(&status->obst, pattern_entry_t, buf, len);
672 memcpy(key->buf, buf_content(buf), len);
674 hash = buf_hash(buf);
676 elem = (pattern_entry_t*)pset_find(set, key, hash);
/* the candidate is given back to the obstack */
678 obstack_free(&status->obst, key);
/* a new pattern starts with a cleared counter */
682 cnt_clr(&key->count);
683 return (pattern_entry_t*)pset_insert(set, key, hash);
687 * Increase the count for a pattern.
689 * @param buf the code buffer containing the pattern
690 * @param depth the pattern depth
692 * @note Single node patterns are ignored
/* Looks up (or creates) the entry for the encoded pattern in the module's
   pattern hash and bumps its counter. */
694 static void count_pattern(CODE_BUFFER *buf, int depth)
696 pattern_entry_t *entry;
698 /* ignore single node pattern (i.e. constants) */
700 entry = pattern_get_entry(buf, status->pattern_hash);
703 cnt_inc(&entry->count);
708 * Pre-walker for nodes pattern calculation.
/* Walker callback: encodes the pattern rooted at `node` into an on-stack
   scratch buffer and counts it. `ctx` is the pattern_env_t carrying the
   depth limit. */
710 static void calc_nodes_pattern(ir_node *node, void *ctx)
712 pattern_env_t *env = (pattern_env_t*)ctx;
713 BYTE buffer[PATTERN_STORE_SIZE];
717 init_buf(&buf, buffer, sizeof(buffer));
718 depth = encode_node(node, &buf, env->max_depth);
/* a pattern that did not fit into the scratch buffer is reported */
720 if (buf_overrun(&buf)) {
721 lc_fprintf(stderr, "Pattern store: buffer overrun at size %zu. Pattern ignored.\n", sizeof(buffer));
723 count_pattern(&buf, depth);
727 * Store all collected patterns.
729 * @param fname filename for storage
/* File layout as written below: the 4 bytes "FPS1", the entry count, then
   for each entry its header (everything before buf) directly followed by
   len payload bytes. The count and the header are written as raw in-memory
   bytes. */
731 static void store_pattern(const char *fname)
734 size_t count = pset_count(status->pattern_hash);
739 f = fopen(fname, "wb");
745 fwrite("FPS1", 4, 1, f);
746 fwrite(&count, sizeof(count), 1, f);
748 foreach_pset(status->pattern_hash, pattern_entry_t, entry) {
/* header and payload are contiguous in memory, so one write covers both */
749 fwrite(entry, offsetof(pattern_entry_t, buf) + entry->len, 1, f);
755 * Read collected patterns from a file.
757 * @param fname filename
/* Counterpart of store_pattern(): checks the "FPS1" magic, reads the entry
   count and then rebuilds every entry in a freshly created pattern set.
   NOTE(review): the set is created before the file is opened; whether it
   is released on the failure paths is not shown here - confirm. */
759 static HASH_MAP(pattern_entry_t) *read_pattern(const char *fname)
762 pattern_entry_t *entry, tmp;
766 HASH_MAP(pattern_entry_t) *pattern_hash = new_pset(pattern_cmp, 8);
767 BYTE buffer[PATTERN_STORE_SIZE];
771 f = fopen(fname, "rb");
777 res = fread(magic, 4, 1, f);
781 res = fread(&count, sizeof(count), 1, f);
/* NOTE(review): count is printed with %zu below, so it looks unsigned; if
   so, `count <= 0` can only mean `count == 0`. */
782 if (res != 1 || memcmp(magic, "FPS1", 4) != 0 || count <= 0)
785 /* read all pattern entries and put them into the hash table. */
786 for (i = 0; i < count; ++i) {
787 init_buf(&buf, buffer, sizeof(buffer));
/* only the fixed header (the fields before buf) is read into tmp */
788 res = fread(&tmp, offsetof(pattern_entry_t, buf), 1, f);
/* the payload goes through put_byte() so that the buffer hash is
   recomputed; the fgetc() result is passed on unchecked */
791 for (j = 0; j < tmp.len; ++j)
792 put_byte(&buf, fgetc(f));
793 entry = pattern_get_entry(&buf, pattern_hash);
794 entry->count = tmp.count;
798 lc_printf("Read %zu pattern from %s\n", count, fname);
799 assert(pset_count(pattern_hash) == count);
804 fprintf(stderr, "Error: %s is not a Firm pattern store. Ignored.\n", fname);
810 * Write the collected patterns to a VCG file for inspection.
812 * @param fname name of the VCG file to create
/* Collects all entries of the pattern hash into an array, sorts it with
   pattern_count_cmp (largest counter first) and dumps the entries through
   a VCG dumper. */
814 static void pattern_output(const char *fname)
816 pattern_entry_t **pattern_arr;
817 pattern_dumper_t *dump;
818 size_t i, count = pset_count(status->pattern_hash);
820 lc_printf("\n%zu pattern detected\n", count);
825 /* creates a dumper */
826 dump = new_vcg_dumper(fname, 100);
/* NOTE(review): no release of pattern_arr is shown here - confirm it is
   freed. */
828 pattern_arr = XMALLOCN(pattern_entry_t*, count);
830 foreach_pset(status->pattern_hash, pattern_entry_t, entry) {
831 pattern_arr[i++] = entry;
837 qsort(pattern_arr, count, sizeof(*pattern_arr), pattern_count_cmp);
839 for (i = 0; i < count; ++i) {
840 pattern_entry_t *const entry = pattern_arr[i];
/* status->bound is the lower limit on the occurrence count for output */
841 if (cnt_to_uint(&entry->count) < status->bound)
845 pattern_dump_new_pattern(dump, &entry->count);
846 decode_node(entry->buf, entry->len, dump);
847 pattern_dump_finish_pattern(dump);
855 * Calculates the pattern history.
/* Public entry point: walks `irg` once per depth in the configured range
   and counts the pattern found at every node. */
857 void stat_calc_pattern_history(ir_graph *irg)
862 if (! status->enable)
865 /* do NOT count the const code IRG */
866 if (irg == get_const_code_irg())
/* min_depth and max_depth are both inclusive */
869 for (i = status->min_depth; i <= status->max_depth; ++i) {
871 irg_walk_graph(irg, calc_nodes_pattern, NULL, &env);
876 * Initializes the pattern history.
/* Public entry point: records the enable flag, sets the fixed defaults
   (options, depth range), initialises the obstack and sets up the pattern
   hash - loaded from "pattern.fps" when persistence is on, else empty. */
878 void stat_init_pattern_history(int enable)
880 HASH_MAP(pattern_entry_t) *pattern_hash = NULL;
882 status->enable = enable;
/* mode encoding is left switched off */
887 status->options = /* OPT_WITH_MODE | */ OPT_ENC_DAG | OPT_WITH_ICONST | OPT_PERSIST_PATTERN;
888 status->min_depth = 3;
889 status->max_depth = 5;
891 obstack_init(&status->obst);
893 /* create the hash-table */
894 if (status->options & OPT_PERSIST_PATTERN)
895 pattern_hash = read_pattern("pattern.fps");
/* no persisted store (or persistence off): start with an empty set */
896 if (pattern_hash == NULL)
897 pattern_hash = new_pset(pattern_cmp, 8);
898 status->pattern_hash = pattern_hash;
902 * Finish the pattern history.
/* Public entry point: persists and dumps the collected patterns, then
   releases the pattern hash and the obstack. Both outputs go to fixed file
   names; `fname` is not used in the calls shown here. */
904 void stat_finish_pattern_history(const char *fname)
907 if (! status->enable)
910 store_pattern("pattern.fps");
911 pattern_output("pattern.vcg");
913 del_pset(status->pattern_hash);
/* freeing with NULL releases everything allocated on the obstack */
914 obstack_free(&status->obst, NULL);