X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fgvn_pre.c;h=51d33d3860fcd480e1073bec9adc430253f46117;hb=01c4c27c4b7824dd0020f6fd2218edbe9ab40548;hp=c7537e1d9f0d00b34bbe94eb5c2960f231d02b85;hpb=b5da39413a84ef8ab8a4c0eab12103b1c1675938;p=libfirm

diff --git a/ir/opt/gvn_pre.c b/ir/opt/gvn_pre.c
index c7537e1d9..51d33d386 100644
--- a/ir/opt/gvn_pre.c
+++ b/ir/opt/gvn_pre.c
@@ -52,31 +52,30 @@
 #define MAX_ANTIC_ITER 10
 #define MAX_INSERT_ITER 3
 
-/* Infinite loops will be unrolled during antic iteration and
-   will iterate until otherwise stopped.
-   This also leaves every possible values of iteration variables in antic_in.
-*/
+/* Stops antic iteration from processing endless loops. */
+#define IGNORE_INF_LOOPS 1
+/* Stops antic information from flowing over the backedge of infinite loops. */
 #define NO_INF_LOOPS 0
 
 /* Attempt to reduce register pressure and reduce code size
    for hoisted nodes. */
-#define HOIST_HIGH 0
+#define HOIST_HIGH 1
 #define COMMON_DOM 1
 
 /* Seamless implementation of handling loads and generally memory
    dependent nodes with GVN-PRE. */
-#define LOADS 0
+#define LOADS 1
 #define DIVMODS 0
 
 /* Experimental */
-#define MIN_CUT 0
-#define OLD_DIVMODS 0
-#define NO_INF_LOOPS2 0
+
+/* Only optimize a node directly after a phi node if it is the phi's only successor. */
+#define MIN_CUT 0
 
-/* NIY Choose to be optimized nodes in a more sophisticated way
-   to reduce number of newly introduced phi nodes. */
-#define BETTER_GREED 0
+/* GVN alone is very limited. This enables optimize_node() during value recognition.
+   GVN is still limited, since a+1+b+1 does not equal a+b+2.
+   TODO Broken for yet unknown reasons. */
+#define OPTIMIZE_NODES 0
 
 
 /** Additional info we need for every block. */
@@ -119,6 +118,10 @@ typedef struct pre_env {
 	char            changes;       /* flag for fixed point iterations - non-zero if changes occurred */
 	char            first_iter;    /* non-zero for first fixed point iteration */
 	int             iteration;     /* iteration counter */
+#if OPTIMIZE_NODES
+	pset           *value_table;   /* standard value table */
+	pset           *gvnpre_values; /* gvnpre value table */
+#endif
 } pre_env;
 
 static pre_env *environ;
@@ -149,20 +152,20 @@ typedef struct gvnpre_statistics {
 	int infinite_loops;
 } gvnpre_statistics;
 
-gvnpre_statistics *gvnpre_stats = NULL;
+static gvnpre_statistics *gvnpre_stats = NULL;
 
-static void init_stats()
+static void init_stats(void)
 {
 	gvnpre_stats = XMALLOCZ(gvnpre_statistics);
 }
 
-static void free_stats()
+static void free_stats(void)
 {
 	free(gvnpre_stats);
 	gvnpre_stats = NULL;
 }
 
-static void print_stats()
+static void print_stats(void)
 {
 	gvnpre_statistics *stats = gvnpre_stats;
 	DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
@@ -256,10 +259,12 @@ static int compare_gvn_identities(const void *elt, const void *key)
 
 	/* memops are not the same, even if we want to optimize them
 	   we have to take the order in account */
-	if (is_memop(a) || is_memop(b)) {
+	if (is_memop(a) || is_memop(b) ||
+	    get_irn_mode(a) == mode_T ||
+	    get_irn_mode(b) == mode_T) {
 		/* Loads with the same predecessors are the same value;
 		   this should only happen after phi translation. */
-		if (! is_Load(a) || ! is_Load(b))
+		if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
 			return 1;
 	}
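The OPTIMIZE_NODES comment above points at the central weakness of structural value numbering: a value is identified by its opcode plus the value numbers of its operands, so algebraically equal but structurally different expressions such as a+1+b+1 and a+b+2 receive distinct numbers. A minimal, self-contained sketch of that scheme (toy code for illustration only, not libfirm's pset-based implementation; all names are invented):

    #include <stdio.h>

    /* Toy value numbering: a value is identified by (op, vn(left), vn(right)).
       A linear table stands in for a hashed value table. */
    enum { MAX_VALS = 64 };
    typedef struct { char op; int l, r; } entry;
    static entry table[MAX_VALS];
    static int n_vals;

    static int value_of(char op, int l, int r)
    {
        for (int i = 0; i < n_vals; i++)
            if (table[i].op == op && table[i].l == l && table[i].r == r)
                return i;                  /* known value: reuse its number */
        table[n_vals].op = op;
        table[n_vals].l  = l;
        table[n_vals].r  = r;
        return n_vals++;                   /* fresh value number */
    }

    int main(void)
    {
        int a  = value_of('a', -1, -1);    /* leaves get their own numbers */
        int b  = value_of('b', -1, -1);
        int c1 = value_of('1', -1, -1);
        int c2 = value_of('2', -1, -1);

        /* a+b computed twice yields the same value number ... */
        printf("%d == %d\n", value_of('+', a, b), value_of('+', a, b));

        /* ... but (a+1)+(b+1) and (a+b)+2 do not, although they are equal. */
        int x = value_of('+', value_of('+', a, c1), value_of('+', b, c1));
        int y = value_of('+', value_of('+', a, b), c2);
        printf("%d != %d\n", x, y);
        return 0;
    }

This is also why the hunk above refuses to identify memops and mode_T nodes structurally: for memory operations, equal operands do not imply equal values, because ordering matters.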
@@ -357,7 +362,7 @@ static ir_node *remember(ir_node *irn)
 			in[i] = pred_value;
 		}
 
-		if (changed) {
+		if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
 			/* create representative for */
 			ir_node *nn = new_ir_node(
 				get_irn_dbg_info(irn),
@@ -369,6 +374,14 @@ static ir_node *remember(ir_node *irn)
 				in);
 			copy_node_attr(environ->graph, irn, nn);
 
+#if OPTIMIZE_NODES
+			/* TODO optimize_node() uses the value table and thus the custom
+			   identify_cmp() and will fail when trying. */
+			environ->graph->value_table = environ->value_table;
+			nn = optimize_node(nn);
+			environ->graph->value_table = environ->gvnpre_values;
+#endif
+
 			/* now the value can be determined because the
 			   predecessors are the leaders */
 			value = identify_remember(nn);
@@ -596,7 +609,7 @@ static void analyse_loops(ir_graph *irg)
 	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
 }
 
-#if NO_INF_LOOPS || NO_INF_LOOPS2
+#if IGNORE_INF_LOOPS || NO_INF_LOOPS
 /**
  * Returns non-zero if block is part of an infinite loop.
  */
@@ -631,7 +644,7 @@ static unsigned is_nice_value(ir_node *n)
 	if (is_Phi(n))
 		return 1;
 
-#if LOADS || OLD_DIVMODS || DIVMODS
+#if LOADS || DIVMODS
 	if (is_Proj(n) && mode != mode_X && mode != mode_T)
 		return 1;
 #else
@@ -642,6 +655,8 @@ static unsigned is_nice_value(ir_node *n)
 #if LOADS
 	if (is_Load(n))
 		return get_Load_volatility(n) == volatility_non_volatile;
+	if (is_Store(n))
+		return get_Store_volatility(n) == volatility_non_volatile;
 #endif
 
 	if (get_irn_pinned(n) == op_pin_state_pinned)
@@ -675,6 +690,8 @@ static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *val
 	/* filter loads with no phi predecessor from antic_in */
 	if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
 		return 0;
+	if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
+		return 0;
 #endif
 
 #if DIVMODS
@@ -731,20 +748,28 @@ static void topo_walker(ir_node *irn, void *ctx)
 
 	if (is_Block(irn))
 		return;
 
+#if OPTIMIZE_NODES
+	environ->graph->value_table = environ->value_table;
+	identify_remember(irn);
+	environ->graph->value_table = environ->gvnpre_values;
+#endif
+
 	/* GVN step: remember the value. */
 	value = remember(irn);
 
-	/* values that are not in antic_in also dont't need to be in any other set */
-	if (! is_nice_value(irn))
-		return;
-
 	if (is_irn_constlike(irn))
 		return;
 
 	block = get_nodes_block(irn);
 	info  = get_block_info(block);
 
-	ir_valueset_insert(info->avail_out, value, irn);
+	if (get_irn_mode(irn) != mode_X)
+		ir_valueset_insert(info->avail_out, value, irn);
+
+	/* values that are not in antic_in also don't need to be in any other set */
+
+	if (! is_nice_value(irn))
+		return;
 
 	if (is_clean_in_block(irn, block, info->exp_gen)) {
 		DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
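The topo_walker hunk above fills avail_out, the set of values available at the end of each block; with GCSE switched on a value is available wherever its defining block dominates, which is presumably why update_new_set() (note its idom parameter below) only needs to push sets down the dominator tree. A toy illustration of that propagation over a hardcoded dominator tree (invented data, not the pass's data structures):

    #include <stdio.h>

    /* AVAIL_OUT(b) = values generated in b, plus AVAIL_OUT(idom(b)).
       Blocks are indices; bitmasks stand in for value sets. */
    enum { N_BLOCKS = 4 };
    static const int      idom[N_BLOCKS] = { -1, 0, 0, 0 };  /* block 0 dominates all */
    static const unsigned gen[N_BLOCKS]  = { 0x1, 0x2, 0x4, 0x0 };

    int main(void)
    {
        unsigned avail_out[N_BLOCKS];
        for (int b = 0; b < N_BLOCKS; b++) {  /* visit blocks in dominator order */
            unsigned in = idom[b] < 0 ? 0 : avail_out[idom[b]];
            avail_out[b] = in | gen[b];
            printf("avail_out(%d) = %#x\n", b, avail_out[b]);
        }
        return 0;
    }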
@@ -792,70 +817,6 @@ static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
 	ir_nodehashmap_insert(map, node, trans);
 }
 
-#if OLD_DIVMODS
-/* Helper function to compare the values of pred and avail_pred. */
-static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
-{
-	ir_node *avail_value = identify(avail_pred);
-	ir_node *pred_block  = get_Block_cfgpred_block(block, pos);
-	ir_node *trans_pred  = get_translated(pred_block, pred);
-	ir_node *value;
-
-	if (trans_pred == NULL)
-		trans_pred = pred;
-	value = identify(trans_pred);
-
-	DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
-
-	return (value == avail_value);
-}
-
-/**
- * Does phi translation for redundant Div/Mod nodes only.
- * Returns NULL for non-redundant node, which needs to be phi translated.
- */
-static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
-{
-	ir_node *mem   = get_memop_mem(divmod);
-	ir_node *trans = get_translated_pred(block, pos, mem);
-
-	if (trans == NULL)
-		trans = mem;
-
-	/* no partial redundancy if this is a mode_M phi */
-	if (is_Proj(trans)) {
-		/* The last memory operation in predecessor block */
-		ir_node *avail_op = get_Proj_pred(trans);
-
-		if (get_irn_op(divmod) == get_irn_op(avail_op)) {
-			unsigned left, right;
-
-			if (is_Div(avail_op)) {
-				if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
-				    get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
-
-					left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
-					right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
-
-					if (left && right)
-						return avail_op;
-				}
-			} else if (is_Mod(avail_op)) {
-				if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
-
-					left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
-					right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
-
-					if (left && right)
-						return avail_op;
-				}
-			}
-		}
-	}
-	return NULL;
-}
-#endif
-
 /**
  * Translates an expression above a Phi.
  *
@@ -882,14 +843,6 @@ static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valuese
 	}
 	arity = get_irn_arity(node);
 
-#if OLD_DIVMODS
-	if (is_Div(node) || is_Mod(node)) {
-		ir_node *avail_op = phi_translate_divmod(node, block, pos);
-		if (avail_op)
-			return avail_op;
-	}
-#endif
-
 	needed = 0;
 	in = ALLOCANZ(ir_node *, arity);
 
@@ -935,14 +888,16 @@ static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valuese
 		   skip them and use the loads memory. */
 		if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
 #if LOADS || DIVMODS
-			ir_node *load = get_Proj_pred(pred_trans);
+			ir_node *loadstore = get_Proj_pred(pred_trans);
 
 			/* If we do not translate this node, we will get its value wrong. */
 			needed |= 1;
 
-			if (is_Load(load)) {
+			if (is_Load(loadstore)) {
 				/* Put new load under the adjacent loads memory edge
 				   such that GVN may compare them. */
-				new_pred = get_Load_mem(load);
+				new_pred = get_Load_mem(loadstore);
+			} else if (is_Store(loadstore)) {
+				new_pred = get_Store_mem(loadstore);
 			}
 #endif
 		} else {
@@ -1022,7 +977,7 @@ static void compute_antic(ir_node *block, void *ctx)
 
 	/* add exp_gen */
 	if (env->first_iter) {
-#if NO_INF_LOOPS
+#if IGNORE_INF_LOOPS
 		/* keep antic_in of infinite loops empty */
 		if (! is_in_infinite_loop(block)) {
 			foreach_valueset(info->exp_gen, value, expr, iter) {
@@ -1055,6 +1010,7 @@ static void compute_antic(ir_node *block, void *ctx)
 
 			if (trans == NULL)
 				trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
+
 			/* create new value if necessary */
 			trans_value = identify_or_remember(trans);
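phi_translate() moves an expression from a block into one of its predecessors: every operand that is a Phi of that block is replaced by the Phi's operand for the chosen predecessor, while other operands keep their leaders. A minimal sketch of the substitution step, with invented toy types rather than the ir_node API:

    #include <stdio.h>

    /* An operand of l + r; if it is a phi of the block, phi[pred]
       gives the phi's operand for the chosen predecessor. */
    typedef struct { int is_phi; int value; int phi[2]; } operand;

    static int translate(const operand *op, int pred)
    {
        return op->is_phi ? op->phi[pred] : op->value;  /* substitute phi operand */
    }

    int main(void)
    {
        /* x = phi(a, b); expr = x + c   (values as small integers) */
        operand x = { 1,  0, { /* a */ 10, /* b */ 11 } };
        operand c = { 0, 12, { 0, 0 } };

        /* Translating expr into predecessor 0 yields a+c, into 1 yields b+c. */
        for (int pred = 0; pred < 2; pred++)
            printf("pred %d: %d + %d\n", pred, translate(&x, pred), translate(&c, pred));
        return 0;
    }

The translated expression is then value-numbered again (identify_or_remember in the hunk above), which is what lets antic_in speak about values in the predecessor's terms.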
@@ -1068,9 +1024,9 @@ static void compute_antic(ir_node *block, void *ctx)
 				represent = expr;
 
 			if (is_clean_in_block(expr, block, info->antic_in)) {
-#if NO_INF_LOOPS2
-				/* no flow over the backedge of endless loops */
-				if (env->iteration <= 2 || ! (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos))) {
+#if NO_INF_LOOPS
+				/* Prevent information flow over the backedge of endless loops. */
+				if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
 					ir_valueset_replace(info->antic_in, trans_value, represent);
 				}
 #else
@@ -1238,13 +1194,6 @@ static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *v
 		}
 	}
 
-#if BETTER_GREED
-	/* value is redundant from last iteration,
-	   but has not been removed from antic_in (is not optimized) */
-	if (! environ->first_iter && is_redundant(block, expr))
-		return mode;
-#endif
-
 	/* If it is not the same value already existing along every predecessor
 	   and it is defined by some predecessor then it is partially redundant. */
 	if (! partially_redundant || fully_redundant)
@@ -1280,21 +1229,6 @@ static void update_new_set(ir_node *block, ir_node *idom)
 #endif
 } /* update_new_set */
 
-#if BETTER_GREED
-/*
- * Returns redundant flag of node irn in block block.
- */
-static unsigned is_redundant(ir_node *block, ir_node *irn)
-{
-	(void) block;
-	(void) irn;
-
-	/* TODO Needs to use a flag, because antic_done should only be used
-	   if node is finally processed by insert_nodes. */
-	return 0;
-}
-#endif
-
 /**
  * Checks if hoisting irn is greedy.
  * Greedy hoisting means that there are non partially redundant nodes
@@ -1372,6 +1306,7 @@ static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
 
 			avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
 
+			DB((dbg, LEVEL_3, "avail %+F\n", avail));
 			if (! avail)
 				return 1;
 #if MIN_CUT
@@ -1410,9 +1345,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
 	ir_node *idom;
 	int pos;
 	ir_valueset_iterator_t iter;
-#if BETTER_GREED
-	plist_t *stack;
-#endif
 
 	/* only blocks */
 	if (! is_Block(block))
@@ -1437,14 +1369,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
 		return;
 	}
 
-#if BETTER_GREED
-	stack = plist_new();
-	foreach_valueset(info->antic_in, value, expr, iter) {
-		/* inverse topologic */
-		plist_insert_front(stack, expr);
-	}
-#endif
-
 	/* This is the main reason antic_in is preverred over antic_out;
 	   we may iterate over every anticipated value first and not
 	   over the predecessor blocks. */
@@ -1473,25 +1397,16 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
 			continue;
 		}
 
-#if !BETTER_GREED
 		if (is_hoisting_greedy(expr, block)) {
 			DB((dbg, LEVEL_2, "greedy\n"));
 			continue;
 		}
-#endif
 
 		mode = is_partially_redundant(block, expr, value);
 		if (mode == NULL)
 			continue;
 
-#if BETTER_GREED
-		if (is_hoisting_greedy(expr, block)) {
-			DB((dbg, LEVEL_2, "Better greed: greedy\n"));
-			continue;
-		}
-#endif
-
-#if LOADS || OLD_DIVMODS || DIVMODS
+#if LOADS || DIVMODS
 		/* save old mode_M phis to remove keepalive edges later */
 		if (is_memop(expr)) {
 			ir_node *mem = get_memop_mem(expr);
@@ -1507,7 +1422,7 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
 			inc_stats(gvnpre_stats->first_iter_found);
 			inc_stats(gvnpre_stats->partially);
 		}
-		if (is_Load(expr))
+		if (is_Load(expr) || is_Store(expr))
 			inc_stats(gvnpre_stats->loads);
 		else if (is_Div(expr) || is_Mod(expr))
 			inc_stats(gvnpre_stats->divmods);
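is_partially_redundant() classifies an expression by inspecting each predecessor: if its value is available along some but not all incoming paths, insert_nodes_walker() inserts the computation into the missing predecessors so that the original becomes fully redundant. In C source terms the transformation looks roughly like this (a generic PRE illustration, not literal output of this pass):

    #include <stdio.h>

    /* Before: a+b is computed on the p-path and again after the join,
       so the second computation is partially redundant. */
    static int before(int a, int b, int p)
    {
        int r = p ? a + b : 0;
        return r + (a + b);
    }

    /* After PRE: inserting a+b on the other path makes the late computation
       fully redundant; t plays the role of the Phi the pass would create. */
    static int after(int a, int b, int p)
    {
        int t, r;
        if (p) {
            t = a + b;
            r = t;
        } else {
            t = a + b;  /* inserted computation */
            r = 0;
        }
        return r + t;   /* reuses t instead of recomputing a+b */
    }

    int main(void)
    {
        printf("%d %d\n", before(3, 4, 1), after(3, 4, 1));  /* 14 14 */
        printf("%d %d\n", before(3, 4, 0), after(3, 4, 0));  /* 7 7 */
        return 0;
    }

The greedy check above guards exactly this insertion: hoisting pays off only if the inserted copy is itself useful, otherwise code size and register pressure grow for nothing.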
@@ -1638,50 +1553,6 @@ static void insert_nodes_walker(ir_node *block, void *ctx)
 		ir_valueset_insert(info->antic_done, value, expr);
 		env->changes |= 1;
 	}
-
-#if BETTER_GREED
-	/* TODO Unfinished
-	   Better greed first determines which values are redundant
-	   and decides then which to take.
-	   insert_nodes needs to be split up for that. The cycle could be
-	   for each block: flag redundant nodes,
-	   use heuristic to adjust these flags (also consider antic_done),
-	   do insert nodes.
-	   This way we could decide if we should hoist a non redundant node,
-	   if all its successors are redundant.
-	   Or we might try to minimize the cut along hoisted nodes and their
-	   non redundant successors.
-	 */
-	if (env->changes) {
-		plist_element_t *it;
-		/* iterate in inverse topological order */
-		foreach_plist(stack, it) {
-			ir_node *irn   = (ir_node *)plist_element_get_value(it);
-			ir_node *block = get_nodes_block(irn);
-			int      j;
-			char     redundant = 1;
-
-			/* does irn only have redundant successors? */
-
-			foreach_out_edge(irn, edge) {
-				ir_node *succ = get_edge_src_irn(edge);
-
-				/* if succ and irn are in the same block */
-				if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
-					continue;
-				} else {
-					redundant = 0;
-					break;
-				}
-			}
-
-			if (redundant)
-				flag_redundant(irn, 1);
-		}
-	}
-	plist_free(stack);
-#endif
-
 }
 
 #if HOIST_HIGH
@@ -1885,6 +1756,7 @@ static void hoist_high(ir_node *block, void *ctx)
 				   be available from this point on. Currently we do not
 				   push the availability information through during the walk. */
 				ir_valueset_insert(target_info->new_set, value, nn);
+				ir_valueset_insert(target_info->avail_out, value, nn);
 			}
 		}
 	}
@@ -2119,15 +1991,24 @@ void do_gvn_pre(ir_graph *irg)
 
 	/* Detect and set links of infinite loops to non-zero. */
 	analyse_loops(irg);
 
+#if OPTIMIZE_NODES
+	new_identities(irg);
+	env.value_table = irg->value_table;
+	irg->value_table = NULL;
+#endif
+
 	/* Switch on GCSE. We need it to correctly compute
 	   the value of a node, which is independent from its block. */
 	set_opt_global_cse(1);
-	/* new_identities */
+	/* new_identities() */
 	if (irg->value_table != NULL)
 		del_pset(irg->value_table);
 	/* initially assumed nodes in pset are 512 */
 	irg->value_table = new_pset(compare_gvn_identities, 512);
+#if OPTIMIZE_NODES
+	env.gvnpre_values = irg->value_table;
+#endif
 
 	/* do GVN-PRE pass */
 	gvn_pre(irg, &env);
@@ -2149,6 +2030,12 @@ void do_gvn_pre(ir_graph *irg)
 	restore_optimization_state(&state);
 	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
 
+#if OPTIMIZE_NODES
+	irg->value_table = env.value_table;
+	del_pset(irg->value_table);
+	irg->value_table = env.gvnpre_values;
+#endif
+
 	/* TODO There seem to be optimizations that try to use
 	   the existing value_table. */
 	new_identities(irg);
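The OPTIMIZE_NODES paths in do_gvn_pre() and remember() keep two identity tables alive and swap them in and out of irg->value_table around calls that expect one or the other. The bookkeeping pattern, reduced to a self-contained toy (invented types, not the libfirm pset API):

    #include <stdio.h>

    /* Two identity tables share one slot: swap in the table a callee
       expects, call it, then swap back. */
    typedef struct { const char *name; } table;
    typedef struct { table *value_table; } graph;

    static void needs_standard_table(graph *g)
    {
        printf("using %s\n", g->value_table->name);
    }

    int main(void)
    {
        table standard = { "standard value table" };
        table gvnpre   = { "gvnpre value table" };
        graph g = { &gvnpre };             /* GVN-PRE table is active */

        table *saved = g.value_table;      /* save */
        g.value_table = &standard;         /* swap in */
        needs_standard_table(&g);
        g.value_table = saved;             /* restore */
        printf("restored %s\n", g.value_table->name);
        return 0;
    }

The swap is needed because the GVN-PRE table is built with compare_gvn_identities(), whose equality notion (operand values rather than operand nodes) would confuse any optimization that expects the standard identity semantics; the TODO about optimize_node() in remember() records exactly that hazard.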