From f4ae1b7b653bcd43b99bc81b40425ab595a0dc7d Mon Sep 17 00:00:00 2001 From: =?utf8?q?G=C3=B6tz=20Lindenmaier?= Date: Fri, 8 Mar 2002 14:39:58 +0000 Subject: [PATCH] Implemented cf optimizations. Checked compiler flags -- sorted better to fit optimizations. [r325] --- ir/ir/ircons.c | 4 - ir/ir/irdump.c | 19 ++- ir/ir/irdump.h | 2 + ir/ir/irflag.c | 9 ++ ir/ir/irflag.h | 4 + ir/ir/irgmod.c | 3 +- ir/ir/irgopt.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++-- ir/ir/irgopt.h | 11 ++ ir/ir/irgwalk.h | 4 +- ir/ir/irnode.c | 8 +- ir/ir/irnode.h | 1 + ir/ir/iropt.c | 310 +++++++++++++++++++++++---------------------- ir/ir/iropt_t.h | 9 +- 13 files changed, 526 insertions(+), 183 deletions(-) diff --git a/ir/ir/ircons.c b/ir/ir/ircons.c index 3e458116c..a974d91ce 100644 --- a/ir/ir/ircons.c +++ b/ir/ir/ircons.c @@ -470,10 +470,6 @@ new_r_Raise (ir_graph *irg, ir_node *block, ir_node *store, ir_node *obj) ir_node *in[2] = {store, obj}; ir_node *res; res = new_ir_node (irg, block, op_Raise, mode_T, 2, in); - - // DEBUG - fprintf (stdout, "%s: res = %p\n", __PRETTY_FUNCTION__, res); - res = optimize (res); irn_vrfy (res); return res; diff --git a/ir/ir/irdump.c b/ir/ir/irdump.c index 48086dcdf..d426237fd 100644 --- a/ir/ir/irdump.c +++ b/ir/ir/irdump.c @@ -67,6 +67,9 @@ #define PRINT_NODEID(X) fprintf(F, "%p", X) #endif +/* A suffix to manipulate the file name. */ +char *dump_file_suffix = NULL; + /* file to dump to */ static FILE *F; @@ -767,21 +770,17 @@ void vcg_open (ir_graph *irg, char *suffix) { /** open file for vcg graph */ ent = get_irg_ent(irg); id = ent->ld_name ? ent->ld_name : ent->name; - /* Don't use get_entity_ld_ident (ent) as it computes the mangled name! */ + /* Don't use get_entity_ld_ident (ent) as it computes the mangled name! */ len = id_to_strlen (id); cp = id_to_str (id); - fname = malloc (len + 5 + strlen(suffix)); + if (dump_file_suffix) + fname = malloc (len + 5 + strlen(suffix) + strlen(dump_file_suffix)); + else + fname = malloc (len + 5 + strlen(suffix)); strncpy (fname, cp, len); /* copy the filename */ fname[len] = '\0'; + if (dump_file_suffix) strcat (fname, dump_file_suffix); /* append file suffix */ strcat (fname, suffix); /* append file suffix */ - - fname = malloc (len + 5 + strlen(suffix)); - strncpy (fname, cp, len); /* copy the filename */ - fname[len] = '\0'; /* ensure string termination */ - /*strcpy (fname, cp); * copy the filename * - this produces wrong, too long strings in conjuction with the - jocca frontend. The \0 seems to be missing. */ - strcat (fname, suffix); /* append file suffix */ strcat (fname, ".vcg"); /* append the .vcg suffix */ F = fopen (fname, "w"); /* open file for writing */ if (!F) { diff --git a/ir/ir/irdump.h b/ir/ir/irdump.h index 1ad604cdc..902425e67 100644 --- a/ir/ir/irdump.h +++ b/ir/ir/irdump.h @@ -41,6 +41,8 @@ void dump_ir_block_edge(ir_node *n); /* dump edges to our inputs */ void dump_ir_data_edges(ir_node *n); #endif +/* @@@ GL: A hack */ +extern char *dump_file_suffix; /****m* irdump/dump_ir_graph * diff --git a/ir/ir/irflag.c b/ir/ir/irflag.c index 2bdd3198f..18619b277 100644 --- a/ir/ir/irflag.c +++ b/ir/ir/irflag.c @@ -22,6 +22,7 @@ int opt_global_cse = 0; /* Don't use block predecessor for compariso /* @@@ 0 solage code placement fehlt */ int opt_constant_folding = 1; /* Evaluate operations */ int opt_unreachable_code = 1; /* Bad node propagation */ +int opt_control_flow = 1; /* control flow optimizations. */ int opt_dead_node_elimination = 1; /* Reclaim memory */ int opt_reassociation = 1; /* Reassociate nodes */ int opt_inline = 1; /* Do inlining transformation */ @@ -73,6 +74,14 @@ get_opt_unreachable_code(void) return opt_unreachable_code; } +inline void set_opt_control_flow(int value) { + opt_control_flow = value; +} + +inline int get_opt_control_flow(void) { + return opt_control_flow; +} + inline void set_opt_reassociation(int value) { diff --git a/ir/ir/irflag.h b/ir/ir/irflag.h index e962e7f0c..349e2b03e 100644 --- a/ir/ir/irflag.h +++ b/ir/ir/irflag.h @@ -47,6 +47,10 @@ int get_opt_global_cse (void); inline void set_opt_unreachable_code(int value); inline int get_opt_unreachable_code(void); +/* Performs Straightening, if simplifications and loop simplifications. */ +inline void set_opt_control_flow(int value); +inline int get_opt_control_flow(void); + /* If opt_reassociation == 1 reassociation is performed. Default: opt_reassociation == 1. */ inline void set_opt_reassociation(int value); diff --git a/ir/ir/irgmod.c b/ir/ir/irgmod.c index bdd782af6..b82eb5709 100644 --- a/ir/ir/irgmod.c +++ b/ir/ir/irgmod.c @@ -52,9 +52,8 @@ exchange (ir_node *old, ir_node *new) old->in[1] = new; } - /**********************************************************************/ -/* Funcionality for collect_phis */ +/* Functionality for collect_phis */ /**********************************************************************/ void diff --git a/ir/ir/irgopt.c b/ir/ir/irgopt.c index e1592014a..d0595cc0d 100644 --- a/ir/ir/irgopt.c +++ b/ir/ir/irgopt.c @@ -25,7 +25,7 @@ # include "irgmod.h" # include "array.h" # include "pset.h" -# include "pdeq.h" /* provisorisch fuer code placement */ +# include "pdeq.h" /* Fuer code placement */ # include "irouts.h" /* Defined in iropt.c */ @@ -33,15 +33,6 @@ pset *new_identities (void); void del_identities (pset *value_table); void add_identities (pset *value_table, ir_node *node); -#if 0 /* Warum ist das hier nochmal definiert? - Hat's nicht gelinkt wegen typeO tities - tity ?? */ -/* To fill the hash table */ -void -add_identity (pset *value_table, ir_node *n) { - /* identify_remember (value_table, n);*/ -} -#endif - /********************************************************************/ /* apply optimizations of iropt to all nodes. */ /********************************************************************/ @@ -80,6 +71,8 @@ local_optimize_graph (ir_graph *irg) { set_irg_pinned(current_ir_graph, floats); if (get_irg_outs_state(current_ir_graph) == outs_consistent) set_irg_outs_inconsistent(current_ir_graph); + if (get_irg_dom_state(current_ir_graph) == dom_consistent) + set_irg_dom_inconsistent(current_ir_graph); /* Clean the value_table in irg for the cse. */ del_identities(irg->value_table); @@ -241,7 +234,7 @@ copy_preds (ir_node *n, void *env) { void copy_graph () { ir_node *oe, *ne; /* old end, new end */ - ir_node *ka; /* keep alive */ + ir_node *ka; /* keep alive */ int i; oe = get_irg_end(current_ir_graph); @@ -524,7 +517,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) { arity = get_irn_arity(end_bl); /* arity = n_exc + n_ret */ n_res = get_method_n_res(get_Call_type(call)); - res_pred = (ir_node **) malloc ((n_res) * sizeof (ir_node *)); + res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *)); cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *)); set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */ @@ -988,6 +981,314 @@ void place_code(ir_graph *irg) { unnecessary executions of the node. */ place_late(); + set_irg_outs_inconsistent(current_ir_graph); del_pdeq (worklist); current_ir_graph = rem; } + + + +/********************************************************************/ +/* Control flow optimization. */ +/* Removes Bad control flow predecessors and empty blocks. A block */ +/* is empty if it contains only a Jmp node. */ +/* */ +/********************************************************************/ + + +static void merge_blocks(ir_node *n, void *env) { + int i; + set_irn_link(n, NULL); + + if (get_irn_op(n) == op_Block) { + /* Remove Tuples */ + for (i = 0; i < get_Block_n_cfgpreds(n); i++) + set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i))); + } else if (get_irn_mode(n) == mode_X) { + /* We will soon visit a block. Optimize it before visiting! */ + ir_node *b = get_nodes_Block(n); + ir_node *new = equivalent_node(b); + while (irn_not_visited(b) && (!is_Bad(new)) && (new != b)) { + /* We would have to run gigo if new is bad. */ + if (get_optimize() && get_opt_control_flow()) exchange (b, new); + b = new; + new = equivalent_node(b); + } + if (is_Bad(new)) exchange (n, new_Bad()); + } +} + +static void collect_nodes(ir_node *n, void *env) { + int i; + if (is_no_Block(n)) { + ir_node *b = get_nodes_Block(n); + + if ((get_irn_op(n) == op_Phi)) { + /* Collect Phi nodes to compact ins along with block's ins. */ + set_irn_link(n, get_irn_link(b)); + set_irn_link(b, n); + } else if (get_irn_op(n) != op_Jmp) { /* Check for non empty block. */ + mark_Block_block_visited(b); + } + } +} + +/* Returns true if pred is pred of block */ +int is_pred_of(ir_node *pred, ir_node *b) { + int i; + for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (b_pred == pred) return 1; + } + return 0; +} + +int test_whether_dispensable(ir_node *b, int pos) { + int i, j, n_preds = 1; + int dispensable = 1; + ir_node *cfop = get_Block_cfgpred(b, pos); + ir_node *pred = get_nodes_Block(cfop); + + if (get_Block_block_visited(pred) + 1 + < get_irg_block_visited(current_ir_graph)) { + if (!get_optimize() || !get_opt_control_flow()) { + /* Mark block so that is will not be removed. */ + set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); + return 1; + } + /* Seems to be empty. */ + if (!get_irn_link(b)) { + /* There are no Phi nodes ==> dispensable. */ + n_preds = get_Block_n_cfgpreds(pred); + } else { + /* b's pred blocks and pred's pred blocks must be pairwise disjunct. + Work preds < pos as if they were already removed. */ + for (i = 0; i < pos; i++) { + ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (get_Block_block_visited(b_pred) + 1 + < get_irg_block_visited(current_ir_graph)) { + for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) { + ir_node *b_pred_pred = get_nodes_Block(get_Block_cfgpred(b_pred, j)); + if (is_pred_of(b_pred_pred, pred)) dispensable = 0; + } + } else { + if (is_pred_of(b_pred, pred)) dispensable = 0; + } + } + for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) { + ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_pred_of(b_pred, pred)) dispensable = 0; + } + if (!dispensable) { + set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); + n_preds = 1; + } else { + n_preds = get_Block_n_cfgpreds(pred); + } + } + } + + return n_preds; +} + +void optimize_blocks(ir_node *b, void *env) { + int i, j, k, max_preds, n_preds; + ir_node *pred, *phi; + ir_node **in; + + max_preds = 0; + for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + pred = get_Block_cfgpred(b, i); + max_preds += test_whether_dispensable(b, i); + } + in = (ir_node **) malloc(max_preds * sizeof(ir_node *)); + + + /** Debug output ** + printf(" working on "); DDMN(b); + for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { + printf(" removing Bad %i\n ", i); + } else if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + printf(" removing pred %i ", i); DDMN(pred); + } else { printf(" Nothing to do for "); DDMN(pred); } + } + ** end Debug output **/ + + /** Fix the Phi nodes **/ + phi = get_irn_link(b); + while (phi) { + assert(get_irn_op(phi) == op_Phi); + /* Find the new predecessors for the Phi */ + n_preds = 0; + for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { + /* Do nothing */ + } else if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + /* It's an empty block and not yet visited. */ + ir_node *phi_pred = get_Phi_pred(phi, i); + for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { + if (get_nodes_Block(phi_pred) == pred) { + assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */ + in[n_preds] = get_Phi_pred(phi_pred, j); + } else { + in[n_preds] = phi_pred; + } + n_preds++; + } +#if 0 + /* @@@ hier brauche ich schleifeninformation!!! Wenn keine Rueckwaertskante + dann darfs auch keine Verwendung geben. */ + if (get_nodes_Block(phi_pred) == pred) { + /* remove the Phi as it might be kept alive. Further there + might be other users. */ + exchange(phi_pred, phi); /* geht, is aber doch semantisch falsch! */ + } +#endif + } else { + in[n_preds] = get_Phi_pred(phi, i); + n_preds ++; + } + } + /* Fix the node */ + set_irn_in(phi, n_preds, in); + phi = get_irn_link(phi); + } + + /** Move Phi nodes from removed blocks to this one. + This happens only if merge between loop backedge and single loop entry. **/ + for (k = 0; k < get_Block_n_cfgpreds(b); k++) { + pred = get_nodes_Block(get_Block_cfgpred(b, k)); + if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + phi = get_irn_link(pred); + while (phi) { + if (get_irn_op(phi) == op_Phi) { + set_nodes_Block(phi, b); + + n_preds = 0; + for (i = 0; i < k; i++) { + pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { + /* Do nothing */ + } else if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + /* It's an empty block and not yet visited. */ + for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { + /* @@@ Hier brauche ich schleifeninformation!!! Kontrllflusskante + muss Rueckwaertskante sein! (An allen vier in[n_preds] = phi + Anweisungen.) Trotzdem tuts bisher!! */ + in[n_preds] = phi; + n_preds++; + } + } else { + in[n_preds] = phi; + n_preds++; + } + } + for (i = 0; i < get_Phi_n_preds(phi); i++) { + in[n_preds] = get_Phi_pred(phi, i); + n_preds++; + } + for (i = k+1; i < get_Block_n_cfgpreds(b); i++) { + pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { + /* Do nothing */ + } else if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + /* It's an empty block and not yet visited. */ + for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { + in[n_preds] = phi; + n_preds++; + } + } else { + in[n_preds] = phi; + n_preds++; + } + } + set_irn_in(phi, n_preds, in); + } + phi = get_irn_link(phi); + } + } + } + + /** Fix the block **/ + n_preds = 0; + for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + pred = get_nodes_Block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { + /* Do nothing */ + } else if (get_Block_block_visited(pred) +1 + < get_irg_block_visited(current_ir_graph)) { + /* It's an empty block and not yet visited. */ + assert(get_Block_n_cfgpreds(b) > 1); + /* Else it should be optimized by equivalent_node. */ + for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { + in[n_preds] = get_Block_cfgpred(pred, j); + n_preds++; + } + /* Remove block as it might be kept alive. */ + exchange(pred, b/*new_Bad()*/); + } else { + in[n_preds] = get_Block_cfgpred(b, i); + n_preds ++; + } + } + set_irn_in(b, n_preds, in); + free(in); +} + +void optimize_cf(ir_graph *irg) { + int i; + ir_node **in; + ir_node *end = get_irg_end(irg); + ir_graph *rem = current_ir_graph; + current_ir_graph = irg; + + + /* Handle graph state */ + assert(get_irg_phase_state(irg) != phase_building); + if (get_irg_outs_state(current_ir_graph) == outs_consistent) + set_irg_outs_inconsistent(current_ir_graph); + if (get_irg_dom_state(current_ir_graph) == dom_consistent) + set_irg_dom_inconsistent(current_ir_graph); + + //DDME(get_irg_ent(irg)); + + /* Use block visited flag to mark non-empty blocks. */ + inc_irg_block_visited(irg); + irg_walk(end, merge_blocks, collect_nodes, NULL); + + /* Optimize the standard code. */ + irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL); + + /* Walk all keep alives, optimize them if block, add to new in-array + for end if useful. */ + in = NEW_ARR_F (ir_node *, 1); + in[0] = get_nodes_Block(end); + + inc_irg_visited(current_ir_graph); + for(i = 0; i < get_End_n_keepalives(end); i++) { + ir_node *ka = get_End_keepalive(end, i); + if (irn_not_visited(ka)) + if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) { + set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */ + get_irg_block_visited(current_ir_graph)-1); + irg_block_walk(ka, optimize_blocks, NULL, NULL); + mark_irn_visited(ka); + ARR_APP1 (ir_node *, in, ka); + } else if (get_irn_op(ka) == op_Phi) { + mark_irn_visited(ka); + ARR_APP1 (ir_node *, in, ka); + } + } + /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */ + end->in = in; + + current_ir_graph = rem; +} diff --git a/ir/ir/irgopt.h b/ir/ir/irgopt.h index 9bdd4f9d5..8b1504880 100644 --- a/ir/ir/irgopt.h +++ b/ir/ir/irgopt.h @@ -85,6 +85,17 @@ void inline_small_irgs(ir_graph *irg, int size); for all operations. */ void place_code(ir_graph *irg); +/********************************************************************/ +/* Control flow optimization. */ +/* Removes empty blocks doing if simplifications and loop simpli- */ +/* fications. A block is empty if it contains only a Jmp node and */ +/* Phi nodes. */ +/* Merges single entry single exit blocks with their predecessor */ +/* and propagates dead control flow by calling equivalent_node. */ +/* Independent of compiler flag it removes Tuples from cf edges, */ +/* Bad predecessors form blocks and unnecessary predecessors of End.*/ +/********************************************************************/ +void optimize_cf(ir_graph *irg); # endif /* _IRGOPT_H_ */ diff --git a/ir/ir/irgwalk.h b/ir/ir/irgwalk.h index 2275b8347..b0227ebda 100644 --- a/ir/ir/irgwalk.h +++ b/ir/ir/irgwalk.h @@ -37,7 +37,9 @@ void all_irg_walk(void (pre)(ir_node*, void*), void (post)(ir_node*, void*), /* Walks only over Block nodes in the graph. Has it's own visited - flag, so that it can be interleaved with the other walker. */ + flag, so that it can be interleaved with the other walker. + If a none block is passed, starts at the block this node belongs to. + If end is passed also visites kept alive blocks. */ void irg_block_walk(ir_node *node, void (pre)(ir_node*, void*), void (post)(ir_node*, void*), void *env); diff --git a/ir/ir/irnode.c b/ir/ir/irnode.c index 45892adec..b71e625b5 100644 --- a/ir/ir/irnode.c +++ b/ir/ir/irnode.c @@ -85,10 +85,6 @@ new_ir_node (ir_graph *irg, ir_node *block, ir_op *op, ir_mode *mode, res = (ir_node *) obstack_alloc (irg->obst, node_size); - // DEBUG - if (op_Raise == op) - fprintf (stdout, "%s: res(%p) = %p\n", __PRETTY_FUNCTION__, op, res); - res->kind = k_ir_node; res->op = op; res->mode = mode; @@ -479,6 +475,10 @@ inline void mark_Block_block_visited (ir_node *node) { node->attr.block.block_visited = get_irg_block_visited(current_ir_graph); } +inline int Block_not_block_visited(ir_node *node) { + assert (node->op == op_Block); + return (node->attr.block.block_visited < get_irg_block_visited(current_ir_graph)); +} inline ir_node * get_Block_graph_arr (ir_node *node, int pos) { diff --git a/ir/ir/irnode.h b/ir/ir/irnode.h index 4d6e6a4b5..c11f51b11 100644 --- a/ir/ir/irnode.h +++ b/ir/ir/irnode.h @@ -151,6 +151,7 @@ inline unsigned long get_Block_block_visited (ir_node *node); inline void set_Block_block_visited (ir_node *node, unsigned long visit); /* For this current_ir_graph must be set. */ inline void mark_Block_block_visited(ir_node *node); +inline int Block_not_block_visited(ir_node *node); inline ir_node *get_Block_graph_arr (ir_node *node, int pos); inline void set_Block_graph_arr (ir_node *node, int pos, ir_node *value); diff --git a/ir/ir/iropt.c b/ir/ir/iropt.c index b37ed0454..83ffd7373 100644 --- a/ir/ir/iropt.c +++ b/ir/ir/iropt.c @@ -234,8 +234,6 @@ computed_value (ir_node *n) else /* Mod */ res = tarval_mod(ta, tb); } - } else { - /* printf(" # comp_val: Proj node, not optimized\n"); */ } } break; @@ -270,7 +268,7 @@ different_identity (ir_node *a, ir_node *b) new nodes. It is therefore safe to free N if the node returned is not N. If a node returns a Tuple we can not just skip it. If the size of the in array fits, we transform n into a tuple (e.g., Div). */ -static ir_node * +ir_node * equivalent_node (ir_node *n) { int ins; @@ -293,30 +291,45 @@ equivalent_node (ir_node *n) case iro_Block: { /* The Block constructor does not call optimize, but mature_block - calls the optimization. */ + calls the optimization. */ assert(get_Block_matured(n)); - /* A single entry Block following a single exit Block can be merged, - if it is not the Start block. */ + /* Straightening: a single entry Block following a single exit Block + can be merged, if it is not the Start block. */ /* !!! Beware, all Phi-nodes of n must have been optimized away. - This should be true, as the block is matured before optimize is called. + This should be true, as the block is matured before optimize is called. But what about Phi-cycles with the Phi0/Id that could not be resolved? - Remaining Phi nodes are just Ids. */ - if (get_Block_n_cfgpreds(n) == 1 - && get_irn_op(get_Block_cfgpred(n, 0)) == op_Jmp) { - n = get_nodes_Block(get_Block_cfgpred(n, 0)); - - } else if ((n != current_ir_graph->start_block) && - (n != current_ir_graph->end_block) ) { - int i; - /* If all inputs are dead, this block is dead too, except if it is + Remaining Phi nodes are just Ids. */ + if ((get_Block_n_cfgpreds(n) == 1) && + (get_irn_op(get_Block_cfgpred(n, 0)) == op_Jmp) && + (get_opt_control_flow())) { + n = get_nodes_Block(get_Block_cfgpred(n, 0)); + } else if ((get_Block_n_cfgpreds(n) == 2) && + (get_opt_control_flow())) { + /* Test whether Cond jumps twice to this block + @@@ we could do this also with two loops finding two preds from several ones. */ + a = get_Block_cfgpred(n, 0); + b = get_Block_cfgpred(n, 1); + if ((get_irn_op(a) == op_Proj) && + (get_irn_op(b) == op_Proj) && + (get_Proj_pred(a) == get_Proj_pred(b)) && + (get_irn_op(get_Proj_pred(a)) == op_Cond)) { + /* Also a single entry Block following a single exit Block. Phis have + twice the same operand and will be optimized away. */ + n = get_nodes_Block(a); + } + } else if (get_opt_unreachable_code() && + (n != current_ir_graph->start_block) && + (n != current_ir_graph->end_block) ) { + int i; + /* If all inputs are dead, this block is dead too, except if it is the start or end block. This is a step of unreachable code - elimination */ - for (i = 0; i < get_Block_n_cfgpreds(n); i++) { - if (!is_Bad(get_Block_cfgpred(n, i))) break; - } - if (i == get_Block_n_cfgpreds(n)) - n = new_Bad(); + elimination */ + for (i = 0; i < get_Block_n_cfgpreds(n); i++) { + if (!is_Bad(get_Block_cfgpred(n, i))) break; + } + if (i == get_Block_n_cfgpreds(n)) + n = new_Bad(); } } break; @@ -426,19 +439,11 @@ equivalent_node (ir_node *n) n_preds = get_Phi_n_preds(n); block = get_nodes_Block(n); - if (is_Bad(block)) return new_Bad(); - assert(get_irn_op (block) == op_Block); - - /* there should be no Phi nodes in the Start region. */ - if (block == current_ir_graph->start_block) { - n = new_Bad(); - break; - } + if ((is_Bad(block)) || /* Control dead */ + (block == current_ir_graph->start_block)) /* There should be no Phi nodes */ + return new_Bad(); /* in the Start Block. */ - if (n_preds == 0) { /* Phi of dead Region without predecessors. */ - /* GL: why not return new_Bad? */ - break; - } + if (n_preds == 0) break; /* Phi of dead Region without predecessors. */ #if 0 /* first we test for a special case: */ @@ -446,16 +451,16 @@ equivalent_node (ir_node *n) value that is known at a certain point. This is useful for dataflow analysis. */ if (n_preds == 2) { - ir_node *a = follow_Id (get_Phi_pred(n, 0)); - ir_node *b = follow_Id (get_Phi_pred(n, 1)); - if ( (get_irn_op(a) == op_Confirm) - && (get_irn_op(b) == op_Confirm) - && (follow_Id (get_irn_n(a, 0)) == follow_Id(get_irn_n(b, 0))) - && (get_irn_n(a, 1) == get_irn_n (b, 1)) - && (a->data.num == (~b->data.num & irpn_True) )) { - n = follow_Id (get_irn_n(a, 0)); - break; - } + ir_node *a = follow_Id (get_Phi_pred(n, 0)); + ir_node *b = follow_Id (get_Phi_pred(n, 1)); + if ( (get_irn_op(a) == op_Confirm) + && (get_irn_op(b) == op_Confirm) + && (follow_Id (get_irn_n(a, 0)) == follow_Id(get_irn_n(b, 0))) + && (get_irn_n(a, 1) == get_irn_n (b, 1)) + && (a->data.num == (~b->data.num & irpn_True) )) { + n = follow_Id (get_irn_n(a, 0)); + break; + } } #endif @@ -464,11 +469,11 @@ equivalent_node (ir_node *n) first_val = follow_Id(get_Phi_pred(n, i)); /* skip Id's */ set_Phi_pred(n, i, first_val); - if ( (first_val != n) /* not self pointer */ - && (get_irn_op(first_val) != op_Bad) /* value not dead */ - && !(is_Bad (get_Block_cfgpred(block, i))) ) { /* not dead control flow */ - break; /* then found first value. */ - } + if ( (first_val != n) /* not self pointer */ + && (get_irn_op(first_val) != op_Bad) /* value not dead */ + && !(is_Bad (get_Block_cfgpred(block, i))) ) { /* not dead control flow */ + break; /* then found first value. */ + } } /* A totally Bad or self-referencing Phi (we didn't break the above loop) */ @@ -477,27 +482,27 @@ equivalent_node (ir_node *n) scnd_val = NULL; /* follow_Id () for rest of inputs, determine if any of these - are non-self-referencing */ + are non-self-referencing */ while (++i < n_preds) { scnd_val = follow_Id(get_Phi_pred(n, i)); /* skip Id's */ set_Phi_pred(n, i, scnd_val); if ( (scnd_val != n) - && (scnd_val != first_val) - && (get_irn_op(scnd_val) != op_Bad) - && !(is_Bad (get_Block_cfgpred(block, i))) ) { + && (scnd_val != first_val) + && (get_irn_op(scnd_val) != op_Bad) + && !(is_Bad (get_Block_cfgpred(block, i))) ) { break; - } + } } /* Fold, if no multiple distinct non-self-referencing inputs */ if (i >= n_preds) { - n = first_val; + n = first_val; } else { - /* skip the remaining Ids. */ - while (++i < n_preds) { - set_Phi_pred(n, i, follow_Id(get_Phi_pred(n, i))); - } + /* skip the remaining Ids. */ + while (++i < n_preds) { + set_Phi_pred(n, i, follow_Id(get_Phi_pred(n, i))); + } } } break; @@ -533,12 +538,12 @@ equivalent_node (ir_node *n) /* We have twice exactly the same store -- a write after write. */ n = a; } else if (get_irn_op(c) == op_Load - && (a == c || skip_Proj(get_Load_mem(c)) == a) + && (a == c || skip_Proj(get_Load_mem(c)) == a) && get_Load_ptr(c) == b ) - /* !!!??? and a cryptic test */ { + /* !!!??? and a cryptic test */ { /* We just loaded the value from the same memory, i.e., the store doesn't change the memory -- a write after read. */ - a = get_Store_mem(n); + a = get_Store_mem(n); turn_into_tuple(n, 2); set_Tuple_pred(n, 0, a); set_Tuple_pred(n, 1, new_Bad()); @@ -552,16 +557,16 @@ equivalent_node (ir_node *n) if ( get_irn_op(a) == op_Tuple) { /* Remove the Tuple/Proj combination. */ - if ( get_Proj_proj(n) <= get_Tuple_n_preds(a) ) { - n = get_Tuple_pred(a, get_Proj_proj(n)); - } else { + if ( get_Proj_proj(n) <= get_Tuple_n_preds(a) ) { + n = get_Tuple_pred(a, get_Proj_proj(n)); + } else { assert(0); /* This should not happen! */ - n = new_Bad(); - } + n = new_Bad(); + } } else if (get_irn_mode(n) == mode_X && - is_Bad(get_nodes_Block(n))) { + is_Bad(get_nodes_Block(n))) { /* Remove dead control flow. */ - n = new_Bad(); + n = new_Bad(); } } break; @@ -577,7 +582,7 @@ equivalent_node (ir_node *n) } /* end equivalent_node() */ -/* tries several [inplace] [optimizing] transformations and returns a +/* tries several [inplace] [optimizing] transformations and returns an equivalent node. The difference to equivalent_node is that these transformations _do_ generate new nodes, and thus the old node must not be freed even if the equivalent node isn't the old one. */ @@ -620,8 +625,8 @@ transform_node (ir_node *n) b = get_DivMod_right(n); mode = get_irn_mode(a); - if (!(mode_is_int(get_irn_mode(a)) - && mode_is_int(get_irn_mode(b)))) + if (!(mode_is_int(get_irn_mode(a)) && + mode_is_int(get_irn_mode(b)))) break; if (a == b) { @@ -633,23 +638,23 @@ transform_node (ir_node *n) tb = value_of(b); if (tb) { - if (tarval_classify(tb) == 1) { - b = new_Const (mode, tarval_from_long (mode, 0)); - evaluated = 1; - } else if (ta) { - tarval *resa, *resb; + if (tarval_classify(tb) == 1) { + b = new_Const (mode, tarval_from_long (mode, 0)); + evaluated = 1; + } else if (ta) { + tarval *resa, *resb; resa = tarval_div (ta, tb); if (!resa) break; /* Causes exception!!! Model by replacing through - Jmp for X result!? */ + Jmp for X result!? */ resb = tarval_mod (ta, tb); if (!resb) break; /* Causes exception! */ - a = new_Const (mode, resa); - b = new_Const (mode, resb); - evaluated = 1; - } + a = new_Const (mode, resa); + b = new_Const (mode, resb); + evaluated = 1; + } } else if (tarval_classify (ta) == 0) { b = a; - evaluated = 1; + evaluated = 1; } } if (evaluated) { /* replace by tuple */ @@ -671,7 +676,9 @@ transform_node (ir_node *n) a = get_Cond_selector(n); ta = value_of(a); - if (ta && (get_irn_mode(a) == mode_b)) { + if (ta && + (get_irn_mode(a) == mode_b) && + (get_opt_unreachable_code())) { /* It's a boolean Cond, branching on a boolean constant. Replace it by a tuple (Bad, Jmp) or (Jmp, Bad) */ jmp = new_r_Jmp(current_ir_graph, get_nodes_Block(n)); @@ -685,7 +692,10 @@ transform_node (ir_node *n) } /* We might generate an endless loop, so keep it alive. */ add_End_keepalive(get_irg_end(current_ir_graph), get_nodes_Block(n)); - } else if (ta && (get_irn_mode(a) == mode_I) && (get_Cond_kind(n) == dense)) { + } else if (ta && + (get_irn_mode(a) == mode_I) && + (get_Cond_kind(n) == dense) && + (get_opt_unreachable_code())) { /* I don't want to allow Tuples smaller than the biggest Proj. Also this tuple might get really big... I generate the Jmp here, and remember it in link. Link is used @@ -693,19 +703,19 @@ transform_node (ir_node *n) set_irn_link(n, new_r_Jmp(current_ir_graph, get_nodes_Block(n))); /* We might generate an endless loop, so keep it alive. */ add_End_keepalive(get_irg_end(current_ir_graph), get_nodes_Block(n)); - } else if ( (get_irn_op(get_Cond_selector(n)) == op_Eor) - && (get_irn_mode(get_Cond_selector(n)) == mode_b) - && (tarval_classify(computed_value(get_Eor_right(a))) == 1)) { + } else if ((get_irn_op(get_Cond_selector(n)) == op_Eor) + && (get_irn_mode(get_Cond_selector(n)) == mode_b) + && (tarval_classify(computed_value(get_Eor_right(a))) == 1)) { /* The Eor is a negate. Generate a new Cond without the negate, simulate the negate by exchanging the results. */ set_irn_link(n, new_r_Cond(current_ir_graph, get_nodes_Block(n), - get_Eor_left(a))); - } else if ( (get_irn_op(get_Cond_selector(n)) == op_Not) - && (get_irn_mode(get_Cond_selector(n)) == mode_b)) { + get_Eor_left(a))); + } else if ((get_irn_op(get_Cond_selector(n)) == op_Not) + && (get_irn_mode(get_Cond_selector(n)) == mode_b)) { /* A Not before the Cond. Generate a new Cond without the Not, simulate the Not by exchanging the results. */ set_irn_link(n, new_r_Cond(current_ir_graph, get_nodes_Block(n), - get_Not_op(a))); + get_Not_op(a))); } } break; @@ -713,26 +723,26 @@ transform_node (ir_node *n) case iro_Proj: { a = get_Proj_pred(n); - if ( (get_irn_op(a) == op_Cond) - && get_irn_link(a) - && get_irn_op(get_irn_link(a)) == op_Cond) { - /* Use the better Cond if the Proj projs from a Cond which get's - its result from an Eor/Not. */ - assert ( ( (get_irn_op(get_Cond_selector(a)) == op_Eor) - || (get_irn_op(get_Cond_selector(a)) == op_Not)) - && (get_irn_mode(get_Cond_selector(a)) == mode_b) - && (get_irn_op(get_irn_link(a)) == op_Cond) - && (get_Cond_selector(get_irn_link(a)) == - get_Eor_left(get_Cond_selector(a)))); + if ((get_irn_op(a) == op_Cond) + && get_irn_link(a) + && get_irn_op(get_irn_link(a)) == op_Cond) { + /* Use the better Cond if the Proj projs from a Cond which get's + its result from an Eor/Not. */ + assert (((get_irn_op(get_Cond_selector(a)) == op_Eor) + || (get_irn_op(get_Cond_selector(a)) == op_Not)) + && (get_irn_mode(get_Cond_selector(a)) == mode_b) + && (get_irn_op(get_irn_link(a)) == op_Cond) + && (get_Cond_selector(get_irn_link(a)) == get_Eor_left(get_Cond_selector(a)))); set_Proj_pred(n, get_irn_link(a)); if (get_Proj_proj(n) == 0) set_Proj_proj(n, 1); else set_Proj_proj(n, 0); - } else if ( (get_irn_op(a) == op_Cond) - && (get_irn_mode(get_Cond_selector(a)) == mode_I) - && value_of(a) - && (get_Cond_kind(a) == dense)) { + } else if ((get_irn_op(a) == op_Cond) + && (get_irn_mode(get_Cond_selector(a)) == mode_I) + && value_of(a) + && (get_Cond_kind(a) == dense) + && (get_opt_unreachable_code())) { /* The Cond is a Switch on a Constant */ if (get_Proj_proj(n) == tv_val_CHIL(value_of(a))) { /* The always taken branch, reuse the existing Jmp. */ @@ -741,12 +751,12 @@ transform_node (ir_node *n) assert(get_irn_op(get_irn_link(a)) == op_Jmp); n = get_irn_link(a); } else {/* Not taken control flow, but be careful with the default! */ - if (get_Proj_proj(n) < a->attr.c.default_proj){ - /* a never taken branch */ - n = new_Bad(); - } else { - a->attr.c.default_proj = get_Proj_proj(n); - } + if (get_Proj_proj(n) < a->attr.c.default_proj){ + /* a never taken branch */ + n = new_Bad(); + } else { + a->attr.c.default_proj = get_Proj_proj(n); + } } } } break; @@ -754,16 +764,16 @@ transform_node (ir_node *n) a = get_Eor_left(n); b = get_Eor_right(n); - if ( (get_irn_mode(n) == mode_b) - && (get_irn_op(a) == op_Proj) - && (get_irn_mode(a) == mode_b) - && (tarval_classify (computed_value (b)) == 1) - && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) + if ((get_irn_mode(n) == mode_b) + && (get_irn_op(a) == op_Proj) + && (get_irn_mode(a) == mode_b) + && (tarval_classify (computed_value (b)) == 1) + && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) /* The Eor negates a Cmp. The Cmp has the negated result anyways! */ n = new_r_Proj(current_ir_graph, get_nodes_Block(n), get_Proj_pred(a), mode_b, get_negated_pnc(get_Proj_proj(a))); - else if ( (get_irn_mode(n) == mode_b) - && (tarval_classify (computed_value (b)) == 1)) + else if ((get_irn_mode(n) == mode_b) + && (tarval_classify (computed_value (b)) == 1)) /* The Eor is a Not. Replace it by a Not. */ /* ????!!!Extend to bitfield 1111111. */ n = new_r_Not(current_ir_graph, get_nodes_Block(n), a, mode_b); @@ -773,9 +783,9 @@ transform_node (ir_node *n) a = get_Not_op(n); if ( (get_irn_mode(n) == mode_b) - && (get_irn_op(a) == op_Proj) - && (get_irn_mode(a) == mode_b) - && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) + && (get_irn_op(a) == op_Proj) + && (get_irn_mode(a) == mode_b) + && (get_irn_op(get_Proj_pred(a)) == op_Cmp)) /* We negate a Cmp. The Cmp has the negated result anyways! */ n = new_r_Proj(current_ir_graph, get_nodes_Block(n), get_Proj_pred(a), mode_b, get_negated_pnc(get_Proj_proj(a))); @@ -1019,7 +1029,11 @@ optimize (ir_node *n) } /* remove unnecessary nodes */ - if (get_opt_constant_folding() || get_irn_op(n) == op_Phi) + if (get_opt_constant_folding() || + (get_irn_op(n) == op_Phi) || /* always optimize these nodes. */ + (get_irn_op(n) == op_Id) || + (get_irn_op(n) == op_Proj) || + (get_irn_op(n) == op_Block) ) /* Flags tested local. */ n = equivalent_node (n); /** common subexpression elimination **/ @@ -1029,34 +1043,31 @@ optimize (ir_node *n) subexpressions within a block. */ if (get_opt_cse()) n = identify_cons (current_ir_graph->value_table, n); - /* identify found a cse, so deallocate the old node. */ + if (n != old_n) { + /* We found an existing, better node, so we can deallocate the old node. */ obstack_free (current_ir_graph->obst, old_n); - /* The AmRoq fiasco returns n here. Martin's version doesn't. */ } /* Some more constant expression evaluation that does not allow to free the node. */ - if (get_opt_constant_folding()) + if (get_opt_constant_folding() || + (get_irn_op(n) == op_Cond) || + (get_irn_op(n) == op_Proj)) /* Flags tested local. */ n = transform_node (n); - /* Remove nodes with dead (Bad) input. */ - if (get_opt_unreachable_code()) - n = gigo (n); + /* Remove nodes with dead (Bad) input. + Run always for transformation induced Bads. */ + n = gigo (n); + /* Now we can verify the node, as it has no dead inputs any more. */ irn_vrfy(n); /* Now we have a legal, useful node. Enter it in hash table for cse */ - if (get_opt_cse()) { + if (get_opt_cse() && (get_irn_opcode(n) != iro_Block)) { n = identify_remember (current_ir_graph->value_table, n); } -#if 0 /* GL: what's the use of this?? */ - if ((current_ir_graph->state & irgs_building) && IR_KEEP_ALIVE (n)) { - assert (~current_ir_graph->state & irgs_keep_alives_in_arr); - pdeq_putr (current_ir_graph->keep.living, n); - } -#endif return n; } @@ -1070,10 +1081,11 @@ optimize_in_place_2 (ir_node *n) tarval *tv; ir_node *old_n = n; - if (!get_optimize()) return n; + if (!get_optimize() && (get_irn_op(n) != op_Phi)) return n; /* if not optimize return n */ if (n == NULL) { + assert(0); /* Here this is possible. Why? */ return n; } @@ -1096,7 +1108,11 @@ optimize_in_place_2 (ir_node *n) /* remove unnecessary nodes */ /*if (get_opt_constant_folding()) */ - if (get_opt_constant_folding() || get_irn_op(n) == op_Phi) + if (get_opt_constant_folding() || + (get_irn_op(n) == op_Phi) || /* always optimize these nodes. */ + (get_irn_op(n) == op_Id) || + (get_irn_op(n) == op_Proj) || + (get_irn_op(n) == op_Block) ) /* Flags tested local. */ n = equivalent_node (n); /** common subexpression elimination **/ @@ -1108,18 +1124,16 @@ optimize_in_place_2 (ir_node *n) n = identify (current_ir_graph->value_table, n); } - /* identify found a cse, so deallocate the old node. */ - if (n != old_n) { - /* The AmRoq fiasco returns n here. Martin's version doesn't. */ - } - /* Some more constant expression evaluation. */ - if (get_opt_constant_folding()) + if (get_opt_constant_folding() || + (get_irn_op(n) == op_Cond) || + (get_irn_op(n) == op_Proj)) /* Flags tested local. */ n = transform_node (n); - /* Remove nodes with dead (Bad) input. */ - if (get_opt_unreachable_code()) - n = gigo (n); + /* Remove nodes with dead (Bad) input. + Run always for transformation induced Bads. */ + n = gigo (n); + /* Now we can verify the node, as it has no dead inputs any more. */ irn_vrfy(n); diff --git a/ir/ir/iropt_t.h b/ir/ir/iropt_t.h index 2bb29b0a2..2c4b369aa 100644 --- a/ir/ir/iropt_t.h +++ b/ir/ir/iropt_t.h @@ -14,9 +14,14 @@ # include "pset.h" # include "iropt.h" +ir_node *equivalent_node (ir_node *n); + +/* For cse */ pset *new_identities (void); -void del_identities (pset *value_table); -void add_identity (pset *value_table, ir_node *node); +void del_identities (pset *value_table); +void add_identity (pset *value_table, ir_node *node); + ir_node *optimize_in_place_2 (ir_node *n); + # endif /* _IROPT_T_H_ */ -- 2.20.1