nsz Git - libfirm/blob - ir/ir/irgopt.c

   1 /* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
   2 ** All rights reserved.
   3 **
   4 ** Author: Christian Schaefer
   5 **
   6 ** Optimizations for a whole ir graph, i.e., a procedure.
   7 */
   8
   9 /* $Id$ */
  10
  11 #ifdef HAVE_CONFIG_H
  12 # include <config.h>
  13 #endif
  14
  15 # include <assert.h>
  16
  17 # include "irgopt.h"
  18 # include "irnode_t.h"
  19 # include "irgraph_t.h"
  20 # include "iropt.h"
  21 # include "irgwalk.h"
  22 # include "ircons.h"
  23 # include "misc.h"
  24 # include "irgmod.h"
  25
  26 # include "pset.h"
  27
  28 /* @@@ for testing */
  29 #include "irdump.h"
  30
  31 /* Defined in iropt.c */
  32 pset *new_identities (void);
  33 void  del_identities (pset *value_table);
  34 void  add_identities   (pset *value_table, ir_node *node);
  35
  36 #if 0  /* Warum ist das hier nochmal definiert?
  37           Hat's nicht gelinkt wegen typeO tities - tity ?? */
  38 /* To fill the hash table */
  39 void
  40 add_identity (pset *value_table, ir_node *n) {
  41   /* identify_remember (value_table, n);*/
  42 }
  43 #endif
  44
  45 /********************************************************************/
  46 /* apply optimizations of iropt to all nodes.                       */
  47 /********************************************************************/
  48
  49 void init_link (ir_node *n, void *env) {
  50   set_irn_link(n, NULL);
  51 }
  52
  53 void
  54 optimize_in_place_wrapper (ir_node *n, void *env) {
  55   int i;
  56   ir_node *optimized;
  57
  58   /* optimize all sons after recursion, i.e., the sons' sons are
  59      optimized already. */
  60   for (i = -1; i < get_irn_arity(n); i++) {
  61     optimized = optimize_in_place(get_irn_n(n, i));
  62     set_irn_n(n, i, optimized);
  63   }
  64 }
  65
  66 void
  67 local_optimize_graph (ir_graph *irg) {
  68   ir_graph *rem = current_ir_graph;
  69   current_ir_graph = irg;
  70
  71   /* Should we clean the value_table in irg for the cse? Better do so... */
  72   del_identities(irg->value_table);
  73   irg->value_table = new_identities();
  74
  75   /* walk over the graph */
  76   irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);
  77
  78   current_ir_graph = rem;
  79 }
  80
  81 /********************************************************************/
  82 /* Routines for dead node elimination / copying garbage collection  */
  83 /* of the obstack.                                                  */
  84 /********************************************************************/
  85
  86 /* Remeber the new node in the old node by using a field all nodes have. */
  87 inline void
  88 set_new_node (ir_node *old, ir_node *new)
  89 {
  90   old->link = new;
  91 }
  92
  93 /* Get this new node, before the old node is forgotton.*/
  94 inline ir_node *
  95 get_new_node (ir_node * n)
  96 {
  97   return n->link;
  98 }
  99
 100
 101 /* We use the block_visited flag to mark that we have computed the
 102    number of useful predecessors for this block.
 103    Further we encode the new arity in this flag in the old blocks.
 104    Remembering the arity is useful, as it saves a lot of pointer
 105    accesses.  This function is called for all Phi and Block nodes
 106    in a Block. */
 107 inline int
 108 compute_new_arity(ir_node *b) {
 109   int i, res;
 110   int irg_v, block_v;
 111
 112   irg_v = get_irg_block_visited(current_ir_graph);
 113   block_v = get_Block_block_visited(b);
 114   if (block_v >= irg_v) {
 115     /* we computed the number of preds for this block and saved it in the
 116        block_v flag */
 117     return block_v - irg_v;
 118   } else {
 119     /* compute the number of good predecessors */
 120     res = get_irn_arity(b);
 121     for (i = 0; i < get_irn_arity(b); i++)
 122       if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
 123     /* save it in the flag. */
 124     set_Block_block_visited(b, irg_v + res);
 125     return res;
 126   }
 127 }
 128
 129 /* Copies the node to the new obstack. The Ins of the new node point to
 130    the predecessors on the old obstack.  For block/phi nodes not all
 131    predecessors might be copied.  n->link points to the new node.
 132    For Phi and Block nodes the function allocates in-arrays with an arity
 133    only for useful predecessors.  The arity is determined by counting
 134    the non-bad predecessors of the block. */
 135 void
 136 copy_node (ir_node *n, void *env) {
 137   ir_node *nn, *block;
 138   int new_arity;
 139
 140   if (get_irn_opcode(n) == iro_Block) {
 141     block = NULL;
 142     new_arity = compute_new_arity(n);
 143   } else {
 144     block = get_nodes_Block(n);
 145     if (get_irn_opcode(n) == iro_Phi) {
 146       new_arity = compute_new_arity(block);
 147     } else {
 148       new_arity = get_irn_arity(n);
 149     }
 150   }
 151   nn = new_ir_node(current_ir_graph,
 152                    block,
 153                    get_irn_op(n),
 154                    get_irn_mode(n),
 155                    new_arity,
 156                    get_irn_in(n));
 157   /* Copy the attributes.  These might point to additional data.  If this
 158      was allocated on the old obstack the pointers now are dangling.  This
 159      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 160   copy_attrs(n, nn);
 161   set_new_node(n, nn);
 162
 163   /*  printf("\n old node: "); DDMSG2(n);
 164       printf(" new node: "); DDMSG2(nn); */
 165
 166 }
 167
 168 /* Copies new predecessors of old node to new node remembered in link.
 169    Spare the Bad predecessors of Phi and Block nodes. */
 170 void
 171 copy_preds (ir_node *n, void *env) {
 172   ir_node *nn, *block, *on;
 173   int i, j;
 174
 175   nn = get_new_node(n);
 176
 177   /* printf("\n old node: "); DDMSG2(n);
 178      printf(" new node: "); DDMSG2(nn);
 179      printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */
 180
 181   if (get_irn_opcode(n) == iro_Block) {
 182     /* Don't copy Bad nodes. */
 183     j = 0;
 184     for (i = 0; i < get_irn_arity(n); i++)
 185       if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
 186         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 187         j++;
 188       }
 189     /* repair the block visited flag from above misuse. Repair it in both
 190        graphs so that the old one can still be used. */
 191     set_Block_block_visited(nn, 0);
 192     set_Block_block_visited(n, 0);
 193     /* Local optimization could not merge two subsequent blocks if
 194        in array contained Bads.  Now it's possible.
 195        @@@ I removed the call to optimize_in_place as it requires
 196        that the fields in ir_graph are set properly. */
 197     if (get_Block_n_cfgpreds(nn) == 1
 198         && get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
 199       exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
 200   } else if (get_irn_opcode(n) == iro_Phi) {
 201     /* Don't copy node if corresponding predecessor in block is Bad.
 202        The Block itself should not be Bad. */
 203     block = get_nodes_Block(n);
 204     set_irn_n (nn, -1, get_new_node(block));
 205     j = 0;
 206     for (i = 0; i < get_irn_arity(n); i++)
 207       if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
 208         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 209         j++;
 210       }
 211     /* If the pre walker reached this Phi after the post walker visited the
 212        block block_visited is > 0. */
 213     set_Block_block_visited(get_nodes_Block(n), 0);
 214     /* Compacting the Phi's ins might generate Phis with only one
 215        predecessor. */
 216     if (get_irn_arity(n) == 1)
 217       exchange(n, get_irn_n(n, 0));
 218   } else {
 219     for (i = -1; i < get_irn_arity(n); i++)
 220       set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
 221   }
 222   /* Now the new node is complete.  We can add it to the hash table for cse. */
 223   /* add_identity (current_ir_graph->value_table, nn); */
 224   add_identities (current_ir_graph->value_table, nn);
 225 }
 226
 227 /* Copies the graph reachable from current_ir_graph->end to the obstack
 228    in current_ir_graph.
 229    Then fixes the fields in current_ir_graph containing nodes of the
 230    graph.  */
 231 void
 232 copy_graph () {
 233   /* Not all nodes remembered in current_ir_graph might be reachable
 234      from the end node.  Assure their link is set to NULL, so that
 235      we can test whether new nodes have been computed. */
 236   set_irn_link(get_irg_frame  (current_ir_graph), NULL);
 237   set_irn_link(get_irg_globals(current_ir_graph), NULL);
 238   set_irn_link(get_irg_args   (current_ir_graph), NULL);
 239
 240   /* we use the block walk flag for removing Bads from Blocks ins. */
 241   inc_irg_block_visited(current_ir_graph);
 242
 243   /* copy the graph */
 244   irg_walk(get_irg_end(current_ir_graph), copy_node, copy_preds, NULL);
 245
 246   /* fix the fields in current_ir_graph */
 247   set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
 248   set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
 249   if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
 250     copy_node (get_irg_frame(current_ir_graph), NULL);
 251     copy_preds(get_irg_frame(current_ir_graph), NULL);
 252   }
 253   if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
 254     copy_node (get_irg_globals(current_ir_graph), NULL);
 255     copy_preds(get_irg_globals(current_ir_graph), NULL);
 256   }
 257   if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
 258     copy_node (get_irg_args(current_ir_graph), NULL);
 259     copy_preds(get_irg_args(current_ir_graph), NULL);
 260   }
 261   set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
 262
 263   set_irg_start_block(current_ir_graph,
 264                       get_new_node(get_irg_start_block(current_ir_graph)));
 265   set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
 266   set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
 267   set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
 268   if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
 269     copy_node(get_irg_bad(current_ir_graph), NULL);
 270     copy_preds(get_irg_bad(current_ir_graph), NULL);
 271   }
 272   set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
 273 }
 274
 275 /* Copies all reachable nodes to a new obstack.  Removes bad inputs
 276    from block nodes and the corresponding inputs from Phi nodes.
 277    Merges single exit blocks with single entry blocks and removes
 278    1-input Phis.
 279    Adds all new nodes to a new hash table for cse.  Does not
 280    perform cse, so the hash table might contain common subexpressions. */
 281 /* Amroq call this emigrate() */
 282 void
 283 dead_node_elimination(ir_graph *irg) {
 284   ir_graph *rem;
 285   struct obstack *graveyard_obst = NULL;
 286   struct obstack *rebirth_obst   = NULL;
 287
 288   /* Remember external state of current_ir_graph. */
 289   rem = current_ir_graph;
 290   current_ir_graph = irg;
 291
 292   if (get_optimize() && get_opt_dead_node_elimination()) {
 293
 294     /* A quiet place, where the old obstack can rest in peace,
 295        until it will be cremated. */
 296     graveyard_obst = irg->obst;
 297
 298     /* A new obstack, where the reachable nodes will be copied to. */
 299     rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
 300     current_ir_graph->obst = rebirth_obst;
 301     obstack_init (current_ir_graph->obst);
 302
 303     /* We also need a new hash table for cse */
 304     del_identities (irg->value_table);
 305     irg->value_table = new_identities ();
 306
 307     /* Copy the graph from the old to the new obstack */
 308     copy_graph();
 309
 310     /* Free memory from old unoptimized obstack */
 311     obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
 312     xfree (graveyard_obst);           /* ... then free it.           */
 313   }
 314
 315   current_ir_graph = rem;
 316 }
 317
 318 /**********************************************************************/
 319 /*  Funcionality for inlining                                         */
 320 /**********************************************************************/
 321
 322 void inline_method(ir_node *call, ir_graph *called_graph) {
 323   ir_node *pre_call;
 324   ir_node *post_call, *post_bl;
 325   ir_node *in[5];
 326   ir_node *end, *end_bl;
 327   ir_node **res_pred;
 328   ir_node **cf_pred;
 329   ir_node *ret, *phi;
 330   ir_node *cf_op, *bl;
 331   int arity, n_ret, n_exc, n_res, i, j;
 332
 333   if (!get_opt_inline()) return;
 334
 335   /** Check preconditions **/
 336   assert(get_irn_op(call) == op_Call);
 337   assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
 338   assert(get_type_tpop(get_Call_type(call)) == type_method);
 339   if (called_graph == current_ir_graph) return;
 340
 341   /** Part the Call node into two nodes.  Pre_call collects the parameters of
 342   the procedure and later replaces the Start node of the called graph.
 343   Post_call is the old Call node and collects the results of the called
 344   graph. Both will end up being a tuple.  **/
 345   post_bl = get_nodes_Block(call);
 346   set_irg_current_block(current_ir_graph, post_bl);
 347   /* XxMxPxP von Start + Parameter von Call */
 348   in[0] = new_Jmp();
 349   in[1] = get_Call_mem(call);
 350   in[2] = get_irg_frame(current_ir_graph);
 351   in[3] = get_irg_globals(current_ir_graph);
 352   in[4] = new_Tuple (get_Call_n_params(call),
 353                      get_Call_param_arr(call));
 354   pre_call = new_Tuple(5, in);
 355   post_call = call;
 356
 357   /** Part the block of the Call node into two blocks.
 358       The new block gets the ins of the old block, pre_call and all its
 359       predecessors and all Phi nodes. **/
 360   part_block(pre_call);
 361
 362   /** Prepare state for dead node elimination **/
 363   /* Visited flags in calling irg must be >= flag in called irg.
 364      Else walker and arity computation will not work. */
 365   if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
 366     set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);   /***/
 367   if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
 368     set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
 369   /* Set pre_call as new Start node in link field of the start node of
 370       calling graph and pre_calls block as new block for the start block
 371       of calling graph.
 372       Further mark these nodes so that they are not visited by the
 373       copying. */
 374   set_irn_link(get_irg_start(called_graph), pre_call);
 375   set_irn_visited(get_irg_start(called_graph),
 376                   get_irg_visited(current_ir_graph));/***/
 377   set_irn_link(get_irg_start_block(called_graph),
 378                get_nodes_Block(pre_call));
 379   set_irn_visited(get_irg_start_block(called_graph),
 380                   get_irg_visited(current_ir_graph));  /***/
 381
 382   /* Initialize for compaction of in arrays */
 383   inc_irg_block_visited(current_ir_graph);
 384   /*
 385   set_Block_block_visited(get_irg_start_block(called_graph),
 386                         get_irg_block_visited(current_ir_graph) +1 +1); /* count for self edge */
 387
 388   /*** Replicate local entities of the called_graph ***/
 389   /* @@@ */
 390
 391   /* visited is > than that of called graph.  With this trick visited will
 392      remain unchanged so that an outer walker calling this inline will
 393      not visit the inlined nodes. */
 394   set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
 395
 396   /** Performing dead node elimination inlines the graph **/
 397   /* Copies the nodes to the obstack of current_ir_graph. */
 398   /* @@@ endless loops are not copied!! */
 399   irg_walk(get_irg_end(called_graph), copy_node, copy_preds, NULL);
 400
 401   /* Repair called_graph */
 402   set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
 403   set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
 404   set_Block_block_visited(get_irg_start_block(called_graph), 0);
 405
 406   /*** Merge the end of the inlined procedure with the call site ***/
 407
 408   /** Precompute some values **/
 409   end_bl = get_new_node(get_irg_end_block(called_graph));
 410   end = get_new_node(get_irg_end(called_graph));
 411   arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
 412   n_res = get_method_n_res(get_Call_type(call));
 413
 414   res_pred = (ir_node **) malloc ((n_res) * sizeof (ir_node *));
 415   cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 416
 417   set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
 418
 419
 420   /** Collect control flow from Return blocks to post_calls block. Replace
 421       Return nodes by Jump nodes. **/
 422   n_ret = 0;
 423   for (i = 0; i < arity; i++) {
 424     ir_node *ret;
 425     ret = get_irn_n(end_bl, i);
 426     if (get_irn_op(ret) == op_Return) {
 427       cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
 428       n_ret++;
 429     }
 430   }
 431   set_irn_in(post_bl, n_ret, cf_pred);
 432
 433   /** Collect results from Return nodes to post_call. Post_call is
 434       turned into a tuple. **/
 435   turn_into_tuple(post_call, 4);
 436   /* First the Memory-Phi */
 437   n_ret = 0;
 438   for (i = 0; i < arity; i++) {
 439     ret = get_irn_n(end_bl, i);
 440     if (get_irn_op(ret) == op_Return) {
 441       cf_pred[n_ret] = get_Return_mem(ret);
 442       n_ret++;
 443     }
 444   }
 445   phi = new_Phi(n_ret, cf_pred, mode_M);
 446   set_Tuple_pred(call, 0, phi);
 447   set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 448   set_irn_link(post_bl, phi);
 449   /* Now the real results */
 450   if (n_res > 0) {
 451     for (j = 0; j < n_res; j++) {
 452       n_ret = 0;
 453       for (i = 0; i < arity; i++) {
 454         ret = get_irn_n(end_bl, i);
 455         if (get_irn_op(ret) == op_Return) {
 456           cf_pred[n_ret] = get_Return_res(ret, j);
 457           n_ret++;
 458         }
 459       }
 460       phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
 461       res_pred[j] = phi;
 462       set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 463       set_irn_link(post_bl, phi);
 464     }
 465     set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
 466   } else {
 467     set_Tuple_pred(call, 2, new_Bad());
 468   }
 469   /* Finally the exception control flow.  We need to add a Phi node to
 470      collect the memory containing the exception objects.  Further we need
 471      to add another block to get a correct representation of this Phi.  To
 472      this block we add a Jmp that resolves into the X output of the Call
 473      when the Call is turned into a tuple. */
 474   n_exc = 0;
 475   for (i = 0; i < arity; i++) {
 476     ir_node *ret;
 477     ret = get_irn_n(end_bl, i);
 478     if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
 479       cf_pred[n_exc] = ret;
 480       n_exc++;
 481     }
 482   }
 483   if (n_exc > 0) {
 484     new_Block(n_exc, cf_pred);      /* whatch it: current_block is changed! */
 485     set_Tuple_pred(call, 1, new_Jmp());
 486     /* The Phi for the memories with the exception objects */
 487     n_exc = 0;
 488     for (i = 0; i < arity; i++) {
 489       ir_node *ret;
 490       ret = skip_Proj(get_irn_n(end_bl, i));
 491       if (get_irn_op(ret) == op_Call) {
 492         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
 493         n_exc++;
 494       } else if (is_fragile_op(ret)) {
 495         /* We rely that all cfops have the memory output at the same position. */
 496         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
 497         n_exc++;
 498       } else if (get_irn_op(ret) == op_Raise) {
 499         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
 500         n_exc++;
 501       }
 502     }
 503     set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
 504   } else {
 505     set_Tuple_pred(call, 1, new_Bad());
 506     set_Tuple_pred(call, 3, new_Bad());
 507   }
 508   free(res_pred);
 509   free(cf_pred);
 510
 511   /*** Correct the control flow to the end node.
 512        If the exception control flow from the Call directly branched to the
 513        end block we now have the following control flow predecessor pattern:
 514        ProjX -> Tuple -> Jmp.
 515        We must remove the Jmp along with it's empty block and add Jmp's
 516        predecessors as predecessors of this end block. ***/
 517   /* find the problematic predecessor of the end block. */
 518   end_bl = get_irg_end_block(current_ir_graph);
 519   for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
 520     cf_op = get_Block_cfgpred(end_bl, i);
 521     if (get_irn_op(cf_op) == op_Proj) {
 522       cf_op = get_Proj_pred(cf_op);
 523       if (get_irn_op(cf_op) == op_Tuple) {
 524         cf_op = get_Tuple_pred(cf_op, 1);
 525         assert(get_irn_op(cf_op) == op_Jmp);
 526         break;
 527       }
 528     }
 529   }
 530   /* repair */
 531   if (i < get_Block_n_cfgpreds(end_bl)) {
 532     bl = get_nodes_Block(cf_op);
 533     arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
 534     cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 535     for (j = 0; j < i; j++)
 536       cf_pred[j] = get_Block_cfgpred(end_bl, j);
 537     for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
 538       cf_pred[j] = get_Block_cfgpred(bl, j-i);
 539     for (j = j; j < arity; j++)
 540       cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
 541     set_irn_in(end_bl, arity, cf_pred);
 542     free(cf_pred);
 543   }
 544 }