nsz Git - libfirm/blob - ir/ir/irgopt.c

   1 /* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
   2 ** All rights reserved.
   3 **
   4 ** Author: Christian Schaefer
   5 **
   6 ** Optimizations for a whole ir graph, i.e., a procedure.
   7 */
   8
   9 #ifdef HAVE_CONFIG_H
  10 # include <config.h>
  11 #endif
  12
  13 # include <assert.h>
  14
  15 # include "irgopt.h"
  16 # include "irnode_t.h"
  17 # include "irgraph_t.h"
  18 # include "iropt.h"
  19 # include "irgwalk.h"
  20 # include "ircons.h"
  21 # include "misc.h"
  22 # include "irgmod.h"
  23
  24 # include "pset.h"
  25
  26 /* @@@ for testing */
  27 #include "irdump.h"
  28
  29 /* Defined in iropt.c */
  30 pset *new_identities (void);
  31 void  del_identities (pset *value_table);
  32 void  add_identities   (pset *value_table, ir_node *node);
  33
  34 #if 0  /* Warum ist das hier nochmal definiert?
  35           Hat's nicht gelinkt wegen typeO tities - tity ?? */
  36 /* To fill the hash table */
  37 void
  38 add_identity (pset *value_table, ir_node *n) {
  39   /* identify_remember (value_table, n);*/
  40 }
  41 #endif
  42
  43 /********************************************************************/
  44 /* apply optimizations of iropt to all nodes.                       */
  45 /********************************************************************/
  46
  47 void init_link (ir_node *n, void *env) {
  48   set_irn_link(n, NULL);
  49 }
  50
  51 void
  52 optimize_in_place_wrapper (ir_node *n, void *env) {
  53   int i;
  54   ir_node *optimized;
  55
  56   /* optimize all sons after recursion, i.e., the sons' sons are
  57      optimized already. */
  58   for (i = -1; i < get_irn_arity(n); i++) {
  59     optimized = optimize_in_place(get_irn_n(n, i));
  60     set_irn_n(n, i, optimized);
  61   }
  62 }
  63
  64 void
  65 local_optimize_graph (ir_graph *irg) {
  66   ir_graph *rem = current_ir_graph;
  67   current_ir_graph = irg;
  68
  69   /* Should we clean the value_table in irg for the cse? Better do so... */
  70   del_identities(irg->value_table);
  71   irg->value_table = new_identities();
  72
  73   /* walk over the graph */
  74   irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);
  75
  76   current_ir_graph = rem;
  77 }
  78
  79 /********************************************************************/
  80 /* Routines for dead node elimination / copying garbage collection  */
  81 /* of the obstack.                                                  */
  82 /********************************************************************/
  83
  84 /* Remeber the new node in the old node by using a field all nodes have. */
  85 inline void
  86 set_new_node (ir_node *old, ir_node *new)
  87 {
  88   old->link = new;
  89 }
  90
  91 /* Get this new node, before the old node is forgotton.*/
  92 inline ir_node *
  93 get_new_node (ir_node * n)
  94 {
  95   return n->link;
  96 }
  97
  98
  99 /* We use the block_visited flag to mark that we have computed the
 100    number of useful predecessors for this block.
 101    Further we encode the new arity in this flag in the old blocks.
 102    Remembering the arity is useful, as it saves a lot of pointer
 103    accesses.  This function is called for all Phi and Block nodes
 104    in a Block. */
 105 inline int
 106 compute_new_arity(ir_node *b) {
 107   int i, res;
 108   int irg_v, block_v;
 109
 110   irg_v = get_irg_block_visited(current_ir_graph);
 111   block_v = get_Block_block_visited(b);
 112   if (block_v >= irg_v) {
 113     /* we computed the number of preds for this block and saved it in the
 114        block_v flag */
 115     return block_v - irg_v;
 116   } else {
 117     /* compute the number of good predecessors */
 118     res = get_irn_arity(b);
 119     for (i = 0; i < get_irn_arity(b); i++)
 120       if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
 121     /* save it in the flag. */
 122     set_Block_block_visited(b, irg_v + res);
 123     return res;
 124   }
 125 }
 126
 127 /* Copies the node to the new obstack. The Ins of the new node point to
 128    the predecessors on the old obstack.  For block/phi nodes not all
 129    predecessors might be copied.  n->link points to the new node.
 130    For Phi and Block nodes the function allocates in-arrays with an arity
 131    only for useful predecessors.  The arity is determined by counting
 132    the non-bad predecessors of the block. */
 133 void
 134 copy_node (ir_node *n, void *env) {
 135   ir_node *nn, *block;
 136   int new_arity;
 137
 138   if (get_irn_opcode(n) == iro_Block) {
 139     block = NULL;
 140     new_arity = compute_new_arity(n);
 141   } else {
 142     block = get_nodes_Block(n);
 143     if (get_irn_opcode(n) == iro_Phi) {
 144       new_arity = compute_new_arity(block);
 145     } else {
 146       new_arity = get_irn_arity(n);
 147     }
 148   }
 149   nn = new_ir_node(current_ir_graph,
 150                    block,
 151                    get_irn_op(n),
 152                    get_irn_mode(n),
 153                    new_arity,
 154                    get_irn_in(n));
 155   /* Copy the attributes.  These might point to additional data.  If this
 156      was allocated on the old obstack the pointers now are dangling.  This
 157      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 158   copy_attrs(n, nn);
 159   set_new_node(n, nn);
 160
 161   /*  printf("\n old node: "); DDMSG2(n);
 162       printf(" new node: "); DDMSG2(nn); */
 163
 164 }
 165
 166 /* Copies new predecessors of old node to new node remembered in link.
 167    Spare the Bad predecessors of Phi and Block nodes. */
 168 void
 169 copy_preds (ir_node *n, void *env) {
 170   ir_node *nn, *block, *on;
 171   int i, j;
 172
 173   nn = get_new_node(n);
 174
 175   /* printf("\n old node: "); DDMSG2(n);
 176      printf(" new node: "); DDMSG2(nn);
 177      printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */
 178
 179   if (get_irn_opcode(n) == iro_Block) {
 180     /* Don't copy Bad nodes. */
 181     j = 0;
 182     for (i = 0; i < get_irn_arity(n); i++)
 183       if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
 184         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 185         j++;
 186       }
 187     /* repair the block visited flag from above misuse. Repair it in both
 188        graphs so that the old one can still be used. */
 189     set_Block_block_visited(nn, 0);
 190     set_Block_block_visited(n, 0);
 191     /* Local optimization could not merge two subsequent blocks if
 192        in array contained Bads.  Now it's possible.
 193        @@@ I removed the call to optimize_in_place as it requires
 194        that the fields in ir_graph are set properly. */
 195     if (get_Block_n_cfgpreds(nn) == 1
 196         && get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
 197       exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
 198   } else if (get_irn_opcode(n) == iro_Phi) {
 199     /* Don't copy node if corresponding predecessor in block is Bad.
 200        The Block itself should not be Bad. */
 201     block = get_nodes_Block(n);
 202     set_irn_n (nn, -1, get_new_node(block));
 203     j = 0;
 204     for (i = 0; i < get_irn_arity(n); i++)
 205       if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
 206         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 207         j++;
 208       }
 209     /* If the pre walker reached this Phi after the post walker visited the
 210        block block_visited is > 0. */
 211     set_Block_block_visited(get_nodes_Block(n), 0);
 212     /* Compacting the Phi's ins might generate Phis with only one
 213        predecessor. */
 214     if (get_irn_arity(n) == 1)
 215       exchange(n, get_irn_n(n, 0));
 216   } else {
 217     for (i = -1; i < get_irn_arity(n); i++)
 218       set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
 219   }
 220   /* Now the new node is complete.  We can add it to the hash table for cse. */
 221   /* add_identity (current_ir_graph->value_table, nn); */
 222   add_identities (current_ir_graph->value_table, nn);
 223 }
 224
 225 /* Copies the graph reachable from current_ir_graph->end to the obstack
 226    in current_ir_graph.
 227    Then fixes the fields in current_ir_graph containing nodes of the
 228    graph.  */
 229 void
 230 copy_graph () {
 231   /* Not all nodes remembered in current_ir_graph might be reachable
 232      from the end node.  Assure their link is set to NULL, so that
 233      we can test whether new nodes have been computed. */
 234   set_irn_link(get_irg_frame  (current_ir_graph), NULL);
 235   set_irn_link(get_irg_globals(current_ir_graph), NULL);
 236   set_irn_link(get_irg_args   (current_ir_graph), NULL);
 237
 238   /* we use the block walk flag for removing Bads from Blocks ins. */
 239   inc_irg_block_visited(current_ir_graph);
 240
 241   /* copy the graph */
 242   irg_walk(get_irg_end(current_ir_graph), copy_node, copy_preds, NULL);
 243
 244   /* fix the fields in current_ir_graph */
 245   set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
 246   set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
 247   if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
 248     copy_node (get_irg_frame(current_ir_graph), NULL);
 249     copy_preds(get_irg_frame(current_ir_graph), NULL);
 250   }
 251   if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
 252     copy_node (get_irg_globals(current_ir_graph), NULL);
 253     copy_preds(get_irg_globals(current_ir_graph), NULL);
 254   }
 255   if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
 256     copy_node (get_irg_args(current_ir_graph), NULL);
 257     copy_preds(get_irg_args(current_ir_graph), NULL);
 258   }
 259   set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
 260
 261   set_irg_start_block(current_ir_graph,
 262                       get_new_node(get_irg_start_block(current_ir_graph)));
 263   set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
 264   set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
 265   set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
 266   if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
 267     copy_node(get_irg_bad(current_ir_graph), NULL);
 268     copy_preds(get_irg_bad(current_ir_graph), NULL);
 269   }
 270   set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
 271 }
 272
 273 /* Copies all reachable nodes to a new obstack.  Removes bad inputs
 274    from block nodes and the corresponding inputs from Phi nodes.
 275    Merges single exit blocks with single entry blocks and removes
 276    1-input Phis.
 277    Adds all new nodes to a new hash table for cse.  Does not
 278    perform cse, so the hash table might contain common subexpressions. */
 279 /* Amroq call this emigrate() */
 280 void
 281 dead_node_elimination(ir_graph *irg) {
 282   ir_graph *rem;
 283   struct obstack *graveyard_obst = NULL;
 284   struct obstack *rebirth_obst   = NULL;
 285
 286   /* Remember external state of current_ir_graph. */
 287   rem = current_ir_graph;
 288   current_ir_graph = irg;
 289
 290   if (get_optimize() && get_opt_dead_node_elimination()) {
 291
 292     /* A quiet place, where the old obstack can rest in peace,
 293        until it will be cremated. */
 294     graveyard_obst = irg->obst;
 295
 296     /* A new obstack, where the reachable nodes will be copied to. */
 297     rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
 298     current_ir_graph->obst = rebirth_obst;
 299     obstack_init (current_ir_graph->obst);
 300
 301     /* We also need a new hash table for cse */
 302     del_identities (irg->value_table);
 303     irg->value_table = new_identities ();
 304
 305     /* Copy the graph from the old to the new obstack */
 306     copy_graph();
 307
 308     /* Free memory from old unoptimized obstack */
 309     obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
 310     xfree (graveyard_obst);           /* ... then free it.           */
 311   }
 312
 313   current_ir_graph = rem;
 314 }
 315
 316 /**********************************************************************/
 317 /*  Funcionality for inlining                                         */
 318 /**********************************************************************/
 319
 320 void inline_method(ir_node *call, ir_graph *called_graph) {
 321   ir_node *pre_call;
 322   ir_node *post_call, *post_bl;
 323   ir_node *in[5];
 324   ir_node *end, *end_bl;
 325   ir_node **res_pred;
 326   ir_node **cf_pred;
 327   ir_node *ret, *phi;
 328   ir_node *cf_op, *bl;
 329   int arity, n_ret, n_exc, n_res, i, j;
 330
 331   if (!get_opt_inline()) return;
 332
 333   /** Check preconditions **/
 334   assert(get_irn_op(call) == op_Call);
 335   assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
 336   assert(get_type_tpop(get_Call_type(call)) == type_method);
 337   if (called_graph == current_ir_graph) return;
 338
 339   /** Part the Call node into two nodes.  Pre_call collects the parameters of
 340   the procedure and later replaces the Start node of the called graph.
 341   Post_call is the old Call node and collects the results of the called
 342   graph. Both will end up being a tuple.  **/
 343   post_bl = get_nodes_Block(call);
 344   set_irg_current_block(current_ir_graph, post_bl);
 345   /* XxMxPxP von Start + Parameter von Call */
 346   in[0] = new_Jmp();
 347   in[1] = get_Call_mem(call);
 348   in[2] = get_irg_frame(current_ir_graph);
 349   in[3] = get_irg_globals(current_ir_graph);
 350   in[4] = new_Tuple (get_Call_n_params(call),
 351                      get_Call_param_arr(call));
 352   pre_call = new_Tuple(5, in);
 353   post_call = call;
 354
 355   /** Part the block of the Call node into two blocks.
 356       The new block gets the ins of the old block, pre_call and all its
 357       predecessors and all Phi nodes. **/
 358   part_block(pre_call);
 359
 360   /** Prepare state for dead node elimination **/
 361   /* Visited flags in calling irg must be >= flag in called irg.
 362      Else walker and arity computation will not work. */
 363   if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
 364     set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);   /***/
 365   if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
 366     set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
 367   /* Set pre_call as new Start node in link field of the start node of
 368       calling graph and pre_calls block as new block for the start block
 369       of calling graph.
 370       Further mark these nodes so that they are not visited by the
 371       copying. */
 372   set_irn_link(get_irg_start(called_graph), pre_call);
 373   set_irn_visited(get_irg_start(called_graph),
 374                   get_irg_visited(current_ir_graph));/***/
 375   set_irn_link(get_irg_start_block(called_graph),
 376                get_nodes_Block(pre_call));
 377   set_irn_visited(get_irg_start_block(called_graph),
 378                   get_irg_visited(current_ir_graph));  /***/
 379
 380   /* Initialize for compaction of in arrays */
 381   inc_irg_block_visited(current_ir_graph);
 382   /*
 383   set_Block_block_visited(get_irg_start_block(called_graph),
 384                         get_irg_block_visited(current_ir_graph) +1 +1); /* count for self edge */
 385
 386   /*** Replicate local entities of the called_graph ***/
 387   /* @@@ */
 388
 389   /* visited is > than that of called graph.  With this trick visited will
 390      remain unchanged so that an outer walker calling this inline will
 391      not visit the inlined nodes. */
 392   set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
 393
 394   /** Performing dead node elimination inlines the graph **/
 395   /* Copies the nodes to the obstack of current_ir_graph. */
 396   /* @@@ endless loops are not copied!! */
 397   irg_walk(get_irg_end(called_graph), copy_node, copy_preds, NULL);
 398
 399   /* Repair called_graph */
 400   set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
 401   set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
 402   set_Block_block_visited(get_irg_start_block(called_graph), 0);
 403
 404   /*** Merge the end of the inlined procedure with the call site ***/
 405
 406   /** Precompute some values **/
 407   end_bl = get_new_node(get_irg_end_block(called_graph));
 408   end = get_new_node(get_irg_end(called_graph));
 409   arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
 410   n_res = get_method_n_res(get_Call_type(call));
 411
 412   res_pred = (ir_node **) malloc ((n_res) * sizeof (ir_node *));
 413   cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 414
 415   set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
 416
 417
 418   /** Collect control flow from Return blocks to post_calls block. Replace
 419       Return nodes by Jump nodes. **/
 420   n_ret = 0;
 421   for (i = 0; i < arity; i++) {
 422     ir_node *ret;
 423     ret = get_irn_n(end_bl, i);
 424     if (get_irn_op(ret) == op_Return) {
 425       cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
 426       n_ret++;
 427     }
 428   }
 429   set_irn_in(post_bl, n_ret, cf_pred);
 430
 431   /** Collect results from Return nodes to post_call. Post_call is
 432       turned into a tuple. **/
 433   turn_into_tuple(post_call, 4);
 434   /* First the Memory-Phi */
 435   n_ret = 0;
 436   for (i = 0; i < arity; i++) {
 437     ret = get_irn_n(end_bl, i);
 438     if (get_irn_op(ret) == op_Return) {
 439       cf_pred[n_ret] = get_Return_mem(ret);
 440       n_ret++;
 441     }
 442   }
 443   phi = new_Phi(n_ret, cf_pred, mode_M);
 444   set_Tuple_pred(call, 0, phi);
 445   set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 446   set_irn_link(post_bl, phi);
 447   /* Now the real results */
 448   if (n_res > 0) {
 449     for (j = 0; j < n_res; j++) {
 450       n_ret = 0;
 451       for (i = 0; i < arity; i++) {
 452         ret = get_irn_n(end_bl, i);
 453         if (get_irn_op(ret) == op_Return) {
 454           cf_pred[n_ret] = get_Return_res(ret, j);
 455           n_ret++;
 456         }
 457       }
 458       phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
 459       res_pred[j] = phi;
 460       set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 461       set_irn_link(post_bl, phi);
 462     }
 463     set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
 464   } else {
 465     set_Tuple_pred(call, 2, new_Bad());
 466   }
 467   /* Finally the exception control flow.  We need to add a Phi node to
 468      collect the memory containing the exception objects.  Further we need
 469      to add another block to get a correct representation of this Phi.  To
 470      this block we add a Jmp that resolves into the X output of the Call
 471      when the Call is turned into a tuple. */
 472   n_exc = 0;
 473   for (i = 0; i < arity; i++) {
 474     ir_node *ret;
 475     ret = get_irn_n(end_bl, i);
 476     if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
 477       cf_pred[n_exc] = ret;
 478       n_exc++;
 479     }
 480   }
 481   if (n_exc > 0) {
 482     new_Block(n_exc, cf_pred);      /* whatch it: current_block is changed! */
 483     set_Tuple_pred(call, 1, new_Jmp());
 484     /* The Phi for the memories with the exception objects */
 485     n_exc = 0;
 486     for (i = 0; i < arity; i++) {
 487       ir_node *ret;
 488       ret = skip_Proj(get_irn_n(end_bl, i));
 489       if (get_irn_op(ret) == op_Call) {
 490         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
 491         n_exc++;
 492       } else if (is_fragile_op(ret)) {
 493         /* We rely that all cfops have the memory output at the same position. */
 494         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
 495         n_exc++;
 496       } else if (get_irn_op(ret) == op_Raise) {
 497         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
 498         n_exc++;
 499       }
 500     }
 501     set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
 502   } else {
 503     set_Tuple_pred(call, 1, new_Bad());
 504     set_Tuple_pred(call, 3, new_Bad());
 505   }
 506   free(res_pred);
 507   free(cf_pred);
 508
 509   /*** Correct the control flow to the end node.
 510        If the exception control flow from the Call directly branched to the
 511        end block we now have the following control flow predecessor pattern:
 512        ProjX -> Tuple -> Jmp.
 513        We must remove the Jmp along with it's empty block and add Jmp's
 514        predecessors as predecessors of this end block. ***/
 515   /* find the problematic predecessor of the end block. */
 516   end_bl = get_irg_end_block(current_ir_graph);
 517   for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
 518     cf_op = get_Block_cfgpred(end_bl, i);
 519     if (get_irn_op(cf_op) == op_Proj) {
 520       cf_op = get_Proj_pred(cf_op);
 521       if (get_irn_op(cf_op) == op_Tuple) {
 522         cf_op = get_Tuple_pred(cf_op, 1);
 523         assert(get_irn_op(cf_op) == op_Jmp);
 524         break;
 525       }
 526     }
 527   }
 528   /* repair */
 529   if (i < get_Block_n_cfgpreds(end_bl)) {
 530     bl = get_nodes_Block(cf_op);
 531     arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
 532     cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 533     for (j = 0; j < i; j++)
 534       cf_pred[j] = get_Block_cfgpred(end_bl, j);
 535     for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
 536       cf_pred[j] = get_Block_cfgpred(bl, j-i);
 537     for (j = j; j < arity; j++)
 538       cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
 539     set_irn_in(end_bl, arity, cf_pred);
 540     free(cf_pred);
 541   }
 542 }