nsz Git - libfirm/blob - ir/ir/irgopt.c

   1 /* Coyright (C) 1998 - 2000 by Universitaet Karlsruhe
   2 ** All rights reserved.
   3 **
   4 ** Author: Christian Schaefer
   5 **
   6 ** Optimizations for a whole ir graph, i.e., a procedure.
   7 */
   8
   9 /* $Id$ */
  10
  11 #ifdef HAVE_CONFIG_H
  12 # include <config.h>
  13 #endif
  14
  15 # include <assert.h>
  16
  17 # include "irprog.h"
  18 # include "irgopt.h"
  19 # include "irnode_t.h"
  20 # include "irgraph_t.h"
  21 # include "iropt_t.h"
  22 # include "irgwalk.h"
  23 # include "ircons.h"
  24 # include "misc.h"
  25 # include "irgmod.h"
  26 # include "array.h"
  27 # include "pset.h"
  28 # include "pdeq.h"  /* provisorisch fuer code placement */
  29 # include "irouts.h"
  30
  31 /* Defined in iropt.c */
  32 pset *new_identities (void);
  33 void  del_identities (pset *value_table);
  34 void  add_identities   (pset *value_table, ir_node *node);
  35
  36 #if 0  /* Warum ist das hier nochmal definiert?
  37           Hat's nicht gelinkt wegen typeO tities - tity ?? */
  38 /* To fill the hash table */
  39 void
  40 add_identity (pset *value_table, ir_node *n) {
  41   /* identify_remember (value_table, n);*/
  42 }
  43 #endif
  44
  45 /********************************************************************/
  46 /* apply optimizations of iropt to all nodes.                       */
  47 /********************************************************************/
  48
  49 void init_link (ir_node *n, void *env) {
  50   set_irn_link(n, NULL);
  51 }
  52
  53 void
  54 optimize_in_place_wrapper (ir_node *n, void *env) {
  55   int start, i;
  56   ir_node *optimized;
  57
  58   if (get_irn_op(n) == op_Block)
  59     start = 0;
  60   else
  61     start = -1;
  62
  63   /* optimize all sons after recursion, i.e., the sons' sons are
  64      optimized already. */
  65   for (i = start; i < get_irn_arity(n); i++) {
  66     optimized = optimize_in_place_2(get_irn_n(n, i));
  67     set_irn_n(n, i, optimized);
  68     assert(get_irn_op(optimized) != op_Id);
  69   }
  70 }
  71
  72 void
  73 local_optimize_graph (ir_graph *irg) {
  74   ir_graph *rem = current_ir_graph;
  75   current_ir_graph = irg;
  76
  77   /* Handle graph state */
  78   assert(get_irg_phase_state(irg) != phase_building);
  79   if (get_opt_global_cse())
  80     set_irg_pinned(current_ir_graph, floats);
  81   if (get_irg_outs_state(current_ir_graph) == outs_consistent)
  82     set_irg_outs_inconsistent(current_ir_graph);
  83
  84   /* Clean the value_table in irg for the cse. */
  85   del_identities(irg->value_table);
  86   irg->value_table = new_identities();
  87
  88   /* walk over the graph */
  89   irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);
  90
  91   current_ir_graph = rem;
  92 }
  93
  94 /********************************************************************/
  95 /* Routines for dead node elimination / copying garbage collection  */
  96 /* of the obstack.                                                  */
  97 /********************************************************************/
  98
  99 /* Remeber the new node in the old node by using a field all nodes have. */
 100 inline void
 101 set_new_node (ir_node *old, ir_node *new)
 102 {
 103   old->link = new;
 104 }
 105
 106 /* Get this new node, before the old node is forgotton.*/
 107 inline ir_node *
 108 get_new_node (ir_node * n)
 109 {
 110   return n->link;
 111 }
 112
 113 /* We use the block_visited flag to mark that we have computed the
 114    number of useful predecessors for this block.
 115    Further we encode the new arity in this flag in the old blocks.
 116    Remembering the arity is useful, as it saves a lot of pointer
 117    accesses.  This function is called for all Phi and Block nodes
 118    in a Block. */
 119 inline int
 120 compute_new_arity(ir_node *b) {
 121   int i, res;
 122   int irg_v, block_v;
 123
 124   irg_v = get_irg_block_visited(current_ir_graph);
 125   block_v = get_Block_block_visited(b);
 126   if (block_v >= irg_v) {
 127     /* we computed the number of preds for this block and saved it in the
 128        block_v flag */
 129     return block_v - irg_v;
 130   } else {
 131     /* compute the number of good predecessors */
 132     res = get_irn_arity(b);
 133     for (i = 0; i < get_irn_arity(b); i++)
 134       if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
 135     /* save it in the flag. */
 136     set_Block_block_visited(b, irg_v + res);
 137     return res;
 138   }
 139 }
 140
 141 /* Copies the node to the new obstack. The Ins of the new node point to
 142    the predecessors on the old obstack.  For block/phi nodes not all
 143    predecessors might be copied.  n->link points to the new node.
 144    For Phi and Block nodes the function allocates in-arrays with an arity
 145    only for useful predecessors.  The arity is determined by counting
 146    the non-bad predecessors of the block. */
 147 void
 148 copy_node (ir_node *n, void *env) {
 149   ir_node *nn, *block;
 150   int new_arity;
 151
 152   if (get_irn_opcode(n) == iro_Block) {
 153     block = NULL;
 154     new_arity = compute_new_arity(n);
 155   } else {
 156     block = get_nodes_Block(n);
 157     if (get_irn_opcode(n) == iro_Phi) {
 158       new_arity = compute_new_arity(block);
 159     } else {
 160       new_arity = get_irn_arity(n);
 161     }
 162   }
 163   nn = new_ir_node(current_ir_graph,
 164                    block,
 165                    get_irn_op(n),
 166                    get_irn_mode(n),
 167                    new_arity,
 168                    get_irn_in(n));
 169   /* Copy the attributes.  These might point to additional data.  If this
 170      was allocated on the old obstack the pointers now are dangling.  This
 171      frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 172   copy_attrs(n, nn);
 173   set_new_node(n, nn);
 174
 175   /*  printf("\n old node: "); DDMSG2(n);
 176       printf(" new node: "); DDMSG2(nn); */
 177
 178 }
 179
 180 /* Copies new predecessors of old node to new node remembered in link.
 181    Spare the Bad predecessors of Phi and Block nodes. */
 182 void
 183 copy_preds (ir_node *n, void *env) {
 184   ir_node *nn, *block, *on;
 185   int i, j;
 186
 187   nn = get_new_node(n);
 188
 189   /* printf("\n old node: "); DDMSG2(n);
 190      printf(" new node: "); DDMSG2(nn);
 191      printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */
 192
 193   if (get_irn_opcode(n) == iro_Block) {
 194     /* Don't copy Bad nodes. */
 195     j = 0;
 196     for (i = 0; i < get_irn_arity(n); i++)
 197       if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
 198         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 199         j++;
 200       }
 201     /* repair the block visited flag from above misuse. Repair it in both
 202        graphs so that the old one can still be used. */
 203     set_Block_block_visited(nn, 0);
 204     set_Block_block_visited(n, 0);
 205     /* Local optimization could not merge two subsequent blocks if
 206        in array contained Bads.  Now it's possible.
 207        We don't call optimize_in_place as it requires
 208        that the fields in ir_graph are set properly. */
 209     if (get_Block_n_cfgpreds(nn) == 1
 210         && get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
 211       exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
 212   } else if (get_irn_opcode(n) == iro_Phi) {
 213     /* Don't copy node if corresponding predecessor in block is Bad.
 214        The Block itself should not be Bad. */
 215     block = get_nodes_Block(n);
 216     set_irn_n (nn, -1, get_new_node(block));
 217     j = 0;
 218     for (i = 0; i < get_irn_arity(n); i++)
 219       if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
 220         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
 221         j++;
 222       }
 223     /* If the pre walker reached this Phi after the post walker visited the
 224        block block_visited is > 0. */
 225     set_Block_block_visited(get_nodes_Block(n), 0);
 226     /* Compacting the Phi's ins might generate Phis with only one
 227        predecessor. */
 228     if (get_irn_arity(n) == 1)
 229       exchange(n, get_irn_n(n, 0));
 230   } else {
 231     for (i = -1; i < get_irn_arity(n); i++)
 232       set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
 233   }
 234   /* Now the new node is complete.  We can add it to the hash table for cse.
 235      @@@ inlinening aborts if we identify End. Why? */
 236   if(get_irn_op(nn) != op_End)
 237     add_identities (current_ir_graph->value_table, nn);
 238 }
 239
 240 /* Copies the graph recursively, compacts the keepalive of the end node. */
 241 void
 242 copy_graph () {
 243   ir_node *oe, *ne; /* old end, new end */
 244   ir_node *ka; /* keep alive */
 245   int i;
 246
 247   oe = get_irg_end(current_ir_graph);
 248   /* copy the end node by hand, allocate dynamic in array! */
 249   ne = new_ir_node(current_ir_graph,
 250                    NULL,
 251                    op_End,
 252                    mode_X,
 253                    -1,
 254                    NULL);
 255   /* Copy the attributes.  Well, there might be some in the future... */
 256   copy_attrs(oe, ne);
 257   set_new_node(oe, ne);
 258
 259   /* copy the live nodes */
 260   irg_walk(get_nodes_Block(oe), copy_node, copy_preds, NULL);
 261   /* copy_preds for the end node ... */
 262   set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));
 263
 264   /** ... and now the keep alives. **/
 265   /* First pick the not marked block nodes and walk them.  We must pick these
 266      first as else we will oversee blocks reachable from Phis. */
 267   for (i = 0; i < get_irn_arity(oe); i++) {
 268     ka = get_irn_n(oe, i);
 269     if ((get_irn_op(ka) == op_Block) &&
 270         (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
 271       /* We must keep the block alive and copy everything reachable */
 272       set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
 273       irg_walk(ka, copy_node, copy_preds, NULL);
 274       add_End_keepalive(ne, get_new_node(ka));
 275     }
 276   }
 277
 278   /* Now pick the Phis.  Here we will keep all! */
 279   for (i = 0; i < get_irn_arity(oe); i++) {
 280     ka = get_irn_n(oe, i);
 281     if ((get_irn_op(ka) == op_Phi)) {
 282       if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
 283         /* We didn't copy the Phi yet.  */
 284         set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
 285         irg_walk(ka, copy_node, copy_preds, NULL);
 286       }
 287       add_End_keepalive(ne, get_new_node(ka));
 288     }
 289   }
 290 }
 291
 292 /* Copies the graph reachable from current_ir_graph->end to the obstack
 293    in current_ir_graph and fixes the environment.
 294    Then fixes the fields in current_ir_graph containing nodes of the
 295    graph.  */
 296 void
 297 copy_graph_env () {
 298   /* Not all nodes remembered in current_ir_graph might be reachable
 299      from the end node.  Assure their link is set to NULL, so that
 300      we can test whether new nodes have been computed. */
 301   set_irn_link(get_irg_frame  (current_ir_graph), NULL);
 302   set_irn_link(get_irg_globals(current_ir_graph), NULL);
 303   set_irn_link(get_irg_args   (current_ir_graph), NULL);
 304
 305   /* we use the block walk flag for removing Bads from Blocks ins. */
 306   inc_irg_block_visited(current_ir_graph);
 307
 308   /* copy the graph */
 309   copy_graph();
 310
 311   /* fix the fields in current_ir_graph */
 312   free_End(get_irg_end(current_ir_graph));
 313   set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
 314   set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
 315   if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
 316     copy_node (get_irg_frame(current_ir_graph), NULL);
 317     copy_preds(get_irg_frame(current_ir_graph), NULL);
 318   }
 319   if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
 320     copy_node (get_irg_globals(current_ir_graph), NULL);
 321     copy_preds(get_irg_globals(current_ir_graph), NULL);
 322   }
 323   if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
 324     copy_node (get_irg_args(current_ir_graph), NULL);
 325     copy_preds(get_irg_args(current_ir_graph), NULL);
 326   }
 327   set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
 328
 329   set_irg_start_block(current_ir_graph,
 330                       get_new_node(get_irg_start_block(current_ir_graph)));
 331   set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
 332   set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
 333   set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
 334   if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
 335     copy_node(get_irg_bad(current_ir_graph), NULL);
 336     copy_preds(get_irg_bad(current_ir_graph), NULL);
 337   }
 338   set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
 339 }
 340
 341 /* Copies all reachable nodes to a new obstack.  Removes bad inputs
 342    from block nodes and the corresponding inputs from Phi nodes.
 343    Merges single exit blocks with single entry blocks and removes
 344    1-input Phis.
 345    Adds all new nodes to a new hash table for cse.  Does not
 346    perform cse, so the hash table might contain common subexpressions. */
 347 /* Amroq call this emigrate() */
 348 void
 349 dead_node_elimination(ir_graph *irg) {
 350   ir_graph *rem;
 351   struct obstack *graveyard_obst = NULL;
 352   struct obstack *rebirth_obst   = NULL;
 353
 354   /* Remember external state of current_ir_graph. */
 355   rem = current_ir_graph;
 356   current_ir_graph = irg;
 357
 358   /* Handle graph state */
 359   assert(get_irg_phase_state(current_ir_graph) != phase_building);
 360   free_outs(current_ir_graph);
 361
 362   if (get_optimize() && get_opt_dead_node_elimination()) {
 363
 364     /* A quiet place, where the old obstack can rest in peace,
 365        until it will be cremated. */
 366     graveyard_obst = irg->obst;
 367
 368     /* A new obstack, where the reachable nodes will be copied to. */
 369     rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
 370     current_ir_graph->obst = rebirth_obst;
 371     obstack_init (current_ir_graph->obst);
 372
 373     /* We also need a new hash table for cse */
 374     del_identities (irg->value_table);
 375     irg->value_table = new_identities ();
 376
 377     /* Copy the graph from the old to the new obstack */
 378     copy_graph_env();
 379
 380     /* Free memory from old unoptimized obstack */
 381     obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
 382     xfree (graveyard_obst);           /* ... then free it.           */
 383   }
 384
 385   current_ir_graph = rem;
 386 }
 387
 388 /**********************************************************************/
 389 /*  Funcionality for inlining                                         */
 390 /**********************************************************************/
 391
 392 /* Copy node for inlineing.  Copies the node by calling copy_node and
 393    then updates the entity if it's a local one.  env must be a pointer
 394    to the frame type of the procedure. The new entities must be in
 395    the link field of the entities. */
 396 void
 397 copy_node_inline (ir_node *n, void *env) {
 398   ir_node *new;
 399   type *frame_tp = (type *)env;
 400
 401   copy_node(n, NULL);
 402   if (get_irn_op(n) == op_Sel) {
 403     new = get_new_node (n);
 404     assert(get_irn_op(new) == op_Sel);
 405     if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
 406       set_Sel_entity(new, get_entity_link(get_Sel_entity(n)));
 407     }
 408   }
 409 }
 410
 411 void inline_method(ir_node *call, ir_graph *called_graph) {
 412   ir_node *pre_call;
 413   ir_node *post_call, *post_bl;
 414   ir_node *in[5];
 415   ir_node *end, *end_bl;
 416   ir_node **res_pred;
 417   ir_node **cf_pred;
 418   ir_node *ret, *phi;
 419   ir_node *cf_op, *bl;
 420   int arity, n_ret, n_exc, n_res, i, j;
 421   type *called_frame, *caller_frame;
 422
 423   if (!get_opt_inline()) return;
 424
 425   /* Handle graph state */
 426   assert(get_irg_phase_state(current_ir_graph) != phase_building);
 427   if (get_irg_outs_state(current_ir_graph) == outs_consistent)
 428     set_irg_outs_inconsistent(current_ir_graph);
 429
 430   /** Check preconditions **/
 431   assert(get_irn_op(call) == op_Call);
 432   assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
 433   assert(get_type_tpop(get_Call_type(call)) == type_method);
 434   if (called_graph == current_ir_graph) return;
 435
 436   /** Part the Call node into two nodes.  Pre_call collects the parameters of
 437           the procedure and later replaces the Start node of the called graph.
 438           Post_call is the old Call node and collects the results of the called
 439           graph. Both will end up being a tuple.  **/
 440   post_bl = get_nodes_Block(call);
 441   set_irg_current_block(current_ir_graph, post_bl);
 442   /* XxMxPxP of Start + parameter of Call */
 443   in[0] = new_Jmp();
 444   in[1] = get_Call_mem(call);
 445   in[2] = get_irg_frame(current_ir_graph);
 446   in[3] = get_irg_globals(current_ir_graph);
 447   in[4] = new_Tuple (get_Call_n_params(call),
 448                                          get_Call_param_arr(call));
 449   pre_call = new_Tuple(5, in);
 450   post_call = call;
 451
 452   /** Part the block of the Call node into two blocks.
 453       The new block gets the ins of the old block, pre_call and all its
 454       predecessors and all Phi nodes. **/
 455   part_block(pre_call);
 456
 457   /** Prepare state for dead node elimination **/
 458   /* Visited flags in calling irg must be >= flag in called irg.
 459      Else walker and arity computation will not work. */
 460   if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
 461     set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);   /***/
 462   if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
 463     set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
 464   /* Set pre_call as new Start node in link field of the start node of
 465          calling graph and pre_calls block as new block for the start block
 466          of calling graph.
 467          Further mark these nodes so that they are not visited by the
 468          copying. */
 469   set_irn_link(get_irg_start(called_graph), pre_call);
 470   set_irn_visited(get_irg_start(called_graph),
 471                                   get_irg_visited(current_ir_graph));/***/
 472   set_irn_link(get_irg_start_block(called_graph),
 473                            get_nodes_Block(pre_call));
 474   set_irn_visited(get_irg_start_block(called_graph),
 475                                   get_irg_visited(current_ir_graph));  /***/
 476
 477   /* Initialize for compaction of in arrays */
 478   inc_irg_block_visited(current_ir_graph);
 479   /*
 480           set_Block_block_visited(get_irg_start_block(called_graph),
 481           get_irg_block_visited(current_ir_graph) +1 +1); /* count for self edge */
 482
 483   /*** Replicate local entities of the called_graph ***/
 484   /* copy the entities. */
 485   called_frame = get_irg_frame_type(called_graph);
 486   for (i = 0; i < get_class_n_member(called_frame); i++) {
 487     entity *new_ent, *old_ent;
 488     old_ent = get_class_member(called_frame, i);
 489     new_ent = copy_entity_own(old_ent, get_cur_frame_type());
 490     set_entity_link(old_ent, new_ent);
 491   }
 492
 493   /* visited is > than that of called graph.  With this trick visited will
 494      remain unchanged so that an outer walker, e.g., searching the call nodes
 495      to inline, calling this inline will not visit the inlined nodes. */
 496   set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
 497
 498   /** Performing dead node elimination inlines the graph **/
 499   /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
 500      entities. */
 501   /* @@@ endless loops are not copied!! */
 502   irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
 503                    get_irg_frame_type(called_graph));
 504
 505   /* Repair called_graph */
 506   set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
 507   set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
 508   set_Block_block_visited(get_irg_start_block(called_graph), 0);
 509
 510   /*** Merge the end of the inlined procedure with the call site ***/
 511   /* We will turn the old Call node into a Tuple with the following
 512      predecessors:
 513      -1:  Block of Tuple.
 514      0: Phi of all Memories of Return statements.
 515      1: Jmp from new Block that merges the control flow from all exception
 516          predecessors of the old end block.
 517      2: Tuple of all arguments.
 518      3: Phi of Exception memories.
 519   */
 520
 521   /** Precompute some values **/
 522   end_bl = get_new_node(get_irg_end_block(called_graph));
 523   end = get_new_node(get_irg_end(called_graph));
 524   arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
 525   n_res = get_method_n_res(get_Call_type(call));
 526
 527   res_pred = (ir_node **) malloc ((n_res) * sizeof (ir_node *));
 528   cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 529
 530   set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
 531
 532   /** archive keepalives **/
 533   for (i = 0; i < get_irn_arity(end); i++)
 534     add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
 535   /* The new end node will die, but the in array is not on the obstack ... */
 536   free_End(end);
 537
 538   /** Collect control flow from Return blocks to post_calls block. Replace
 539       Return nodes by Jump nodes. **/
 540   n_ret = 0;
 541   for (i = 0; i < arity; i++) {
 542     ir_node *ret;
 543     ret = get_irn_n(end_bl, i);
 544     if (get_irn_op(ret) == op_Return) {
 545       cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
 546       n_ret++;
 547     }
 548   }
 549   set_irn_in(post_bl, n_ret, cf_pred);
 550
 551   /** Collect results from Return nodes to post_call. Post_call is
 552       turned into a tuple. **/
 553   turn_into_tuple(post_call, 4);
 554   /* First the Memory-Phi */
 555   n_ret = 0;
 556   for (i = 0; i < arity; i++) {
 557     ret = get_irn_n(end_bl, i);
 558     if (get_irn_op(ret) == op_Return) {
 559       cf_pred[n_ret] = get_Return_mem(ret);
 560       n_ret++;
 561     }
 562   }
 563   phi = new_Phi(n_ret, cf_pred, mode_M);
 564   set_Tuple_pred(call, 0, phi);
 565   set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 566   set_irn_link(post_bl, phi);
 567   /* Now the real results */
 568   if (n_res > 0) {
 569     for (j = 0; j < n_res; j++) {
 570       n_ret = 0;
 571       for (i = 0; i < arity; i++) {
 572                 ret = get_irn_n(end_bl, i);
 573                 if (get_irn_op(ret) == op_Return) {
 574                   cf_pred[n_ret] = get_Return_res(ret, j);
 575                   n_ret++;
 576                 }
 577       }
 578       phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
 579       res_pred[j] = phi;
 580       set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
 581       set_irn_link(post_bl, phi);
 582     }
 583     set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
 584   } else {
 585     set_Tuple_pred(call, 2, new_Bad());
 586   }
 587   /* Finally the exception control flow.  We need to add a Phi node to
 588      collect the memory containing the exception objects.  Further we need
 589      to add another block to get a correct representation of this Phi.  To
 590      this block we add a Jmp that resolves into the X output of the Call
 591      when the Call is turned into a tuple. */
 592   n_exc = 0;
 593   for (i = 0; i < arity; i++) {
 594     ir_node *ret;
 595     ret = get_irn_n(end_bl, i);
 596     if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
 597       cf_pred[n_exc] = ret;
 598       n_exc++;
 599     }
 600   }
 601   if (n_exc > 0) {
 602     new_Block(n_exc, cf_pred);      /* whatch it: current_block is changed! */
 603     set_Tuple_pred(call, 1, new_Jmp());
 604     /* The Phi for the memories with the exception objects */
 605     n_exc = 0;
 606     for (i = 0; i < arity; i++) {
 607       ir_node *ret;
 608       ret = skip_Proj(get_irn_n(end_bl, i));
 609       if (get_irn_op(ret) == op_Call) {
 610                 cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
 611                 n_exc++;
 612       } else if (is_fragile_op(ret)) {
 613                 /* We rely that all cfops have the memory output at the same position. */
 614                 cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
 615                 n_exc++;
 616       } else if (get_irn_op(ret) == op_Raise) {
 617                 cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
 618                 n_exc++;
 619       }
 620     }
 621     set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
 622   } else {
 623     set_Tuple_pred(call, 1, new_Bad());
 624     set_Tuple_pred(call, 3, new_Bad());
 625   }
 626   free(res_pred);
 627   free(cf_pred);
 628
 629   /*** Correct the control flow to the end node.
 630        If the exception control flow from the Call directly branched to the
 631        end block we now have the following control flow predecessor pattern:
 632        ProjX -> Tuple -> Jmp.
 633        We must remove the Jmp along with it's empty block and add Jmp's
 634        predecessors as predecessors of this end block. ***/
 635   /* find the problematic predecessor of the end block. */
 636   end_bl = get_irg_end_block(current_ir_graph);
 637   for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
 638     cf_op = get_Block_cfgpred(end_bl, i);
 639     if (get_irn_op(cf_op) == op_Proj) {
 640       cf_op = get_Proj_pred(cf_op);
 641       if (get_irn_op(cf_op) == op_Tuple) {
 642                 cf_op = get_Tuple_pred(cf_op, 1);
 643                 assert(get_irn_op(cf_op) == op_Jmp);
 644                 break;
 645       }
 646     }
 647   }
 648   /* repair */
 649   if (i < get_Block_n_cfgpreds(end_bl)) {
 650     bl = get_nodes_Block(cf_op);
 651     arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
 652     cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
 653     for (j = 0; j < i; j++)
 654       cf_pred[j] = get_Block_cfgpred(end_bl, j);
 655     for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
 656       cf_pred[j] = get_Block_cfgpred(bl, j-i);
 657     for (j = j; j < arity; j++)
 658       cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
 659     set_irn_in(end_bl, arity, cf_pred);
 660     free(cf_pred);
 661   }
 662 }
 663
 664 /********************************************************************/
 665 /* Apply inlineing to small methods.                                */
 666 /********************************************************************/
 667
 668 static int pos;
 669
 670 /* It makes no sense to inline too many calls in one procedure. Anyways,
 671    I didn't get a version with NEW_ARR_F to run. */
 672 #define MAX_INLINE 1024
 673
 674 static void collect_calls(ir_node *call, void *env) {
 675   ir_node **calls = (ir_node **)env;
 676   ir_node *addr;
 677   tarval *tv;
 678   ir_graph *called_irg;
 679
 680   if (get_irn_op(call) != op_Call) return;
 681
 682   addr = get_Call_ptr(call);
 683   if (get_irn_op(addr) == op_Const) {
 684     /* Check whether the constant is the pointer to a compiled entity. */
 685     tv = get_Const_tarval(addr);
 686     if (tv->u.p.ent) {
 687       called_irg = get_entity_irg(tv->u.p.ent);
 688       if (called_irg && pos < MAX_INLINE) {
 689         /* The Call node calls a locally defined method.  Remember to inline. */
 690         calls[pos] = call;
 691         pos++;
 692       }
 693     }
 694   }
 695 }
 696
 697
 698 /* Inlines all small methods at call sites where the called address comes
 699    from a Const node that references the entity representing the called
 700    method.
 701    The size argument is a rough measure for the code size of the method:
 702    Methods where the obstack containing the firm graph is smaller than
 703    size are inlined. */
 704 void inline_small_irgs(ir_graph *irg, int size) {
 705   int i;
 706   ir_node *calls[MAX_INLINE];
 707   ir_graph *rem = current_ir_graph;
 708
 709   if (!(get_optimize() && get_opt_inline())) return;
 710
 711   /*DDME(get_irg_ent(current_ir_graph));*/
 712
 713   current_ir_graph = irg;
 714   /* Handle graph state */
 715   assert(get_irg_phase_state(current_ir_graph) != phase_building);
 716
 717   /* Find Call nodes to inline.
 718      (We can not inline during a walk of the graph, as inlineing the same
 719      method several times changes the visited flag of the walked graph:
 720      after the first inlineing visited of the callee equals visited of
 721      the caller.  With the next inlineing both are increased.) */
 722   pos = 0;
 723   irg_walk(get_irg_end(irg), NULL, collect_calls, (void *) calls);
 724
 725   if ((pos > 0) && (pos < MAX_INLINE)) {
 726     /* There are calls to inline */
 727     collect_phiprojs(irg);
 728     for (i = 0; i < pos; i++) {
 729       tarval *tv;
 730       ir_graph *callee;
 731       tv = get_Const_tarval(get_Call_ptr(calls[i]));
 732       callee = get_entity_irg(tv->u.p.ent);
 733       if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
 734         /*printf(" inlineing "); DDME(tv->u.p.ent);*/
 735         inline_method(calls[i], callee);
 736       }
 737     }
 738   }
 739
 740   current_ir_graph = rem;
 741 }
 742
 743
 744 /********************************************************************/
 745 /*  Code Placement.  Pinns all floating nodes to a block where they */
 746 /*  will be executed only if needed.                                */
 747 /********************************************************************/
 748
 749 static pdeq *worklist;          /* worklist of ir_node*s */
 750
 751 /* Find the earliest correct block for N.  --- Place N into the
 752    same Block as its dominance-deepest Input.  */
 753 static void
 754 place_floats_early (ir_node *n)
 755 {
 756   int i, start;
 757
 758   /* we must not run into an infinite loop */
 759   assert (irn_not_visited(n));
 760   mark_irn_visited(n);
 761
 762   /* Place floating nodes. */
 763   if (get_op_pinned(get_irn_op(n)) == floats) {
 764     int depth = 0;
 765     ir_node *b = new_Bad();   /* The block to place this node in */
 766
 767     assert(get_irn_op(n) != op_Block);
 768
 769     if ((get_irn_op(n) == op_Const) ||
 770         (get_irn_op(n) == op_SymConst) ||
 771         (is_Bad(n))) {
 772       /* These nodes will not be placed by the loop below. */
 773       b = get_irg_start_block(current_ir_graph);
 774       depth = 1;
 775     }
 776
 777     /* find the block for this node. */
 778     for (i = 0; i < get_irn_arity(n); i++) {
 779       ir_node *dep = get_irn_n(n, i);
 780       ir_node *dep_block;
 781       if ((irn_not_visited(dep)) &&
 782           (get_op_pinned(get_irn_op(dep)) == floats)) {
 783         place_floats_early (dep);
 784       }
 785       /* Because all loops contain at least one pinned node, now all
 786          our inputs are either pinned or place_early has already
 787          been finished on them.  We do not have any unfinished inputs!  */
 788       dep_block = get_nodes_Block(dep);
 789       if ((!is_Bad(dep_block)) &&
 790           (get_Block_dom_depth(dep_block) > depth)) {
 791         b = dep_block;
 792         depth = get_Block_dom_depth(dep_block);
 793       }
 794       /* Avoid that the node is placed in the Start block */
 795       if ((depth == 1) && (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
 796         b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
 797         assert(b != get_irg_start_block(current_ir_graph));
 798         depth = 2;
 799       }
 800     }
 801     set_nodes_Block(n, b);
 802   }
 803
 804   /* Add predecessors of non floating nodes on worklist. */
 805   start = (get_irn_op(n) == op_Block) ? 0 : -1;
 806   for (i = start; i < get_irn_arity(n); i++) {
 807     ir_node *pred = get_irn_n(n, i);
 808     if (irn_not_visited(pred)) {
 809       pdeq_putr (worklist, pred);
 810     }
 811   }
 812 }
 813
 814 /* Floating nodes form subgraphs that begin at nodes as Const, Load,
 815    Start, Call and end at pinned nodes as Store, Call.  Place_early
 816    places all floating nodes reachable from its argument through floating
 817    nodes and adds all beginnings at pinned nodes to the worklist. */
 818 inline void place_early () {
 819   int i;
 820   bool del_me;
 821
 822   assert(worklist);
 823   inc_irg_visited(current_ir_graph);
 824
 825   /* this inits the worklist */
 826   place_floats_early (get_irg_end(current_ir_graph));
 827
 828   /* Work the content of the worklist. */
 829   while (!pdeq_empty (worklist)) {
 830     ir_node *n = pdeq_getl (worklist);
 831     if (irn_not_visited(n)) place_floats_early (n);
 832   }
 833
 834   set_irg_outs_inconsistent(current_ir_graph);
 835   current_ir_graph->pinned = pinned;
 836 }
 837
 838
 839 /* deepest common dominance ancestor of DCA and CONSUMER of PRODUCER */
 840 static ir_node *
 841 consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
 842 {
 843   ir_node *block;
 844
 845   /* Compute the latest block into which we can place a node so that it is
 846      before consumer. */
 847   if (get_irn_op(consumer) == op_Phi) {
 848     /* our comsumer is a Phi-node, the effective use is in all those
 849        blocks through which the Phi-node reaches producer */
 850     int i;
 851     ir_node *phi_block = get_nodes_Block(consumer);
 852     for (i = 0;  i < get_irn_arity(consumer); i++) {
 853       if (get_irn_n(consumer, i) == producer) {
 854         block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
 855       }
 856     }
 857   } else {
 858     assert(is_no_Block(consumer));
 859     block = get_nodes_Block(consumer);
 860   }
 861
 862   /* Compute the deepest common ancestor of block and dca. */
 863   assert(block);
 864   if (!dca) return block;
 865   while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
 866     block = get_Block_idom(block);
 867   while (get_Block_dom_depth(dca) > get_Block_dom_depth(block))
 868     dca = get_Block_idom(dca);
 869   while (block != dca)
 870     { block = get_Block_idom(block); dca = get_Block_idom(dca); }
 871
 872   return dca;
 873 }
 874
 875 #if 0
 876 /* @@@ Needs loop informations.  Will implement later interprocedural. */
 877 static void
 878 move_out_of_loops (ir_node *n, ir_node *dca)
 879 {
 880   assert(dca);
 881
 882   /* Find the region deepest in the dominator tree dominating
 883      dca with the least loop nesting depth, but still dominated
 884      by our early placement. */
 885   ir_node *best = dca;
 886   while (dca != get_nodes_Block(n)) {
 887     dca = get_Block_idom(dca);
 888     if (!dca) break; /* should we put assert(dca)? */
 889     if (get_Block_loop_depth(dca) < get_Block_loop_depth(best)) {
 890       best = dca;
 891     }
 892   }
 893   if (get_Block_dom_depth(best) >= get_Block_dom_depth(get_nodes_Block(n)))
 894     set_nodes_Block(n, best);
 895 }
 896 #endif
 897
 898 /* Find the latest legal block for N and place N into the
 899    `optimal' Block between the latest and earliest legal block.
 900    The `optimal' block is the dominance-deepest block of those
 901    with the least loop-nesting-depth.  This places N out of as many
 902    loops as possible and then makes it as controldependant as
 903    possible. */
 904 static void
 905 place_floats_late (ir_node *n)
 906 {
 907   int i;
 908
 909   assert (irn_not_visited(n)); /* no multiple placement */
 910
 911   /* no need to place block nodes, control nodes are already placed. */
 912   if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) {
 913     /* Assure that our users are all placed, except the Phi-nodes.
 914        --- Each dataflow cycle contains at least one Phi-node.  We
 915        have to break the `user has to be placed before the
 916        producer' dependance cycle and the Phi-nodes are the
 917        place to do so, because we need to base our placement on the
 918        final region of our users, which is OK with Phi-nodes, as they
 919        are pinned, and they never have to be placed after a
 920        producer of one of their inputs in the same block anyway. */
 921     for (i = 0; i < get_irn_n_outs(n); i++) {
 922       ir_node *succ = get_irn_out(n, i);
 923       if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
 924         place_floats_late (succ);
 925     }
 926
 927     /* We have to determine the final block of this node... except for constants. */
 928     if ((get_op_pinned(get_irn_op(n)) == floats) &&
 929         (get_irn_op(n) != op_Const) &&
 930         (get_irn_op(n) != op_SymConst)) {
 931       ir_node *dca = NULL;      /* deepest common ancestor in the
 932                                    dominator tree of all nodes'
 933                                    blocks depending on us; our final
 934                                    placement has to dominate DCA. */
 935       for (i = 0; i < get_irn_n_outs(n); i++) {
 936         dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
 937       }
 938       set_nodes_Block(n, dca);
 939 #if 0
 940       move_out_of_loops (n, dca);
 941 #endif
 942     }
 943   }
 944
 945   mark_irn_visited(n);
 946
 947   /* Add predecessors of all non-floating nodes on list. (Those of floating
 948      nodes are placeded already and therefore are marked.)  */
 949   for (i = 0; i < get_irn_n_outs(n); i++) {
 950     if (irn_not_visited(get_irn_out(n, i))) {
 951       pdeq_putr (worklist, get_irn_out(n, i));
 952     }
 953   }
 954 }
 955
 956 inline void place_late() {
 957   assert(worklist);
 958   inc_irg_visited(current_ir_graph);
 959
 960   /* This fills the worklist initially. */
 961   place_floats_late(get_irg_start_block(current_ir_graph));
 962   /* And now empty the worklist again... */
 963   while (!pdeq_empty (worklist)) {
 964     ir_node *n = pdeq_getl (worklist);
 965     if (irn_not_visited(n)) place_floats_late(n);
 966   }
 967 }
 968
 969 void place_code(ir_graph *irg) {
 970   ir_graph *rem = current_ir_graph;
 971   current_ir_graph = irg;
 972
 973   if (!(get_optimize() && get_opt_global_cse())) return;
 974
 975   /* Handle graph state */
 976   assert(get_irg_phase_state(irg) != phase_building);
 977   if (get_irg_dom_state(irg) != dom_consistent)
 978     compute_doms(irg);
 979
 980   /* Place all floating nodes as early as possible. This guarantees
 981      a legal code placement. */
 982   worklist = new_pdeq ();
 983   place_early();
 984
 985   /* place_early invalidates the outs, place_late needs them. */
 986   compute_outs(irg);
 987   /* Now move the nodes down in the dominator tree. This reduces the
 988      unnecessary executions of the node. */
 989   place_late();
 990
 991   del_pdeq (worklist);
 992   current_ir_graph = rem;
 993 }