 * File name:   ir/ir/irgopt.c
 * Purpose:     Optimizations for a whole ir graph, i.e., a procedure.
 * Author:      Christian Schaefer, Goetz Lindenmaier
 * Modified by: Sebastian Felis
 * Copyright:   (c) 1998-2003 Universität Karlsruhe
 * Licence:     This file is protected by the GPL - GNU GENERAL PUBLIC LICENSE.
# include "irnode_t.h"
# include "irgraph_t.h"
# include "pdeq.h"       /* For code placement */
# include "irbackedge_t.h"
# include "irflag_t.h"
# include "firmstat.h"

/* Defined in iropt.c */
pset *new_identities (void);
void  del_identities (pset *value_table);
void  add_identities (pset *value_table, ir_node *node);
/*------------------------------------------------------------------*/
/* apply optimizations of iropt to all nodes.                       */
/*------------------------------------------------------------------*/

static void init_link (ir_node *n, void *env) {
  set_irn_link(n, NULL);
#if 0   /* Old version.  Avoids Ids.
           This is not necessary:  we do a postwalk, and get_irn_n
           removes Ids anyway.  So it's much cheaper to call the
           optimization less often and use the exchange() algorithm. */
optimize_in_place_wrapper (ir_node *n, void *env) {
  ir_node *optimized, *old;

  irn_arity = get_irn_arity(n);
  for (i = 0; i < irn_arity; i++) {
    /* get_irn_n skips Id nodes, so the comparison old != optimized does not
       reveal all optimizations.  Therefore always set the new predecessor. */
    old = get_irn_intra_n(n, i);
    optimized = optimize_in_place_2(old);
    set_irn_n(n, i, optimized);

  if (get_irn_op(n) == op_Block) {
    optimized = optimize_in_place_2(n);
    if (optimized != n) exchange (n, optimized);
optimize_in_place_wrapper (ir_node *n, void *env) {
  ir_node *optimized = optimize_in_place_2(n);
  if (optimized != n) exchange (n, optimized);

local_optimize_graph (ir_graph *irg) {
  ir_graph *rem = current_ir_graph;
  current_ir_graph = irg;
  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_opt_global_cse())
    set_irg_pinned(current_ir_graph, floats);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);
  if (get_irg_dom_state(current_ir_graph) == dom_consistent)
    set_irg_dom_inconsistent(current_ir_graph);
  set_irg_loopinfo_inconsistent(current_ir_graph);

  /* Clean the value_table in irg for the cse. */
  del_identities(irg->value_table);
  irg->value_table = new_identities();

  /* walk over the graph */
  irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);

  current_ir_graph = rem;
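
/* A minimal usage sketch (illustration only, not part of the original file):
   run the local optimizations on every graph in the program.  It assumes the
   irp iteration interface (get_irp_n_irgs()/get_irp_irg()) that is also used
   by inline_leave_functions() further below. */
#if 0
static void local_optimize_all_graphs(void) {
  int i, n_irgs = get_irp_n_irgs();
  for (i = 0; i < n_irgs; ++i)
    local_optimize_graph(get_irp_irg(i));
}
#endif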
/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

 * Remember the new node in the old node by using a field all nodes have.
set_new_node (ir_node *old, ir_node *new)

 * Get this new node, before the old node is forgotten.
static INLINE ir_node *
get_new_node (ir_node * n)

 * We use the block_visited flag to mark that we have computed the
 * number of useful predecessors for this block.
 * Further we encode the new arity in this flag in the old blocks.
 * Remembering the arity is useful, as it saves a lot of pointer
 * accesses.  This function is called for all Phi and Block nodes
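 * Example (illustration): with get_irg_block_visited() == 100, a block with
 * three non-Bad predecessors gets its flag set to 103; a later call then
 * recovers the arity as block_v - irg_v = 103 - 100 = 3.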
compute_new_arity(ir_node *b) {
  int i, res, irn_arity;

  irg_v = get_irg_block_visited(current_ir_graph);
  block_v = get_Block_block_visited(b);
  if (block_v >= irg_v) {
    /* we computed the number of preds for this block and saved it in the
       block_visited flag */
    return block_v - irg_v;

  /* compute the number of good predecessors */
  res = irn_arity = get_irn_arity(b);
  for (i = 0; i < irn_arity; i++)
    if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
  /* save it in the flag. */
  set_Block_block_visited(b, irg_v + res);
/* TODO: add an ir_op operation */
static INLINE void new_backedge_info(ir_node *n) {
  switch(get_irn_opcode(n)) {
    n->attr.block.cg_backedge = NULL;
    n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
    n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
    n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));

 * Copies the node to the new obstack. The Ins of the new node point to
 * the predecessors on the old obstack.  For block/phi nodes not all
 * predecessors might be copied.  n->link points to the new node.
 * For Phi and Block nodes the function allocates in-arrays with an arity
 * only for useful predecessors.  The arity is determined by counting
 * the non-bad predecessors of the block.
copy_node (ir_node *n, void *env) {
  opcode op = get_irn_opcode(n);
  /* The End node loses its flexible in array.  This doesn't matter,
     as dead node elimination builds End by hand, inlining doesn't use
  /* assert(n->op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */

  /* node copied already */
  } else if (op == iro_Block) {
    new_arity = compute_new_arity(n);
    n->attr.block.graph_arr = NULL;
    block = get_nodes_Block(n);
    if (get_irn_opcode(n) == iro_Phi) {
      new_arity = compute_new_arity(block);
      new_arity = get_irn_arity(n);
  nn = new_ir_node(get_irn_dbg_info(n),

  /* Copy the attributes.  These might point to additional data.  If this
     was allocated on the old obstack the pointers now are dangling.  This
     frees e.g. the memory of the graph_arr allocated in new_immBlock. */
  new_backedge_info(nn);

  /* printf("\n old node: "); DDMSG2(n);
     printf(" new node: "); DDMSG2(nn); */
 * Copies the predecessors of the old node to the new node remembered in the
 * link field.  Skips the Bad predecessors of Phi and Block nodes.
copy_preds (ir_node *n, void *env) {
  nn = get_new_node(n);

  /* printf("\n old node: "); DDMSG2(n);
     printf(" new node: "); DDMSG2(nn);
     printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */

  if (get_irn_opcode(n) == iro_Block) {
    /* Don't copy Bad nodes. */
    irn_arity = get_irn_arity(n);
    for (i = 0; i < irn_arity; i++)
      if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
        set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
        /*if (is_backedge(n, i)) set_backedge(nn, j);*/

    /* repair the block visited flag from above misuse. Repair it in both
       graphs so that the old one can still be used. */
    set_Block_block_visited(nn, 0);
    set_Block_block_visited(n, 0);
    /* Local optimization could not merge two subsequent blocks while the
       in array contained Bads.  Now it is possible.
       We don't call optimize_in_place as it requires
       that the fields in ir_graph are set properly. */
    if ((get_opt_control_flow_straightening()) &&
        (get_Block_n_cfgpreds(nn) == 1) &&
        (get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)) {
      ir_node *old = get_nodes_Block(get_Block_cfgpred(nn, 0));

      /* Jmp jumps into the block it is in -- handle this self cycle. */
      assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
      exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
  } else if (get_irn_opcode(n) == iro_Phi) {
    /* Don't copy node if corresponding predecessor in block is Bad.
       The Block itself should not be Bad. */
    block = get_nodes_Block(n);
    set_irn_n (nn, -1, get_new_node(block));
    irn_arity = get_irn_arity(n);
    for (i = 0; i < irn_arity; i++)
      if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
        set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
        /*if (is_backedge(n, i)) set_backedge(nn, j);*/

    /* If the pre walker reached this Phi after the post walker visited the
       block, block_visited is > 0. */
    set_Block_block_visited(get_nodes_Block(n), 0);
    /* Compacting the Phi's ins might generate Phis with only one
       predecessor. */
    if (get_irn_arity(n) == 1)
      exchange(n, get_irn_n(n, 0));
    irn_arity = get_irn_arity(n);
    for (i = -1; i < irn_arity; i++)
      set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));

  /* Now the new node is complete.  We can add it to the hash table for cse.
     @@@ inlining aborts if we identify End. Why? */
  if(get_irn_op(nn) != op_End)
    add_identities (current_ir_graph->value_table, nn);
 * Copies the graph recursively, compacts the keepalives of the End node.
  ir_node *oe, *ne, *ob, *nb;  /* old end, new end, old bad, new bad */
  ir_node *ka;                 /* keep alive */

  oe = get_irg_end(current_ir_graph);
  /* copy the end node by hand, allocate dynamic in array! */
  ne = new_ir_node(get_irn_dbg_info(oe),
  /* Copy the attributes.  Well, there might be some in the future... */
  set_new_node(oe, ne);

  ob = get_irg_bad(current_ir_graph);
  nb = new_ir_node(get_irn_dbg_info(ob),
  set_new_node(ob, nb);

  /* copy the live nodes */
  irg_walk(get_nodes_Block(oe), copy_node, copy_preds, NULL);
  /* copy_preds for the end node ... */
  set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));
  set_nodes_Block(nb, get_new_node(get_nodes_Block(ob)));
  /*- ... and now the keep alives. -*/
  /* First pick the unmarked block nodes and walk them.  We must pick these
     first, as otherwise we will overlook blocks reachable from Phis. */
  irn_arity = get_irn_arity(oe);
  for (i = 0; i < irn_arity; i++) {
    ka = get_irn_intra_n(oe, i);
    if ((get_irn_op(ka) == op_Block) &&
        (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
      /* We must keep the block alive and copy everything reachable */
      set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
      irg_walk(ka, copy_node, copy_preds, NULL);
      add_End_keepalive(ne, get_new_node(ka));

  /* Now pick the Phis.  Here we will keep all! */
  irn_arity = get_irn_arity(oe);
  for (i = 0; i < irn_arity; i++) {
    ka = get_irn_intra_n(oe, i);
    if ((get_irn_op(ka) == op_Phi)) {
      if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
        /* We didn't copy the Phi yet. */
        set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
        irg_walk(ka, copy_node, copy_preds, NULL);
      add_End_keepalive(ne, get_new_node(ka));
 * Copies the graph reachable from current_ir_graph->end to the obstack
 * in current_ir_graph and fixes the environment.
 * Then fixes the fields in current_ir_graph containing nodes of the
copy_graph_env (void) {

  /* Not all nodes remembered in current_ir_graph might be reachable
     from the end node.  Assure their link is set to NULL, so that
     we can test whether new nodes have been computed. */
  set_irn_link(get_irg_frame  (current_ir_graph), NULL);
  set_irn_link(get_irg_globals(current_ir_graph), NULL);
  set_irn_link(get_irg_args   (current_ir_graph), NULL);

  /* we use the block walk flag for removing Bads from Blocks ins. */
  inc_irg_block_visited(current_ir_graph);

  /* fix the fields in current_ir_graph */
  old_end = get_irg_end(current_ir_graph);
  set_irg_end        (current_ir_graph, get_new_node(old_end));
  set_irg_end_except (current_ir_graph, get_irg_end(current_ir_graph));
  set_irg_end_reg    (current_ir_graph, get_irg_end(current_ir_graph));
  set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
  if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
    copy_node (get_irg_frame(current_ir_graph), NULL);
    copy_preds(get_irg_frame(current_ir_graph), NULL);
  if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
    copy_node (get_irg_globals(current_ir_graph), NULL);
    copy_preds(get_irg_globals(current_ir_graph), NULL);
  if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
    copy_node (get_irg_args(current_ir_graph), NULL);
    copy_preds(get_irg_args(current_ir_graph), NULL);
  set_irg_start      (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
  set_irg_start_block(current_ir_graph,
                      get_new_node(get_irg_start_block(current_ir_graph)));
  set_irg_frame      (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
  set_irg_globals    (current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
  set_irg_args       (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
  if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
    copy_node(get_irg_bad(current_ir_graph), NULL);
    copy_preds(get_irg_bad(current_ir_graph), NULL);
  set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
 * Copies all reachable nodes to a new obstack.  Removes bad inputs
 * from block nodes and the corresponding inputs from Phi nodes.
 * Merges single exit blocks with single entry blocks and removes
 * Adds all new nodes to a new hash table for cse.  Does not
 * perform cse, so the hash table might contain common subexpressions.
dead_node_elimination(ir_graph *irg) {
  int rem_ipview = interprocedural_view;
  struct obstack *graveyard_obst = NULL;
  struct obstack *rebirth_obst   = NULL;

  stat_dead_node_elim_start(irg);

  /* Remember external state of current_ir_graph. */
  rem = current_ir_graph;
  current_ir_graph = irg;
  interprocedural_view = 0;

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);
  free_outs(current_ir_graph);

  /* @@@ so far we lose loops when copying */
  free_loop_information(current_ir_graph);

  if (get_opt_optimize() && get_opt_dead_node_elimination()) {

    /* A quiet place, where the old obstack can rest in peace,
       until it is cremated. */
    graveyard_obst = irg->obst;

    /* A new obstack, where the reachable nodes will be copied to. */
    rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
    current_ir_graph->obst = rebirth_obst;
    obstack_init (current_ir_graph->obst);

    /* We also need a new hash table for cse */
    del_identities (irg->value_table);
    irg->value_table = new_identities ();

    /* Copy the graph from the old to the new obstack */

    /* Free memory from old unoptimized obstack */
    obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
    xfree (graveyard_obst);           /* ... then free it.           */

  stat_dead_node_elim_stop(irg);

  current_ir_graph = rem;
  interprocedural_view = rem_ipview;
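
/* A minimal usage sketch (illustration only): dead node elimination is
   typically run after the local optimizations have turned nodes into
   Ids/Bads, to reclaim the memory of the unreachable nodes.  This pipeline
   is one plausible ordering, not mandated by this file. */
#if 0
static void cleanup_graph(ir_graph *irg) {
  local_optimize_graph(irg);   /* fold nodes in place */
  dead_node_elimination(irg);  /* copy the survivors to a fresh obstack */
}
#endif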
 * Relinks Bad predecessors of a block and stores the old in array
 * in the link field.  This function is called by relink_bad_predecessors().
 * The array in the link field starts with the block operand at position 0.
 * If the block has Bad predecessors, a new in array without Bads is created;
 * otherwise the in array is left untouched.
static void relink_bad_block_predecessors(ir_node *n, void *env) {
  ir_node **new_in, *irn;
  int i, new_irn_n, old_irn_arity, new_irn_arity = 0;

  /* if the link field of the block is NULL, look for Bad predecessors;
     otherwise this is already done */
  if (get_irn_op(n) == op_Block &&
      get_irn_link(n) == NULL) {

    /* save old predecessors in link field (position 0 is the block operand) */
    set_irn_link(n, (void *)get_irn_in(n));

    /* count predecessors without Bad nodes */
    old_irn_arity = get_irn_arity(n);
    for (i = 0; i < old_irn_arity; i++)
      if (!is_Bad(get_irn_n(n, i))) new_irn_arity++;

    /* arity changed: set new predecessors without Bad nodes */
    if (new_irn_arity < old_irn_arity) {
      /* allocate the new predecessor array (+1 for the block operand at
         position 0) */
      new_in = NEW_ARR_D (ir_node *, current_ir_graph->obst, (new_irn_arity+1));

      /* set new predecessors in array */
      for (i = 1; i < old_irn_arity; i++) {
        irn = get_irn_n(n, i);
        if (!is_Bad(irn)) new_in[new_irn_n++] = irn;
    } /* ir node has Bad predecessors */
  } /* Block is not relinked */
 * Relinks Bad predecessors of Blocks and Phis; called by the walker of
 * remove_bad_predecessors().  If n is a Block, calls
 * relink_bad_block_predecessors().  If n is a Phi node, also calls the
 * relinking function of the Phi's Block.  If this block has Bad predecessors,
 * relinks the Phi's preds
static void relink_bad_predecessors(ir_node *n, void *env) {
  ir_node *block, **old_in;
  int i, old_irn_arity, new_irn_arity;

  /* relink Bad predecessors of a block */
  if (get_irn_op(n) == op_Block)
    relink_bad_block_predecessors(n, env);

  /* If n is a Phi node, relink its block and its predecessors */
  if (get_irn_op(n) == op_Phi) {

    /* Relink predecessors of the Phi's block */
    block = get_nodes_Block(n);
    if (get_irn_link(block) == NULL)
      relink_bad_block_predecessors(block, env);

    old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
    old_irn_arity = ARR_LEN(old_in);

    /* Relink Phi predecessors if the number of predecessors changed */
    if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
      /* set new predecessors in array;
         n->in[0] remains the same block */
      for(i = 1; i < old_irn_arity; i++)
        if (!is_Bad((ir_node *)old_in[i])) n->in[new_irn_arity++] = n->in[i];

      ARR_SETLEN(ir_node *, n->in, new_irn_arity);
  } /* n is a Phi node */

 * Removes Bad predecessors from Blocks and the corresponding
 * inputs to Phi nodes, as in dead_node_elimination but without
 * On the way down set the link field to NULL; on the way up call
 * relink_bad_predecessors() (this function stores the old in array
 * in the link field and sets a new in array if the arity of the predecessors
void remove_bad_predecessors(ir_graph *irg) {
  irg_walk_graph(irg, init_link, relink_bad_predecessors, NULL);
/*--------------------------------------------------------------------*/
/* Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 * Copies the node by calling copy_node and then updates the entity if
 * it's a local one.  env must be a pointer to the frame type of the
 * inlined procedure.  The new entities must be in the link field of
copy_node_inline (ir_node *n, void *env) {
  type *frame_tp = (type *)env;

  if (get_irn_op(n) == op_Sel) {
    new = get_new_node (n);
    assert(get_irn_op(new) == op_Sel);
    if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
      set_Sel_entity(new, get_entity_link(get_Sel_entity(n)));
  } else if (get_irn_op(n) == op_Block) {
    new = get_new_node (n);
    new->attr.block.irg = current_ir_graph;
static void find_addr(ir_node *node, void *env)
  if (get_irn_opcode(node) == iro_Proj) {
    if (get_Proj_proj(node) == pn_Start_P_value_arg_base)

 * currently, we cannot inline two cases:
 * - call with compound arguments
 * - graphs that take the address of a parameter
 * check these conditions here
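 * Example (illustration only; g is an arbitrary callee): a function like
 *   int f(int x) { return g(&x); }
 * takes the address of its parameter x; find_addr() detects the
 * corresponding pn_Start_P_value_arg_base Proj and can_inline() rejects
 * the graph.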
static int can_inline(ir_node *call, ir_graph *called_graph)
  type *call_type = get_Call_type(call);
  int params, ress, i, res;

  assert(is_method_type(call_type));

  params = get_method_n_params(call_type);
  ress   = get_method_n_ress(call_type);

  for (i = 0; i < params; ++i) {
    type *p_type = get_method_param_type(call_type, i);

    if (is_compound_type(p_type))

  for (i = 0; i < ress; ++i) {
    type *r_type = get_method_res_type(call_type, i);

    if (is_compound_type(r_type))

  irg_walk_graph(called_graph, find_addr, NULL, &res);
int inline_method(ir_node *call, ir_graph *called_graph) {
  ir_node *post_call, *post_bl;
  ir_node *end, *end_bl;
  int arity, n_ret, n_exc, n_res, i, j, rem_opt, irn_arity;

  irg_inline_property prop = get_irg_inline_property(called_graph);

  if ( (prop != irg_inline_forced) && (!get_opt_optimize() || !get_opt_inline() ||
      (prop == irg_inline_forbidden))) return 0;

   * currently, we cannot inline two cases:
   * - call with compound arguments
   * - graphs that take the address of a parameter
  if (! can_inline(call, called_graph))

  /* --  Turn off optimizations; they can cause problems when allocating new nodes. -- */
  rem_opt = get_opt_optimize();

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  assert(get_irg_pinned(current_ir_graph) == pinned);
  assert(get_irg_pinned(called_graph) == pinned);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);
  set_irg_loopinfo_inconsistent(current_ir_graph);

  /* -- Check preconditions -- */
  assert(get_irn_op(call) == op_Call);
  /* @@@ does not work for InterfaceIII.java after cgana
     assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
     assert(smaller_type(get_entity_type(get_irg_ent(called_graph)),
     get_Call_type(call)));
  assert(get_type_tpop(get_Call_type(call)) == type_method);
  if (called_graph == current_ir_graph) {
    set_optimize(rem_opt);

  /* here we know we WILL inline, so inform the statistics */
  stat_inline(call, called_graph);
  /* -- Decide how to handle exception control flow: Is there a handler
     for the Call node, or do we branch directly to End on an exception?
     exc_handling: 0 There is a handler.
                   2 Exception handling not represented in Firm. -- */
    ir_node *proj, *Mproj = NULL, *Xproj = NULL;
    for (proj = (ir_node *)get_irn_link(call); proj; proj = (ir_node *)get_irn_link(proj)) {
      assert(get_irn_op(proj) == op_Proj);
      if (get_Proj_proj(proj) == pn_Call_X_except) Xproj = proj;
      if (get_Proj_proj(proj) == pn_Call_M_except) Mproj = proj;
    if (Mproj) { assert(Xproj); exc_handling = 0; } /*  Mproj           */
    else if (Xproj) {           exc_handling = 1; } /* !Mproj &&  Xproj */
    else            {           exc_handling = 2; } /* !Mproj && !Xproj */

     the procedure and later replaces the Start node of the called graph.
     Post_call is the old Call node and collects the results of the called
     graph.  Both will end up being a tuple. -- */
  post_bl = get_nodes_Block(call);
  set_irg_current_block(current_ir_graph, post_bl);
  /* XxMxPxP of Start + parameter of Call */
  in[pn_Start_X_initial_exec] = new_Jmp();
  in[pn_Start_M]              = get_Call_mem(call);
  in[pn_Start_P_frame_base]   = get_irg_frame(current_ir_graph);
  in[pn_Start_P_globals]      = get_irg_globals(current_ir_graph);
  in[pn_Start_T_args]         = new_Tuple(get_Call_n_params(call), get_Call_param_arr(call));
  /* in[pn_Start_P_value_arg_base] = ??? */
  pre_call = new_Tuple(5, in);
     The new block gets the ins of the old block, pre_call and all its
     predecessors and all Phi nodes. -- */
  part_block(pre_call);

  /* -- Prepare state for dead node elimination -- */
  /* Visited flags in calling irg must be >= flag in called irg.
     Else walker and arity computation will not work. */
  if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
    set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
  if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
    set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
  /* Set pre_call as new Start node in link field of the start node of
     calling graph and pre_call's block as new block for the start block
     Further mark these nodes so that they are not visited by the
  set_irn_link(get_irg_start(called_graph), pre_call);
  set_irn_visited(get_irg_start(called_graph), get_irg_visited(current_ir_graph));
  set_irn_link(get_irg_start_block(called_graph), get_nodes_Block(pre_call));
  set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(current_ir_graph));
  set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
  set_irn_visited(get_irg_bad(called_graph), get_irg_visited(current_ir_graph));

  /* Initialize for compaction of in arrays */
  inc_irg_block_visited(current_ir_graph);
  /* -- Replicate local entities of the called_graph -- */
  /* copy the entities. */
  called_frame = get_irg_frame_type(called_graph);
  for (i = 0; i < get_class_n_members(called_frame); i++) {
    entity *new_ent, *old_ent;
    old_ent = get_class_member(called_frame, i);
    new_ent = copy_entity_own(old_ent, get_cur_frame_type());
    set_entity_link(old_ent, new_ent);

  /* visited is > than that of the called graph.  With this trick visited will
     remain unchanged, so an outer walker, e.g., one searching for the call
     nodes to inline, that triggers this inlining will not visit the inlined nodes. */
  set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);

  /* -- Performing dead node elimination inlines the graph -- */
  /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
  /* @@@ endless loops are not copied!! -- they should be, I think... */
  irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
           get_irg_frame_type(called_graph));

  /* Repair called_graph */
  set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
  set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
  set_Block_block_visited(get_irg_start_block(called_graph), 0);
  /* -- Merge the end of the inlined procedure with the call site -- */
  /* We will turn the old Call node into a Tuple with the following
     0: Phi of all Memories of Return statements.
     1: Jmp from new Block that merges the control flow from all exception
        predecessors of the old end block.
     2: Tuple of all arguments.
     3: Phi of Exception memories.
     In case the old Call directly branches to End on an exception we don't
     need the block merging all exceptions nor the Phi of the exception

  /* -- Precompute some values -- */
  end_bl = get_new_node(get_irg_end_block(called_graph));
  end    = get_new_node(get_irg_end(called_graph));
  arity  = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
  n_res  = get_method_n_ress(get_Call_type(call));

  res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *));
  cf_pred  = (ir_node **) malloc (arity * sizeof (ir_node *));

  set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */

  /* -- archive keepalives -- */
  irn_arity = get_irn_arity(end);
  for (i = 0; i < irn_arity; i++)
    add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));

  /* The new end node will die.  We need not free as the in array is on the obstack:
     copy_node only generated 'D' arrays. */
  /* -- Replace Return nodes by Jump nodes. -- */
  for (i = 0; i < arity; i++) {
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
  set_irn_in(post_bl, n_ret, cf_pred);

  /* -- Build a Tuple for all results of the method.
     Add Phi node if there was more than one Return. -- */
  turn_into_tuple(post_call, 4);
  /* First the Memory-Phi */
  for (i = 0; i < arity; i++) {
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = get_Return_mem(ret);
  phi = new_Phi(n_ret, cf_pred, mode_M);
  set_Tuple_pred(call, pn_Call_M_regular, phi);
  /* Conserve Phi-list for further inlinings -- but might be optimized */
  if (get_nodes_Block(phi) == post_bl) {
    set_irn_link(phi, get_irn_link(post_bl));
    set_irn_link(post_bl, phi);
  /* Now the real results */
  for (j = 0; j < n_res; j++) {
    for (i = 0; i < arity; i++) {
      ret = get_irn_n(end_bl, i);
      if (get_irn_op(ret) == op_Return) {
        cf_pred[n_ret] = get_Return_res(ret, j);
    phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
    /* Conserve Phi-list for further inlinings -- but might be optimized */
    if (get_nodes_Block(phi) == post_bl) {
      set_irn_link(phi, get_irn_link(post_bl));
      set_irn_link(post_bl, phi);
  set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
  set_Tuple_pred(call, pn_Call_T_result, new_Bad());
  /* Finally the exception control flow.
     We have two (three) possible situations:
     First, if the Call branches to an exception handler: We need to add a Phi node to
     collect the memory containing the exception objects.  Further we need
     to add another block to get a correct representation of this Phi.  To
     this block we add a Jmp that resolves into the X output of the Call
     when the Call is turned into a tuple.
     Second, the Call branches to End; the exception is not handled.  Just
     add all inlined exception branches to the End node.
     Third: there is no Exception edge at all. Handle as case two. */
  if (exc_handling == 0) {
    for (i = 0; i < arity; i++) {
      ret = get_irn_n(end_bl, i);
      if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
        cf_pred[n_exc] = ret;
    new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
    set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
    /* The Phi for the memories with the exception objects */
    for (i = 0; i < arity; i++) {
      ret = skip_Proj(get_irn_n(end_bl, i));
      if (get_irn_op(ret) == op_Call) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
      } else if (is_fragile_op(ret)) {
        /* We rely on the fact that all cfops have the memory output at the same position. */
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
      } else if (get_irn_op(ret) == op_Raise) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
    set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
    set_Tuple_pred(call, pn_Call_X_except, new_Bad());
    set_Tuple_pred(call, pn_Call_M_except, new_Bad());
    ir_node *main_end_bl;
    int main_end_bl_arity;

    /* assert(exc_handling == 1 || no exceptions. ) */
    for (i = 0; i < arity; i++) {
      ir_node *ret = get_irn_n(end_bl, i);

      if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
        cf_pred[n_exc] = ret;
    main_end_bl = get_irg_end_block(current_ir_graph);
    main_end_bl_arity = get_irn_arity(main_end_bl);
    end_preds = (ir_node **) malloc ((n_exc + main_end_bl_arity) * sizeof (ir_node *));

    for (i = 0; i < main_end_bl_arity; ++i)
      end_preds[i] = get_irn_n(main_end_bl, i);
    for (i = 0; i < n_exc; ++i)
      end_preds[main_end_bl_arity + i] = cf_pred[i];
    set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
    set_Tuple_pred(call, pn_Call_X_except, new_Bad());
    set_Tuple_pred(call, pn_Call_M_except, new_Bad());
#if 0  /* old.  There is now a better, more correct, faster implementation. */

  /* -- If the exception control flow from the inlined Call directly
     branched to the end block we now have the following control
     flow predecessor pattern: ProjX -> Tuple -> Jmp.  We must
     remove the Jmp along with its empty block and add Jmp's
     predecessors as predecessors of this end block.  No problem if
     there is no exception, because then Bad branches to End which
     @@@ can't we know this beforehand: by getting the Proj(1) from
     the Call link list and checking whether it goes to Proj. */
  /* find the problematic predecessor of the end block. */
  end_bl = get_irg_end_block(current_ir_graph);
  for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
    cf_op = get_Block_cfgpred(end_bl, i);
    if (get_irn_op(cf_op) == op_Proj) {
      cf_op = get_Proj_pred(cf_op);
      if ((get_irn_op(cf_op) == op_Tuple) && (cf_op == call)) {
        /* There are unoptimized tuples from inlining before when no exc */
        assert(get_Proj_proj(get_Block_cfgpred(end_bl, i)) == pn_Call_X_except);
        cf_op = get_Tuple_pred(cf_op, pn_Call_X_except);
        assert(get_irn_op(cf_op) == op_Jmp);

  if (i < get_Block_n_cfgpreds(end_bl)) {
    bl = get_nodes_Block(cf_op);
    arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
    cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
    for (j = 0; j < i; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j);
    for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
      cf_pred[j] = get_Block_cfgpred(bl, j-i);
    for (j = j; j < arity; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
    set_irn_in(end_bl, arity, cf_pred);
    /* Remove the exception pred from post-call Tuple. */
    set_Tuple_pred(call, pn_Call_X_except, new_Bad());

  /* -- Turn cse back on. -- */
  set_optimize(rem_opt);
/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

/* It makes no sense to inline too many calls in one procedure. Anyway,
   I didn't get a version with NEW_ARR_F to run. */
#define MAX_INLINE 1024

 * environment for inlining small irgs
typedef struct _inline_env_t {
  ir_node *calls[MAX_INLINE];
 * Returns the irg called from a Call node.  If the irg is not
 * known, NULL is returned.
static ir_graph *get_call_called_irg(ir_node *call) {
  ir_graph *called_irg = NULL;

  assert(get_irn_op(call) == op_Call);

  addr = get_Call_ptr(call);
  if (get_irn_op(addr) == op_Const) {
    /* Check whether the constant is the pointer to a compiled entity. */
    tv = get_Const_tarval(addr);
    if (tarval_to_entity(tv))
      called_irg = get_entity_irg(tarval_to_entity(tv));

static void collect_calls(ir_node *call, void *env) {
  inline_env_t *ienv = env;
  ir_graph *called_irg;

  if (get_irn_op(call) != op_Call) return;

  addr = get_Call_ptr(call);
  if (get_irn_op(addr) == op_Const) {
    /* Check whether the constant is the pointer to a compiled entity. */
    tv = get_Const_tarval(addr);
    if (tarval_to_entity(tv)) {
      called_irg = get_entity_irg(tarval_to_entity(tv));
      if (called_irg && ienv->pos < MAX_INLINE) {
        /* The Call node calls a locally defined method.  Remember to inline. */
        ienv->calls[ienv->pos++] = call;
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
void inline_small_irgs(ir_graph *irg, int size) {
  ir_graph *rem = current_ir_graph;

  if (!(get_opt_optimize() && get_opt_inline())) return;

  current_ir_graph = irg;
  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);

  /* Find Call nodes to inline.
     (We cannot inline during a walk of the graph, as inlining the same
     method several times changes the visited flag of the walked graph:
     after the first inlining visited of the callee equals visited of
     the caller.  With the next inlining both are increased.) */
  irg_walk(get_irg_end(irg), NULL, collect_calls, &env);

  if ((env.pos > 0) && (env.pos < MAX_INLINE)) {
    /* There are calls to inline */
    collect_phiprojs(irg);
    for (i = 0; i < env.pos; i++) {
      tv = get_Const_tarval(get_Call_ptr(env.calls[i]));
      callee = get_entity_irg(tarval_to_entity(tv));
      if (((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) ||
          (get_irg_inline_property(callee) == irg_inline_forced)) {
        inline_method(env.calls[i], callee);

  current_ir_graph = rem;
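
/* A minimal usage sketch (illustration only): apply inline_small_irgs to
   every graph with an obstack-size threshold.  The threshold value is an
   arbitrary example, not a recommendation from this file. */
#if 0
static void inline_all_small(int size) {
  int i, n_irgs = get_irp_n_irgs();
  for (i = 0; i < n_irgs; ++i)
    inline_small_irgs(get_irp_irg(i), size);
}
#endif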
 * Environment for inlining irgs.
  int n_nodes;             /**< Nodes in graph except Id, Tuple, Proj, Start, End */
  int n_nodes_orig;        /**< for statistics */
  eset *call_nodes;        /**< All call nodes in this graph */
  int n_call_nodes_orig;   /**< for statistics */
  int n_callers;           /**< Number of known graphs that call this graph. */
  int n_callers_orig;      /**< for statistics */

static inline_irg_env *new_inline_irg_env(void) {
  inline_irg_env *env = malloc(sizeof(inline_irg_env));
  env->n_nodes = -2;       /* uncount Start, End */
  env->n_nodes_orig = -2;  /* uncount Start, End */
  env->call_nodes = eset_create();
  env->n_call_nodes = 0;
  env->n_call_nodes_orig = 0;
  env->n_callers_orig = 0;

static void free_inline_irg_env(inline_irg_env *env) {
  eset_destroy(env->call_nodes);

static void collect_calls2(ir_node *call, void *env) {
  inline_irg_env *x = (inline_irg_env *)env;
  ir_op *op = get_irn_op(call);

  /* count nodes in irg */
  if (op != op_Proj && op != op_Tuple && op != op_Sync) {

  if (op != op_Call) return;

  /* collect all call nodes */
  eset_insert(x->call_nodes, (void *)call);
  x->n_call_nodes_orig++;

  /* count all static callers */
  callee = get_call_called_irg(call);
  ((inline_irg_env *)get_irg_link(callee))->n_callers++;
  ((inline_irg_env *)get_irg_link(callee))->n_callers_orig++;

INLINE static int is_leave(ir_graph *irg) {
  return (((inline_irg_env *)get_irg_link(irg))->n_call_nodes == 0);

INLINE static int is_smaller(ir_graph *callee, int size) {
  return (((inline_irg_env *)get_irg_link(callee))->n_nodes < size);
 * Inlines small leave methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
void inline_leave_functions(int maxsize, int leavesize, int size) {
  inline_irg_env *env;
  int i, n_irgs = get_irp_n_irgs();
  ir_graph *rem = current_ir_graph;

  if (!(get_opt_optimize() && get_opt_inline())) return;

  /* extend all irgs by a temporary data structure for inlining. */
  for (i = 0; i < n_irgs; ++i)
    set_irg_link(get_irp_irg(i), new_inline_irg_env());

  /* Precompute information in temporary data structure. */
  for (i = 0; i < n_irgs; ++i) {
    current_ir_graph = get_irp_irg(i);
    assert(get_irg_phase_state(current_ir_graph) != phase_building);
    assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);

    irg_walk(get_irg_end(current_ir_graph), NULL, collect_calls2,
             get_irg_link(current_ir_graph));

     Inline leaves recursively -- we might construct new leaves. */
  /* int itercnt = 1; */
  while (did_inline) {
    /* printf("iteration %d\n", itercnt++); */
    for (i = 0; i < n_irgs; ++i) {
      int phiproj_computed = 0;

      current_ir_graph = get_irp_irg(i);
      env = (inline_irg_env *)get_irg_link(current_ir_graph);

      /* we cannot walk and change a set, nor remove from it.
      walkset = env->call_nodes;
      env->call_nodes = eset_create();
      for (call = eset_first(walkset); call; call = eset_next(walkset)) {
        inline_irg_env *callee_env;
        ir_graph *callee = get_call_called_irg(call);

        if (env->n_nodes > maxsize) break;
            ((is_leave(callee) && is_smaller(callee, leavesize)) ||
             (get_irg_inline_property(callee) == irg_inline_forced))) {
          if (!phiproj_computed) {
            phiproj_computed = 1;
            collect_phiprojs(current_ir_graph);
          callee_env = (inline_irg_env *)get_irg_link(callee);
          /* printf(" %s: Inlining %s.\n", get_entity_name(get_irg_entity(current_ir_graph)), */
          /* get_entity_name(get_irg_entity(callee))); */
          if (inline_method(call, callee)) {
            env->n_call_nodes--;
            eset_insert_all(env->call_nodes, callee_env->call_nodes);
            env->n_call_nodes += callee_env->n_call_nodes;
            env->n_nodes += callee_env->n_nodes;
            callee_env->n_callers--;
          eset_insert(env->call_nodes, call);
      eset_destroy(walkset);
  /* printf("Non leaves\n"); */
  /* inline other small functions. */
  for (i = 0; i < n_irgs; ++i) {
    int phiproj_computed = 0;

    current_ir_graph = get_irp_irg(i);
    env = (inline_irg_env *)get_irg_link(current_ir_graph);

    /* we cannot walk and change a set, nor remove from it.
    walkset = env->call_nodes;
    env->call_nodes = eset_create();
    for (call = eset_first(walkset); call; call = eset_next(walkset)) {
      inline_irg_env *callee_env;
      ir_graph *callee = get_call_called_irg(call);

      if (env->n_nodes > maxsize) break;
      if (callee && is_smaller(callee, size)) {
        if (!phiproj_computed) {
          phiproj_computed = 1;
          collect_phiprojs(current_ir_graph);
        callee_env = (inline_irg_env *)get_irg_link(callee);
        /* printf(" %s: Inlining %s.\n", get_entity_name(get_irg_entity(current_ir_graph)), */
        /* get_entity_name(get_irg_entity(callee))); */
        if (inline_method(call, callee)) {
          env->n_call_nodes--;
          eset_insert_all(env->call_nodes, callee_env->call_nodes);
          env->n_call_nodes += callee_env->n_call_nodes;
          env->n_nodes += callee_env->n_nodes;
          callee_env->n_callers--;
        eset_insert(env->call_nodes, call);
    eset_destroy(walkset);

  for (i = 0; i < n_irgs; ++i) {
    current_ir_graph = get_irp_irg(i);
    env = (inline_irg_env *)get_irg_link(current_ir_graph);
    if ((env->n_call_nodes_orig != env->n_call_nodes) ||
        (env->n_callers_orig != env->n_callers))
      printf("Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
             env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
             env->n_callers_orig, env->n_callers,
             get_entity_name(get_irg_entity(current_ir_graph)));
    free_inline_irg_env((inline_irg_env *)get_irg_link(current_ir_graph));

  current_ir_graph = rem;
/*******************************************************************/
/*  Code Placement.  Pins all floating nodes to a block where they */
/*  will be executed only if needed.                               */
/*******************************************************************/

 * Find the earliest correct block for N.  --- Place N into the
 * same Block as its dominance-deepest Input.
place_floats_early(ir_node *n, pdeq *worklist)
  int i, start, irn_arity;

  /* we must not run into an infinite loop */
  assert (irn_not_visited(n));
  mark_irn_visited(n);

  /* Place floating nodes. */
  if (get_op_pinned(get_irn_op(n)) == floats) {
    ir_node *b = new_Bad();   /* The block to place this node in */
    int bad_recursion = is_Bad(get_nodes_block(n));

    assert(get_irn_op(n) != op_Block);

    if ((get_irn_op(n) == op_Const) ||
        (get_irn_op(n) == op_SymConst) ||
        (get_irn_op(n) == op_Unknown)) {
      /* These nodes will not be placed by the loop below. */
      b = get_irg_start_block(current_ir_graph);

    /* find the block for this node. */
    irn_arity = get_irn_arity(n);
    for (i = 0; i < irn_arity; i++) {
      ir_node *dep = get_irn_n(n, i);
      if ((irn_not_visited(dep))
         && (get_op_pinned(get_irn_op(dep)) == floats)) {
        place_floats_early(dep, worklist);
      /*
       * A node in the Bad block must stay in the Bad block,
       * so don't compute a new block for it.
       */
      /* Because all loops contain at least one pinned node, now all
         our inputs are either pinned or place_early has already
         been finished on them.  We do not have any unfinished inputs! */
      dep_block = get_nodes_Block(dep);
      if ((!is_Bad(dep_block)) &&
          (get_Block_dom_depth(dep_block) > depth)) {
        depth = get_Block_dom_depth(dep_block);
    /* Avoid placing the node in the Start block */
    if ((depth == 1) && (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
      b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
      assert(b != get_irg_start_block(current_ir_graph));
    set_nodes_Block(n, b);

  /* Add predecessors of non floating nodes on worklist. */
  start = (get_irn_op(n) == op_Block) ? 0 : -1;
  irn_arity = get_irn_arity(n);
  for (i = start; i < irn_arity; i++) {
    ir_node *pred = get_irn_n(n, i);
    if (irn_not_visited(pred)) {
      pdeq_putr (worklist, pred);
 * Floating nodes form subgraphs that begin at nodes such as Const, Load,
 * Start, Call and that end at pinned nodes such as Store, Call.  place_early
 * places all floating nodes reachable from its argument through floating
 * nodes and adds all beginnings at pinned nodes to the worklist.
static INLINE void place_early(pdeq* worklist) {
  inc_irg_visited(current_ir_graph);

  /* this inits the worklist */
  place_floats_early(get_irg_end(current_ir_graph), worklist);

  /* Work the content of the worklist. */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_early(n, worklist);

  set_irg_outs_inconsistent(current_ir_graph);
  current_ir_graph->pinned = pinned;
/** deepest common dominance ancestor of DCA and CONSUMER of PRODUCER. */
consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
  ir_node *block = NULL;

  /* Compute the latest block into which we can place a node so that it is
  if (get_irn_op(consumer) == op_Phi) {
    /* our consumer is a Phi-node, the effective use is in all those
       blocks through which the Phi-node reaches producer */
    ir_node *phi_block = get_nodes_Block(consumer);
    irn_arity = get_irn_arity(consumer);
    for (i = 0; i < irn_arity; i++) {
      if (get_irn_n(consumer, i) == producer) {
        block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
    assert(is_no_Block(consumer));
    block = get_nodes_Block(consumer);

  /* Compute the deepest common ancestor of block and dca. */
  if (!dca) return block;
  while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
    block = get_Block_idom(block);
  while (get_Block_dom_depth(dca) > get_Block_dom_depth(block))
    dca = get_Block_idom(dca);
  while (block != dca)
    { block = get_Block_idom(block); dca = get_Block_idom(dca); }
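
  /* Illustration of the lockstep walk above: if depth(block) == 5 and
     depth(dca) == 3, the first loop lifts block to depth 3 via its
     immediate dominators; the last loop then moves both up together
     until they coincide in the deepest common dominator. */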
static INLINE int get_irn_loop_depth(ir_node *n) {
  return get_loop_depth(get_irn_loop(n));

 * Move n to a block with less loop depth than its current block.  The
 * new block must be dominated by early.
move_out_of_loops (ir_node *n, ir_node *early)
  ir_node *best, *dca;

  /* Find the region deepest in the dominator tree dominating
     dca with the least loop nesting depth, but still dominated
     by our early placement. */
  dca = get_nodes_Block(n);
  while (dca != early) {
    dca = get_Block_idom(dca);
    if (!dca) break; /* should we put assert(dca)? */
    if (get_irn_loop_depth(dca) < get_irn_loop_depth(best)) {
  if (best != get_nodes_Block(n)) {
    printf("Moving out of loop: "); DDMN(n);
    printf(" Outermost block: "); DDMN(early);
    printf(" Best block: "); DDMN(best);
    printf(" Innermost block: "); DDMN(get_nodes_Block(n));
    set_nodes_Block(n, best);
 * Find the latest legal block for N and place N into the
 * `optimal' Block between the latest and earliest legal block.
 * The `optimal' block is the dominance-deepest block of those
 * with the least loop-nesting-depth.  This places N out of as many
 * loops as possible and then makes it as control dependent as
place_floats_late(ir_node *n, pdeq *worklist)

  assert (irn_not_visited(n)); /* no multiple placement */

  /* no need to place block nodes, control nodes are already placed. */
  if ((get_irn_op(n) != op_Block) &&
      (get_irn_mode(n) != mode_X)) {
    /* Remember the early placement of this node to move it
       out of loops no further than the early placement. */
    early = get_nodes_Block(n);
    /* Assure that our users are all placed, except the Phi-nodes.
       --- Each data flow cycle contains at least one Phi-node.  We
       have to break the `user has to be placed before the
       producer' dependence cycle and the Phi-nodes are the
       place to do so, because we need to base our placement on the
       final region of our users, which is OK with Phi-nodes, as they
       are pinned, and they never have to be placed after a
       producer of one of their inputs in the same block anyway. */
    for (i = 0; i < get_irn_n_outs(n); i++) {
      ir_node *succ = get_irn_out(n, i);
      if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
        place_floats_late(succ, worklist);

    /* We have to determine the final block of this node... except for
    if ((get_op_pinned(get_irn_op(n)) == floats) &&
        (get_irn_op(n) != op_Const) &&
        (get_irn_op(n) != op_SymConst)) {
      ir_node *dca = NULL;  /* deepest common ancestor in the
                               dominator tree of all nodes'
                               blocks depending on us; our final
                               placement has to dominate DCA. */
      for (i = 0; i < get_irn_n_outs(n); i++) {
        dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
      set_nodes_Block(n, dca);
      move_out_of_loops (n, early);

  mark_irn_visited(n);

  /* Add predecessors of all non-floating nodes on list. (Those of floating
     nodes are placed already and therefore are marked.) */
  for (i = 0; i < get_irn_n_outs(n); i++) {
    if (irn_not_visited(get_irn_out(n, i))) {
      pdeq_putr (worklist, get_irn_out(n, i));
static INLINE void place_late(pdeq* worklist) {
  inc_irg_visited(current_ir_graph);

  /* This fills the worklist initially. */
  place_floats_late(get_irg_start_block(current_ir_graph), worklist);
  /* And now empty the worklist again... */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_late(n, worklist);

void place_code(ir_graph *irg) {
  ir_graph *rem = current_ir_graph;

  current_ir_graph = irg;

  if (!(get_opt_optimize() && get_opt_global_cse())) return;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_dom_state(irg) != dom_consistent)

  if (1 || get_irg_loopinfo_state(irg) != loopinfo_consistent) {
    free_loop_information(irg);
    construct_backedges(irg);

  /* Place all floating nodes as early as possible.  This guarantees
     a legal code placement. */
  worklist = new_pdeq();
  place_early(worklist);

  /* place_early invalidates the outs, place_late needs them. */

  /* Now move the nodes down in the dominator tree.  This reduces the
     number of unnecessary executions of the nodes. */
  place_late(worklist);

  set_irg_outs_inconsistent(current_ir_graph);
  set_irg_loopinfo_inconsistent(current_ir_graph);

  current_ir_graph = rem;
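
/* A minimal usage sketch (illustration only): place_code implements the
   "pin again" half of global CSE/code motion.  The sequence below assumes
   the set_opt_global_cse() toggle from irflag.h; it is one plausible
   driver, not mandated by this file. */
#if 0
static void gcse_and_place(ir_graph *irg) {
  set_opt_global_cse(1);     /* let local_optimize_graph float and merge nodes */
  local_optimize_graph(irg);
  place_code(irg);           /* dominator and loop info are (re)computed inside */
  set_opt_global_cse(0);
}
#endif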
/********************************************************************/
/* Control flow optimization.                                       */
/* Removes Bad control flow predecessors and empty blocks.  A block */
/* is empty if it contains only a Jmp node.                         */
/* Blocks can only be removed if they are not needed for the        */
/* semantics of Phi nodes.                                          */
/********************************************************************/

 * Removes Tuples from Block control flow predecessors.
 * Optimizes blocks with equivalent_node().
 * Replaces n by Bad if n is unreachable control flow.
static void merge_blocks(ir_node *n, void *env) {
  set_irn_link(n, NULL);

  if (get_irn_op(n) == op_Block) {
    for (i = 0; i < get_Block_n_cfgpreds(n); i++)
      /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
         A different order of optimizations might cause problems. */
      if (get_opt_normalize())
        set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
  } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
    /* We will soon visit a block.  Optimize it before visiting! */
    ir_node *b = get_nodes_Block(n);
    ir_node *new_node = equivalent_node(b);
    while (irn_not_visited(b) && (!is_Bad(new_node)) && (new_node != b)) {
      /* We would have to run gigo if new is bad, so we
         promote it directly below. */
      assert(((b == new_node) ||
              get_opt_control_flow_straightening() ||
              get_opt_control_flow_weak_simplification()) &&
             ("strange flag setting"));
      exchange (b, new_node);
      new_node = equivalent_node(b);
    /* GL @@@ get_opt_normalize added, 5.5.2003 */
    if (is_Bad(new_node) && get_opt_normalize()) exchange(n, new_Bad());
 * Collects all Phi nodes in the link list of the Block.
 * Marks all blocks "block_visited" if they contain a node other
static void collect_nodes(ir_node *n, void *env) {
  if (is_no_Block(n)) {
    ir_node *b = get_nodes_Block(n);

    if ((get_irn_op(n) == op_Phi)) {
      /* Collect Phi nodes to compact ins along with block's ins. */
      set_irn_link(n, get_irn_link(b));
    } else if (get_irn_op(n) != op_Jmp) {  /* Check for non-empty block. */
      mark_Block_block_visited(b);

/** Returns true if pred is a predecessor of block b. */
static int is_pred_of(ir_node *pred, ir_node *b) {
  for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
    ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i));
    if (b_pred == pred) return 1;
static int test_whether_dispensable(ir_node *b, int pos) {
  int i, j, n_preds = 1;
  int dispensable = 1;
  ir_node *cfop = get_Block_cfgpred(b, pos);
  ir_node *pred = get_nodes_Block(cfop);

  if (get_Block_block_visited(pred) + 1
      < get_irg_block_visited(current_ir_graph)) {
    if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
      /* Mark block so that it will not be removed. */
      set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
    /* Seems to be empty. */
    if (!get_irn_link(b)) {
      /* There are no Phi nodes ==> dispensable. */
      n_preds = get_Block_n_cfgpreds(pred);
      /* b's pred blocks and pred's pred blocks must be pairwise disjoint.
         Handle preds < pos as if they were already removed. */
      for (i = 0; i < pos; i++) {
        ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i));
        if (get_Block_block_visited(b_pred) + 1
            < get_irg_block_visited(current_ir_graph)) {
          for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
            ir_node *b_pred_pred = get_nodes_Block(get_Block_cfgpred(b_pred, j));
            if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
          if (is_pred_of(b_pred, pred)) dispensable = 0;
      for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
        ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i));
        if (is_pred_of(b_pred, pred)) dispensable = 0;
      set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
      n_preds = get_Block_n_cfgpreds(pred);
static void optimize_blocks(ir_node *b, void *env) {
  int i, j, k, max_preds, n_preds;
  ir_node *pred, *phi;

  /* Count the number of predecessors if this block is merged with pred blocks
  for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
    max_preds += test_whether_dispensable(b, i);
  in = (ir_node **) malloc(max_preds * sizeof(ir_node *));

  printf(" working on "); DDMN(b);
  for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
    pred = get_nodes_Block(get_Block_cfgpred(b, i));
    if (is_Bad(get_Block_cfgpred(b, i))) {
      printf("  removing Bad %i\n ", i);
    } else if (get_Block_block_visited(pred) +1
               < get_irg_block_visited(current_ir_graph)) {
      printf("  removing pred %i ", i); DDMN(pred);
    } else { printf("  Nothing to do for "); DDMN(pred); }
   * end Debug output -*/

  /*- Fix the Phi nodes -*/
  phi = get_irn_link(b);
    assert(get_irn_op(phi) == op_Phi);
    /* Find the new predecessors for the Phi */
    for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, i));
      if (is_Bad(get_Block_cfgpred(b, i))) {
      } else if (get_Block_block_visited(pred) +1
                 < get_irg_block_visited(current_ir_graph)) {
        /* It's an empty block and not yet visited. */
        ir_node *phi_pred = get_Phi_pred(phi, i);
        for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
          if (get_nodes_Block(phi_pred) == pred) {
            assert(get_irn_op(phi_pred) == op_Phi);  /* Block is empty!! */
            in[n_preds] = get_Phi_pred(phi_pred, j);
            in[n_preds] = phi_pred;
        /* The Phi_pred node is replaced now if it is a Phi.
           In loops the removed Phi node can apparently still be used
           legally, so it must be replaced by the new one.  Furthermore
           the old Phi node must be removed (by exchanging it with the new
           one or with a Bad) so that it disappears from the keep_alives.
           Thus, if no loop is present, one should exchange with new_Bad
        if (get_nodes_Block(phi_pred) == pred) {
          /* remove the Phi as it might be kept alive. Further there
             might be other users. */
          exchange(phi_pred, phi);  /* works, but is still semantically wrong! Why?? */
          in[n_preds] = get_Phi_pred(phi, i);
      set_irn_in(phi, n_preds, in);
      phi = get_irn_link(phi);
     This happens only when merging a loop backedge with a single loop entry. -*/
  for (k = 0; k < get_Block_n_cfgpreds(b); k++) {
    pred = get_nodes_Block(get_Block_cfgpred(b, k));
    if (get_Block_block_visited(pred)+1 < get_irg_block_visited(current_ir_graph)) {
      phi = get_irn_link(pred);
        if (get_irn_op(phi) == op_Phi) {
          set_nodes_Block(phi, b);
          for (i = 0; i < k; i++) {
            pred = get_nodes_Block(get_Block_cfgpred(b, i));
            if (is_Bad(get_Block_cfgpred(b, i))) {
            } else if (get_Block_block_visited(pred) +1
                       < get_irg_block_visited(current_ir_graph)) {
              /* It's an empty block and not yet visited. */
              for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
                /* @@@ Here I need loop information!  The control flow edge
                   must be a backedge!  (At all four in[n_preds] = phi
                   assignments.)  Still, it has worked so far!! */
          for (i = 0; i < get_Phi_n_preds(phi); i++) {
            in[n_preds] = get_Phi_pred(phi, i);
          for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
            pred = get_nodes_Block(get_Block_cfgpred(b, i));
            if (is_Bad(get_Block_cfgpred(b, i))) {
            } else if (get_Block_block_visited(pred) +1
                       < get_irg_block_visited(current_ir_graph)) {
              /* It's an empty block and not yet visited. */
              for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
          set_irn_in(phi, n_preds, in);
          phi = get_irn_link(phi);
  /*- Fix the block -*/
  for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
    pred = get_nodes_Block(get_Block_cfgpred(b, i));
    if (is_Bad(get_Block_cfgpred(b, i))) {
    } else if (get_Block_block_visited(pred) +1
               < get_irg_block_visited(current_ir_graph)) {
      /* It's an empty block and not yet visited. */
      assert(get_Block_n_cfgpreds(b) > 1);
      /* Else it should be optimized by equivalent_node. */
      for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
        in[n_preds] = get_Block_cfgpred(pred, j);
      /* Remove block as it might be kept alive. */
      exchange(pred, b/*new_Bad()*/);
      in[n_preds] = get_Block_cfgpred(b, i);
  set_irn_in(b, n_preds, in);
void optimize_cf(ir_graph *irg) {
  ir_node *end = get_irg_end(irg);
  ir_graph *rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);
  if (get_irg_dom_state(current_ir_graph) == dom_consistent)
    set_irg_dom_inconsistent(current_ir_graph);

  /* Use block visited flag to mark non-empty blocks. */
  inc_irg_block_visited(irg);
  irg_walk(end, merge_blocks, collect_nodes, NULL);

  /* Optimize the standard code. */
  irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);

  /* Walk all keep-alives; optimize them if they are blocks, and add them to
     the new in array for End if useful. */
  in = NEW_ARR_F (ir_node *, 1);
  in[0] = get_nodes_Block(end);
  inc_irg_visited(current_ir_graph);
  for(i = 0; i < get_End_n_keepalives(end); i++) {
    ir_node *ka = get_End_keepalive(end, i);
    if (irn_not_visited(ka)) {
      if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
        set_irg_block_visited(current_ir_graph,  /* Don't walk all the way to Start. */
                              get_irg_block_visited(current_ir_graph)-1);
        irg_block_walk(ka, optimize_blocks, NULL, NULL);
        mark_irn_visited(ka);
        ARR_APP1 (ir_node *, in, ka);
      } else if (get_irn_op(ka) == op_Phi) {
        mark_irn_visited(ka);
        ARR_APP1 (ir_node *, in, ka);

  /* DEL_ARR_F(end->in);   GL @@@ doesn't work! */

  current_ir_graph = rem;
 * Called by walker of remove_critical_cf_edges().
 * Places an empty block on an edge between a block with multiple
 * predecessors and a block with multiple successors.
 * @param env    Environment of walker. This field is unused and has
static void walk_critical_cf_edges(ir_node *n, void *env) {
  ir_node *pre, *block, **in, *jmp;

  /* Block has multiple predecessors */
  if ((op_Block == get_irn_op(n)) &&
      (get_irn_arity(n) > 1)) {
    arity = get_irn_arity(n);

    if (n == get_irg_end_block(current_ir_graph))
      return;  /* No use to add a block here. */

    for (i=0; i<arity; i++) {
      pre = get_irn_n(n, i);
      /* Predecessor has multiple successors. Insert a new control flow edge. */
      if ((NULL != pre) &&
          (op_Proj == get_irn_op(pre)) &&
          op_Raise != get_irn_op(skip_Proj(pre))) {

        /* set predecessor array for new block */
        in = NEW_ARR_D (ir_node *, current_ir_graph->obst, 1);
        /* set predecessor of new block */
        block = new_Block(1, in);
        /* insert a new Jmp node into the new block */
        switch_block(block);

        /* set successor of new block */
        set_irn_n(n, i, jmp);
      } /* predecessor has multiple successors */
    } /* for all predecessors */
  } /* n is a block */

void remove_critical_cf_edges(ir_graph *irg) {
  if (get_opt_critical_edges())
    irg_walk_graph(irg, NULL, walk_critical_cf_edges, NULL);
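
/* A minimal driver sketch (illustration only): one plausible per-graph
   optimization sequence using the passes defined in this file.  The
   ordering is an assumption for illustration, not a requirement. */
#if 0
static void optimize_one_graph(ir_graph *irg) {
  local_optimize_graph(irg);      /* fold nodes, local CSE */
  optimize_cf(irg);               /* remove Bad preds and empty blocks */
  dead_node_elimination(irg);     /* garbage-collect the obstack */
  remove_critical_cf_edges(irg);  /* e.g. before phases that need it */
}
#endif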