/*
 * Copyright (C) 1995-2007 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */
/*
 * File name:   ir/ir/irgopt.c
 * Purpose:     Optimizations for a whole ir graph, i.e., a procedure.
 * Author:      Christian Schaefer, Goetz Lindenmaier
 * Modified by: Sebastian Felis, Michael Beck
 * Copyright:   (c) 1998-2007 Universität Karlsruhe
 */
#include "irgraph_t.h"
#include "pdeq.h"       /* for code placement */
#include "irbackedge_t.h"
#include "iredges_t.h"
/*------------------------------------------------------------------*/
/* apply optimizations of iropt to all nodes.                       */
/*------------------------------------------------------------------*/
/**
 * A wrapper around optimize_in_place_2() to be called from a walker.
 */
static void optimize_in_place_wrapper(ir_node *n, void *env) {
	ir_node *optimized = optimize_in_place_2(n);
	if (optimized != n) exchange(n, optimized);
}
/**
 * Do local optimizations for a node.
 *
 * @param n  the IR-node where to start.  Typically the End node
 *           of a graph
 *
 * @note current_ir_graph must be set
 */
static INLINE void do_local_optimize(ir_node *n) {
	/* Handle graph state */
	assert(get_irg_phase_state(current_ir_graph) != phase_building);

	if (get_opt_global_cse())
		set_irg_pinned(current_ir_graph, op_pin_state_floats);
	set_irg_outs_inconsistent(current_ir_graph);
	set_irg_doms_inconsistent(current_ir_graph);
	set_irg_loopinfo_inconsistent(current_ir_graph);

	/* Clean the value_table in irg for the CSE. */
	del_identities(current_ir_graph->value_table);
	current_ir_graph->value_table = new_identities();

	/* walk over the graph */
	irg_walk(n, firm_clear_link, optimize_in_place_wrapper, NULL);
}
/* Applies local optimizations (see iropt.h) to all nodes reachable from node n */
void local_optimize_node(ir_node *n) {
	ir_graph *rem = current_ir_graph;
	current_ir_graph = get_irn_irg(n);

	do_local_optimize(n);

	current_ir_graph = rem;
}
/**
 * Block-Walker: uses dominance depth to mark dead blocks.
 */
static void kill_dead_blocks(ir_node *block, void *env) {
	if (get_Block_dom_depth(block) < 0) {
		/*
		 * Note that the new dominance code correctly handles
		 * the End block, i.e. it is always reachable from Start.
		 */
		set_Block_dead(block);
	}
}
/* Applies local optimizations (see iropt.h) to all nodes of a graph. */
void local_optimize_graph(ir_graph *irg) {
	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;

	if (get_irg_dom_state(irg) == dom_consistent)
		irg_block_walk_graph(irg, NULL, kill_dead_blocks, NULL);

	do_local_optimize(get_irg_end(irg));

	current_ir_graph = rem;
}
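/*
 * Usage sketch (illustrative, not part of the original file): running the
 * local optimizer over every graph of the current program, using the usual
 * irprog iteration API.
 */
#if 0
static void example_local_optimize_all(void) {
	int i, n = get_irp_n_irgs();
	for (i = 0; i < n; ++i)
		local_optimize_graph(get_irp_irg(i));
}
#endif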
/**
 * Enqueue all users of a node to a wait queue.
 * Handles mode_T nodes.
 */
static void enqueue_users(ir_node *n, pdeq *waitq) {
	const ir_edge_t *edge;

	foreach_out_edge(n, edge) {
		ir_node *succ = get_edge_src_irn(edge);

		if (get_irn_link(succ) != waitq) {
			pdeq_putr(waitq, succ);
			set_irn_link(succ, waitq);
		}
		if (get_irn_mode(succ) == mode_T) {
			/* A mode_T node has Projs.  Because most optimizations
			   run on the Projs, we have to enqueue them as well. */
			enqueue_users(succ, waitq);
		}
	}
}
/**
 * Data flow optimization walker.
 * Optimizes a node and enqueues its users if the node was changed.
 */
static void opt_walker(ir_node *n, void *env) {
	pdeq *waitq = env;
	ir_node *optimized;

	optimized = optimize_in_place_2(n);
	set_irn_link(optimized, NULL);

	if (optimized != n) {
		enqueue_users(n, waitq);
		exchange(n, optimized);
	}
}
/* Applies local optimizations to all nodes in the graph until fixpoint. */
void optimize_graph_df(ir_graph *irg) {
	pdeq     *waitq = new_pdeq();
	int      state  = edges_activated(irg);
	ir_graph *rem   = current_ir_graph;

	current_ir_graph = irg;

	if (! state)
		edges_activate(irg);

	if (get_opt_global_cse())
		set_irg_pinned(current_ir_graph, op_pin_state_floats);

	/* Clean the value_table in irg for the CSE. */
	del_identities(irg->value_table);
	irg->value_table = new_identities();

	if (get_irg_dom_state(irg) == dom_consistent)
		irg_block_walk_graph(irg, NULL, kill_dead_blocks, NULL);

	/* invalidate info */
	set_irg_outs_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_loopinfo_inconsistent(irg);

	set_using_irn_link(irg);

	/* walk over the graph */
	irg_walk_graph(irg, NULL, opt_walker, waitq);

	/* finish the wait queue */
	while (! pdeq_empty(waitq)) {
		ir_node *n = pdeq_getl(waitq);
		if (! is_Bad(n))
			opt_walker(n, waitq);
	}

	del_pdeq(waitq);

	clear_using_irn_link(irg);

	if (! state)
		edges_deactivate(irg);

	current_ir_graph = rem;
}
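/*
 * Usage sketch (illustrative): the data-flow fixpoint optimizer is a
 * drop-in replacement for local_optimize_graph(); it takes care of the
 * edges information itself, so no extra setup is needed.
 */
#if 0
static void example_optimize_df(ir_graph *irg) {
	optimize_graph_df(irg);  /* iterates until no node changes anymore */
}
#endif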
/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/
/**
 * Remember the new node in the old node by using a field all nodes have.
 */
#define set_new_node(oldn, newn)  set_irn_link(oldn, newn)

/**
 * Get this new node, before the old node is forgotten.
 */
#define get_new_node(oldn) get_irn_link(oldn)

/**
 * Check if a new node was set.
 */
#define has_new_node(n) (get_new_node(n) != NULL)
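/*
 * Sketch (illustrative only) of the protocol behind these macros, as used by
 * the hand-copied nodes in copy_graph() below.  The old node's link field
 * carries the mapping onto the new obstack.
 */
#if 0
static void example_new_node_mapping(ir_node *on, ir_node *nn) {
	/* 'on' is a node on the old obstack, 'nn' its copy (both hypothetical). */
	set_new_node(on, nn);    /* remember the copy in the old node */
	assert(has_new_node(on));
	nn = get_new_node(on);   /* retrieve it before 'on' is discarded */
	(void)nn;
}
#endif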
/**
 * We use the block_visited flag to mark that we have computed the
 * number of useful predecessors for this block.
 * Further we encode the new arity in this flag in the old blocks.
 * Remembering the arity is useful, as it saves a lot of pointer
 * accesses.  This function is called for all Phi and Block nodes
 * in a Block.
 */
static INLINE int
compute_new_arity(ir_node *b) {
	int i, res, irn_arity;
	int irg_v, block_v;

	irg_v = get_irg_block_visited(current_ir_graph);
	block_v = get_Block_block_visited(b);
	if (block_v >= irg_v) {
		/* we computed the number of preds for this block and saved it in the
		   block_v flag */
		return block_v - irg_v;
	}
	/* compute the number of good predecessors */
	res = irn_arity = get_irn_arity(b);
	for (i = 0; i < irn_arity; i++)
		if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
	/* save it in the flag. */
	set_Block_block_visited(b, irg_v + res);
	return res;
}
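/*
 * Worked example (illustrative): assume get_irg_block_visited() == 100 and a
 * block b with 3 predecessors, one of them Bad.  The first call counts
 * res = 2 good predecessors and stores 100 + 2 = 102 in b's block_visited
 * flag.  Every later call sees block_v (102) >= irg_v (100) and just returns
 * 102 - 100 = 2 without inspecting the predecessors again.
 */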
/**
 * Copies the node to the new obstack.  The Ins of the new node point to
 * the predecessors on the old obstack.  For block/phi nodes not all
 * predecessors might be copied.  n->link points to the new node.
 * For Phi and Block nodes the function allocates in-arrays with an arity
 * only for useful predecessors.  The arity is determined by counting
 * the non-bad predecessors of the block.
 *
 * @param n    The node to be copied
 * @param env  if non-NULL, the node number attribute will be copied to the new node
 *
 * Note: Also used for loop unrolling.
 */
static void copy_node(ir_node *n, void *env) {
	ir_node *nn, *block;
	int new_arity;
	ir_op *op = get_irn_op(n);

	/* The end node loses its flexible in array.  This doesn't matter,
	   as dead node elimination builds End by hand, inlining doesn't use
	   the End node. */
	/* assert(op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */

	if (has_new_node(n)) {
		/* node copied already */
		return;
	} else if (op == op_Block) {
		block = NULL;
		new_arity = compute_new_arity(n);
		n->attr.block.graph_arr = NULL;
	} else {
		block = get_nodes_block(n);
		if (op == op_Phi) {
			new_arity = compute_new_arity(block);
		} else {
			new_arity = get_irn_arity(n);
		}
	}
	nn = new_ir_node(get_irn_dbg_info(n),
	                 current_ir_graph,
	                 block,
	                 op,
	                 get_irn_mode(n),
	                 new_arity,
	                 get_irn_in(n) + 1);
	/* Copy the attributes.  These might point to additional data.  If this
	   was allocated on the old obstack the pointers now are dangling.  This
	   frees e.g. the memory of the graph_arr allocated in new_immBlock. */
	copy_node_attr(n, nn);

#ifdef DEBUG_libfirm
	{
		int copy_node_nr = env != NULL;
		if (copy_node_nr) {
			/* for easier debugging, we want to copy the node numbers too */
			nn->node_nr = n->node_nr;
		}
	}
#endif

	set_new_node(n, nn);
	hook_dead_node_elim_subst(current_ir_graph, n, nn);
}
/**
 * Copies new predecessors of old node to new node remembered in link.
 * Spares the Bad predecessors of Phi and Block nodes.
 */
static void
copy_preds(ir_node *n, void *env) {
	ir_node *nn, *block;
	int i, j, irn_arity;

	nn = get_new_node(n);

	if (is_Block(n)) {
		/* Don't copy Bad nodes. */
		j = 0;
		irn_arity = get_irn_arity(n);
		for (i = 0; i < irn_arity; i++) {
			if (! is_Bad(get_irn_n(n, i))) {
				set_irn_n(nn, j, get_new_node(get_irn_n(n, i)));
				/*if (is_backedge(n, i)) set_backedge(nn, j);*/
				j++;
			}
		}
		/* repair the block visited flag from above misuse. Repair it in both
		   graphs so that the old one can still be used. */
		set_Block_block_visited(nn, 0);
		set_Block_block_visited(n, 0);
		/* Local optimization could not merge two subsequent blocks if
		   in array contained Bads.  Now it's possible.
		   We don't call optimize_in_place as it requires
		   that the fields in ir_graph are set properly. */
		if ((get_opt_control_flow_straightening()) &&
		    (get_Block_n_cfgpreds(nn) == 1) &&
		    (get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)) {
			ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
			if (nn == old) {
				/* Jmp jumps into the block it is in -- deal self cycle. */
				assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
				exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
			} else {
				exchange(nn, old);
			}
		}
	} else if (get_irn_op(n) == op_Phi) {
		/* Don't copy node if corresponding predecessor in block is Bad.
		   The Block itself should not be Bad. */
		block = get_nodes_block(n);
		set_irn_n(nn, -1, get_new_node(block));
		j = 0;
		irn_arity = get_irn_arity(n);
		for (i = 0; i < irn_arity; i++) {
			if (! is_Bad(get_irn_n(block, i))) {
				set_irn_n(nn, j, get_new_node(get_irn_n(n, i)));
				/*if (is_backedge(n, i)) set_backedge(nn, j);*/
				j++;
			}
		}
		/* If the pre walker reached this Phi after the post walker visited the
		   block block_visited is > 0. */
		set_Block_block_visited(get_nodes_block(n), 0);
		/* Compacting the Phi's ins might generate Phis with only one
		   predecessor. */
		if (get_irn_arity(nn) == 1)
			exchange(nn, get_irn_n(nn, 0));
	} else {
		irn_arity = get_irn_arity(n);
		for (i = -1; i < irn_arity; i++)
			set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
	}
	/* Now the new node is complete.  We can add it to the hash table for CSE.
	   @@@ inlining aborts if we identify End.  Why? */
	if (get_irn_op(nn) != op_End)
		add_identities(current_ir_graph->value_table, nn);
}
/**
 * Copies the graph recursively, compacts the keep-alives of the end node.
 *
 * @param irg           the graph to be copied
 * @param copy_node_nr  If non-zero, the node number will be copied
 */
static void copy_graph(ir_graph *irg, int copy_node_nr) {
	ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
	ir_node *ka;      /* keep alive */
	int i, irn_arity;
	unsigned long vfl;

	/* Some nodes must be copied by hand, sigh */
	vfl = get_irg_visited(irg);
	set_irg_visited(irg, vfl + 1);

	oe = get_irg_end(irg);
	mark_irn_visited(oe);
	/* copy the end node by hand, allocate dynamic in array! */
	ne = new_ir_node(get_irn_dbg_info(oe),
	                 irg,
	                 NULL,
	                 op_End,
	                 mode_X,
	                 -1,
	                 NULL);
	/* Copy the attributes.  Well, there might be some in the future... */
	copy_node_attr(oe, ne);
	set_new_node(oe, ne);

	/* copy the Bad node */
	ob = get_irg_bad(irg);
	mark_irn_visited(ob);
	nb = new_ir_node(get_irn_dbg_info(ob),
	                 irg,
	                 NULL,
	                 op_Bad,
	                 mode_T,
	                 0,
	                 NULL);
	copy_node_attr(ob, nb);
	set_new_node(ob, nb);

	/* copy the NoMem node */
	om = get_irg_no_mem(irg);
	mark_irn_visited(om);
	nm = new_ir_node(get_irn_dbg_info(om),
	                 irg,
	                 NULL,
	                 op_NoMem,
	                 mode_M,
	                 0,
	                 NULL);
	copy_node_attr(om, nm);
	set_new_node(om, nm);

	/* copy the live nodes */
	set_irg_visited(irg, vfl);
	irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));

	/* Note: from now on, the visited flag of the graph is equal to vfl + 1 */

	/* visit the anchors as well */
	for (i = anchor_max - 1; i >= 0; --i) {
		ir_node *n = irg->anchors[i];

		if (n && (get_irn_visited(n) <= vfl)) {
			set_irg_visited(irg, vfl);
			irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
		}
	}

	/* copy_preds for the end node ... */
	set_nodes_block(ne, get_new_node(get_nodes_block(oe)));

	/*- ... and now the keep alives. -*/
	/* First pick the not marked block nodes and walk them.  We must pick these
	   first as else we will oversee blocks reachable from Phis. */
	irn_arity = get_End_n_keepalives(oe);
	for (i = 0; i < irn_arity; i++) {
		ka = get_End_keepalive(oe, i);
		if (is_Block(ka)) {
			if (get_irn_visited(ka) <= vfl) {
				/* We must keep the block alive and copy everything reachable */
				set_irg_visited(irg, vfl);
				irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
			}
			add_End_keepalive(ne, get_new_node(ka));
		}
	}

	/* Now pick other nodes.  Here we will keep all! */
	irn_arity = get_End_n_keepalives(oe);
	for (i = 0; i < irn_arity; i++) {
		ka = get_End_keepalive(oe, i);
		if (! is_Block(ka)) {
			if (get_irn_visited(ka) <= vfl) {
				/* We didn't copy the node yet. */
				set_irg_visited(irg, vfl);
				irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
			}
			add_End_keepalive(ne, get_new_node(ka));
		}
	}

	/* start block sometimes only reached after keep alives */
	set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
	set_nodes_block(nm, get_new_node(get_nodes_block(om)));
}
/**
 * Copies the graph reachable from current_ir_graph->end to the obstack
 * in current_ir_graph and fixes the environment.
 * Then fixes the fields in current_ir_graph containing nodes of the
 * graph.
 *
 * @param copy_node_nr  If non-zero, the node number will be copied
 */
static void
copy_graph_env(int copy_node_nr) {
	ir_graph *irg = current_ir_graph;
	ir_node *old_end, *n;
	int i;

	/* remove end_except and end_reg nodes */
	old_end = get_irg_end(irg);
	set_irg_end_except(irg, old_end);
	set_irg_end_reg(irg, old_end);

	/* Not all nodes remembered in irg might be reachable
	   from the end node.  Assure their link is set to NULL, so that
	   we can test whether new nodes have been computed. */
	for (i = anchor_max - 1; i >= 0; --i) {
		if (irg->anchors[i])
			set_new_node(irg->anchors[i], NULL);
	}
	/* we use the block walk flag for removing Bads from Blocks ins. */
	inc_irg_block_visited(irg);

	/* copy the graph */
	copy_graph(irg, copy_node_nr);

	/* fix the fields in irg */
	old_end = get_irg_end(irg);
	for (i = anchor_max - 1; i >= 0; --i) {
		n = irg->anchors[i];
		if (n)
			irg->anchors[i] = get_new_node(n);
	}
	free_End(old_end);
}
/**
 * Copies all reachable nodes to a new obstack.  Removes bad inputs
 * from block nodes and the corresponding inputs from Phi nodes.
 * Merges single exit blocks with single entry blocks and removes
 * 1-input Phis.
 * Adds all new nodes to a new hash table for CSE.  Does not
 * perform CSE, so the hash table might contain common subexpressions.
 */
void
dead_node_elimination(ir_graph *irg) {
	if (get_opt_optimize() && get_opt_dead_node_elimination()) {
		ir_graph *rem;
		int rem_ipview = get_interprocedural_view();
		struct obstack *graveyard_obst = NULL;
		struct obstack *rebirth_obst   = NULL;
		assert(! edges_activated(irg) && "dead node elimination requires disabled edges");

		/* inform statistics that we started a dead-node elimination run */
		hook_dead_node_elim(irg, 1);

		/* Remember external state of current_ir_graph. */
		rem = current_ir_graph;
		current_ir_graph = irg;
		set_interprocedural_view(0);

		assert(get_irg_phase_state(irg) != phase_building);

		/* Handle graph state */
		free_callee_info(irg);

		/* @@@ so far we lose loops when copying */
		free_loop_information(irg);

		set_irg_doms_inconsistent(irg);

		/* A quiet place, where the old obstack can rest in peace,
		   until it will be cremated. */
		graveyard_obst = irg->obst;

		/* A new obstack, where the reachable nodes will be copied to. */
		rebirth_obst = xmalloc(sizeof(*rebirth_obst));
		irg->obst = rebirth_obst;
		obstack_init(irg->obst);
		irg->last_node_idx = 0;

		/* We also need a new value table for CSE */
		del_identities(irg->value_table);
		irg->value_table = new_identities();

		/* Copy the graph from the old to the new obstack */
		copy_graph_env(/*copy_node_nr=*/1);

		/* Free memory from old unoptimized obstack */
		obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
		xfree(graveyard_obst);            /* ... then free it.          */

		/* inform statistics that the run is over */
		hook_dead_node_elim(irg, 0);

		current_ir_graph = rem;
		set_interprocedural_view(rem_ipview);
	}
}
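/*
 * Usage sketch (illustrative): dead node elimination is typically run after
 * a batch of local optimizations to reclaim obstack memory.  Note that it
 * requires the edges information to be disabled (see the assert above).
 */
#if 0
static void example_compact_graph(ir_graph *irg) {
	local_optimize_graph(irg);   /* may leave many dead nodes behind */
	dead_node_elimination(irg);  /* copies the live nodes to a fresh obstack */
}
#endif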
/**
 * Relink bad predecessors of a block and store the old in array to the
 * link field.  This function is called by relink_bad_predecessors().
 * The array of link field starts with the block operand at position 0.
 * If block has bad predecessors, create a new in array without bad preds.
 * Otherwise leave the in array untouched.
 */
static void relink_bad_block_predecessors(ir_node *n, void *env) {
	ir_node **new_in, *irn;
	int i, new_irn_n, old_irn_arity, new_irn_arity = 0;

	/* if link field of block is NULL, look for bad predecessors otherwise
	   this is already done */
	if (get_irn_op(n) == op_Block &&
	    get_irn_link(n) == NULL) {
		/* save old predecessors in link field (position 0 is the block operand)*/
		set_irn_link(n, get_irn_in(n));

		/* count predecessors without bad nodes */
		old_irn_arity = get_irn_arity(n);
		for (i = 0; i < old_irn_arity; i++)
			if (!is_Bad(get_irn_n(n, i))) new_irn_arity++;

		/* arity changing: set new predecessors without bad nodes */
		if (new_irn_arity < old_irn_arity) {
			/* Get new predecessor array.  We do not resize the array, as we must
			   keep the old one to update Phis. */
			new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));

			/* set new predecessors in array */
			new_in[0] = NULL;
			new_irn_n = 1;
			for (i = 0; i < old_irn_arity; i++) {
				irn = get_irn_n(n, i);
				if (! is_Bad(irn)) {
					new_in[new_irn_n] = irn;
					is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
					++new_irn_n;
				}
			}
			/* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
			ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
			n->in = new_in;
		} /* ir node has bad predecessors */
	} /* Block is not relinked */
}
/**
 * Relinks Bad predecessors from Blocks and Phis called by walker
 * remove_bad_predecessors().  If n is a Block, call
 * relink_bad_block_predecessors().  If n is a Phi-node, call also the relinking
 * function of Phi's Block.  If this block has bad predecessors, relink preds
 * of the Phi-node.
 */
static void relink_bad_predecessors(ir_node *n, void *env) {
	ir_node *block, **old_in;
	int i, old_irn_arity, new_irn_arity;

	/* relink bad predecessors of a block */
	if (get_irn_op(n) == op_Block)
		relink_bad_block_predecessors(n, env);

	/* If Phi node relink its block and its predecessors */
	if (get_irn_op(n) == op_Phi) {
		/* Relink predecessors of phi's block */
		block = get_nodes_block(n);
		if (get_irn_link(block) == NULL)
			relink_bad_block_predecessors(block, env);

		old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
		old_irn_arity = ARR_LEN(old_in);

		/* Relink Phi predecessors if count of predecessors changed */
		if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
			/* set new predecessors in array
			   n->in[0] remains the same block */
			new_irn_arity = 1;
			for (i = 1; i < old_irn_arity; i++)
				if (!is_Bad((ir_node *)old_in[i])) {
					n->in[new_irn_arity] = n->in[i];
					is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
					++new_irn_arity;
				}

			ARR_SETLEN(ir_node *, n->in, new_irn_arity);
			ARR_SETLEN(int, n->attr.phi_backedge, new_irn_arity);
		}
	} /* n is a Phi node */
}
/**
 * Removes Bad predecessors from Blocks and the corresponding
 * inputs to Phi nodes as in dead_node_elimination but without
 * copying the graph.
 * On walking up set the link field to NULL, on walking down call
 * relink_bad_predecessors() (This function stores the old in array
 * to the link field and sets a new in array if the arity of predecessors
 * changes).
 */
void remove_bad_predecessors(ir_graph *irg) {
	irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
}
/*
   The following stuff implements a facility that automatically patches
   registered ir_node pointers to the new node when a dead node elimination
   occurs.
*/
struct _survive_dce_t {
	struct obstack obst;
	pmap *places;
	pmap *new_places;
	hook_entry_t dead_node_elim;
	hook_entry_t dead_node_elim_subst;
};

typedef struct _survive_dce_list_t {
	struct _survive_dce_list_t *next;
	ir_node **place;
} survive_dce_list_t;
static void dead_node_hook(void *context, ir_graph *irg, int start) {
	survive_dce_t *sd = context;

	if (start) {
		/* Create a new map before the dead node elimination is performed. */
		sd->new_places = pmap_create_ex(pmap_count(sd->places));
	} else {
		/* Patch back all nodes if dead node elimination is over and something is to be done. */
		pmap_destroy(sd->places);
		sd->places     = sd->new_places;
		sd->new_places = NULL;
	}
}
/**
 * Hook called when dead node elimination replaces old by nw.
 */
static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
	survive_dce_t *sd = context;
	survive_dce_list_t *list = pmap_get(sd->places, old);

	/* If the node is to be patched back, write the new address to all registered locations. */
	if (list != NULL) {
		survive_dce_list_t *p;

		for (p = list; p; p = p->next)
			*(p->place) = nw;

		pmap_insert(sd->new_places, nw, list);
	}
}
/**
 * Make a new Survive DCE environment.
 */
survive_dce_t *new_survive_dce(void) {
	survive_dce_t *res = xmalloc(sizeof(res[0]));
	obstack_init(&res->obst);
	res->places     = pmap_create();
	res->new_places = NULL;

	res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
	res->dead_node_elim.context                   = res;
	res->dead_node_elim.next                      = NULL;

	res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
	res->dead_node_elim_subst.context                         = res;
	res->dead_node_elim_subst.next                            = NULL;

#ifndef FIRM_ENABLE_HOOKS
	assert(0 && "need hooks enabled");
#endif

	register_hook(hook_dead_node_elim, &res->dead_node_elim);
	register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
	return res;
}
/**
 * Free a Survive DCE environment.
 */
void free_survive_dce(survive_dce_t *sd) {
	obstack_free(&sd->obst, NULL);
	pmap_destroy(sd->places);
	unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
	unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
	xfree(sd);
}
/**
 * Register a node pointer to be patched upon DCE.
 * When DCE occurs, the node pointer specified by @p place will be
 * patched to the new address of the node it is pointing to.
 *
 * @param sd     The Survive DCE environment.
 * @param place  The address of the node pointer.
 */
void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
	if (*place != NULL) {
		ir_node *irn             = *place;
		survive_dce_list_t *curr = pmap_get(sd->places, irn);
		survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));

		nw->next  = curr;
		nw->place = place;

		pmap_insert(sd->places, irn, nw);
	}
}
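/*
 * Usage sketch (illustrative only): keeping a node pointer valid across
 * dead_node_elimination().  The local variable names are hypothetical.
 */
#if 0
static void example_survive_dce(ir_graph *irg, ir_node *my_node) {
	survive_dce_t *sd = new_survive_dce();
	survive_dce_register_irn(sd, &my_node); /* register the pointer's address */
	dead_node_elimination(irg);             /* my_node is patched by the hooks */
	free_survive_dce(sd);
	/* my_node now refers to the copy on the new obstack */
	(void)my_node;
}
#endif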
/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/
/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be a pointer to the frame type of the
 * inlined procedure.  The new entities must be in the link field of
 * the entities.
 */
static INLINE void
copy_node_inline(ir_node *n, void *env) {
	ir_node *nn;
	ir_type *frame_tp = (ir_type *)env;

	copy_node(n, NULL);
	if (get_irn_op(n) == op_Sel) {
		nn = get_new_node(n);

		if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
			set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
		}
	} else if (get_irn_op(n) == op_Block) {
		nn = get_new_node(n);
		nn->attr.block.irg = current_ir_graph;
	}
}
/**
 * Walker: checks if P_value_arg_base is used.
 */
static void find_addr(ir_node *node, void *env) {
	int *allow_inline = env;
	if (is_Proj(node) && get_irn_op(get_Proj_pred(node)) == op_Start) {
		if (get_Proj_proj(node) == pn_Start_P_value_arg_base)
			*allow_inline = 0;
	}
}
/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - call with compound arguments
 * - graphs that take the address of a parameter
 *
 * check these conditions here
 */
static int can_inline(ir_node *call, ir_graph *called_graph) {
	ir_type *call_type = get_Call_type(call);
	int params, ress, i, res;
	assert(is_Method_type(call_type));

	params = get_method_n_params(call_type);
	ress   = get_method_n_ress(call_type);

	/* check parameters for compound arguments */
	for (i = 0; i < params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return 0;
	}

	/* check results for compound arguments */
	for (i = 0; i < ress; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return 0;
	}

	res = 1;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph) {
	ir_node *pre_call;
	ir_node *post_call, *post_bl;
	ir_node *in[pn_Start_max];
	ir_node *end, *end_bl;
	ir_node **res_pred;
	ir_node **cf_pred;
	ir_node *ret, *phi;
	int arity, n_ret, n_exc, n_res, i, j, rem_opt, irn_arity;
	int exc_handling;
	ir_type *called_frame;
	irg_inline_property prop = get_irg_inline_property(called_graph);

	if ( (prop < irg_inline_forced) &&
	     (!get_opt_optimize() || !get_opt_inline() || (prop == irg_inline_forbidden))) return 0;
	/* Do not inline variadic functions. */
	if (get_method_variadicity(get_entity_type(get_irg_entity(called_graph))) == variadicity_variadic)
		return 0;

	assert(get_method_n_params(get_entity_type(get_irg_entity(called_graph))) ==
	       get_method_n_params(get_Call_type(call)));

	/*
	 * currently, we cannot inline two cases:
	 * - call with compound arguments
	 * - graphs that take the address of a parameter
	 */
	if (! can_inline(call, called_graph))
		return 0;
	/* -- Turn off optimizations, this can cause problems when allocating new nodes. -- */
	rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_phase_state(current_ir_graph) != phase_building);
	assert(get_irg_pinned(current_ir_graph) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	set_irg_outs_inconsistent(current_ir_graph);
	set_irg_extblk_inconsistent(current_ir_graph);
	set_irg_doms_inconsistent(current_ir_graph);
	set_irg_loopinfo_inconsistent(current_ir_graph);
	set_irg_callee_info_state(current_ir_graph, irg_callee_info_inconsistent);
	/* -- Check preconditions -- */
	assert(is_Call(call));
	/* @@@ does not work for InterfaceIII.java after cgana
	 assert(get_Call_type(call) == get_entity_type(get_irg_entity(called_graph)));
	 assert(smaller_type(get_entity_type(get_irg_entity(called_graph)),
	 get_Call_type(call)));
	*/
	if (called_graph == current_ir_graph) {
		set_optimize(rem_opt);
		return 0;
	}

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);
	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   0 There is a handler.
	   1 Branches to End.
	   2 Exception handling not represented in Firm. -- */
	{
		ir_node *proj, *Mproj = NULL, *Xproj = NULL;
		for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
			assert(is_Proj(proj));
			if (get_Proj_proj(proj) == pn_Call_X_except) Xproj = proj;
			if (get_Proj_proj(proj) == pn_Call_M_except) Mproj = proj;
		}
		if      (Mproj) { assert(Xproj); exc_handling = 0; } /*  Mproj            */
		else if (Xproj) {                exc_handling = 1; } /* !Mproj &&  Xproj  */
		else            {                exc_handling = 2; } /* !Mproj && !Xproj  */
	}
	/* -- Build a Tuple that simulates the entry (Start node) of
	   the procedure and later replaces the Start node of the called graph.
	   Post_call is the old Call node and collects the results of the called
	   graph.  Both will end up being a tuple. -- */
	post_bl = get_nodes_block(call);
	set_irg_current_block(current_ir_graph, post_bl);
	/* XxMxPxPxPxT of Start + parameter of Call */
	in[pn_Start_X_initial_exec]   = new_Jmp();
	in[pn_Start_M]                = get_Call_mem(call);
	in[pn_Start_P_frame_base]     = get_irg_frame(current_ir_graph);
	in[pn_Start_P_globals]        = get_irg_globals(current_ir_graph);
	in[pn_Start_P_tls]            = get_irg_tls(current_ir_graph);
	in[pn_Start_T_args]           = new_Tuple(get_Call_n_params(call), get_Call_param_arr(call));
	/* in[pn_Start_P_value_arg_base] = ??? */
	assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
	pre_call = new_Tuple(pn_Start_max - 1, in);
	post_call = call;

	/* --
	   The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);
	/* -- Prepare state for dead node elimination -- */
	/* Visited flags in calling irg must be >= flag in called irg.
	   Else walker and arity computation will not work. */
	if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
		set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
	if (get_irg_block_visited(current_ir_graph) < get_irg_block_visited(called_graph))
		set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
	/* Set pre_call as new Start node in link field of the start node of
	   calling graph and pre_call's block as new block for the start block
	   of the calling graph.
	   Further mark these nodes so that they are not visited by the
	   copying. */
	set_irn_link(get_irg_start(called_graph), pre_call);
	set_irn_visited(get_irg_start(called_graph), get_irg_visited(current_ir_graph));
	set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
	set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(current_ir_graph));
	set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
	set_irn_visited(get_irg_bad(called_graph), get_irg_visited(current_ir_graph));

	/* Initialize for compaction of in arrays */
	inc_irg_block_visited(current_ir_graph);
	/* -- Replicate local entities of the called_graph -- */
	/* copy the entities. */
	called_frame = get_irg_frame_type(called_graph);
	for (i = 0; i < get_class_n_members(called_frame); i++) {
		ir_entity *new_ent, *old_ent;
		old_ent = get_class_member(called_frame, i);
		new_ent = copy_entity_own(old_ent, get_cur_frame_type());
		set_entity_link(old_ent, new_ent);
	}

	/* visited is > than that of called graph.  With this trick visited will
	   remain unchanged so that an outer walker, e.g., searching the call nodes
	   to inline, calling this inline will not visit the inlined nodes. */
	set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);

	/* -- Performing dead node elimination inlines the graph -- */
	/* Copies the nodes to the obstack of current_ir_graph.  Updates links to new
	   entities. */
	/* @@@ endless loops are not copied!! -- they should be, I think... */
	irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
	         get_irg_frame_type(called_graph));
	/* Repair called_graph */
	set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
	set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
	set_Block_block_visited(get_irg_start_block(called_graph), 0);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	   predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories. */
	/* -- Precompute some values -- */
	end_bl = get_new_node(get_irg_end_block(called_graph));
	end    = get_new_node(get_irg_end(called_graph));
	arity  = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
	n_res  = get_method_n_ress(get_Call_type(call));

	res_pred = xmalloc(n_res * sizeof(*res_pred));
	cf_pred  = xmalloc(arity * sizeof(*cf_pred));

	set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */

	/* -- archive keepalives -- */
	irn_arity = get_irn_arity(end);
	for (i = 0; i < irn_arity; i++)
		add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));

	/* The new end node will die.  We need not free as the in array is on the obstack:
	   copy_node() only generated 'D' arrays. */
	/* -- Replace Return nodes by Jump nodes. -- */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ret = get_irn_n(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_block(ret));
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* -- Build a Tuple for all results of the method.
	   Add Phi node if there was more than one Return. -- */
	turn_into_tuple(post_call, 4);
	/* First the Memory-Phi */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ret = get_irn_n(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_ret] = get_Return_mem(ret);
			n_ret++;
		}
	}
	phi = new_Phi(n_ret, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M_regular, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		for (j = 0; j < n_res; j++) {
			n_ret = 0;
			for (i = 0; i < arity; i++) {
				ret = get_irn_n(end_bl, i);
				if (get_irn_op(ret) == op_Return) {
					cf_pred[n_ret] = get_Return_res(ret, j);
					n_ret++;
				}
			}
			if (n_ret > 0)
				phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
			else
				phi = new_Bad();
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_irn_link(phi, get_irn_link(post_bl));
				set_irn_link(post_bl, phi);
			}
		}
		set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_Bad());
	}
	/* Finally the exception control flow.
	   We have two (three) possible situations:
	   First if the Call branches to an exception handler: We need to add a Phi node to
	   collect the memory containing the exception objects.  Further we need
	   to add another block to get a correct representation of this Phi.  To
	   this block we add a Jmp that resolves into the X output of the Call
	   when the Call is turned into a tuple.
	   Second the Call branches to End, the exception is not handled.  Just
	   add all inlined exception branches to the End node.
	   Third: there is no Exception edge at all.  Handle as case two. */
	if (exc_handling == 0) {
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ret = get_irn_n(end_bl, i);
			if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
			set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
			/* The Phi for the memories with the exception objects */
			n_exc = 0;
			for (i = 0; i < arity; i++) {
				ret = skip_Proj(get_irn_n(end_bl, i));
				if (is_Call(ret)) {
					cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 3);
					n_exc++;
				} else if (is_fragile_op(ret)) {
					/* We rely that all cfops have the memory output at the same position. */
					cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 0);
					n_exc++;
				} else if (get_irn_op(ret) == op_Raise) {
					cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 1);
					n_exc++;
				}
			}
			set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_Bad());
			set_Tuple_pred(call, pn_Call_M_except, new_Bad());
		}
	} else {
		ir_node *main_end_bl;
		int main_end_bl_arity;
		ir_node **end_preds;

		/* assert(exc_handling == 1 || no exceptions. ) */
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret = get_irn_n(end_bl, i);

			if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		main_end_bl       = get_irg_end_block(current_ir_graph);
		main_end_bl_arity = get_irn_arity(main_end_bl);
		end_preds         = xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));

		for (i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_Bad());
		set_Tuple_pred(call, pn_Call_M_except, new_Bad());
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	/* -- Turn CSE back on. -- */
	set_optimize(rem_opt);

	return 1;
}
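/*
 * Usage sketch (illustrative): inlining one specific call site by hand.
 * collect_phiprojs() must have been run, because inline_method() walks the
 * Proj list of the Call node via the link fields.
 */
#if 0
static void example_inline_one_call(ir_graph *caller, ir_node *call) {
	ir_graph *callee = get_call_called_irg(call); /* static helper below */

	current_ir_graph = caller;
	if (callee != NULL) {
		collect_phiprojs(caller);
		if (inline_method(call, callee)) {
			/* the Call node is now a Tuple; clean up the graph afterwards */
			local_optimize_graph(caller);
		}
	}
}
#endif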
/********************************************************************/
/*  Apply inlining to small methods.                                */
/********************************************************************/
/** Represents a possible inlinable call in a graph. */
typedef struct _call_entry call_entry;
struct _call_entry {
	ir_node    *call;   /**< the Call */
	ir_graph   *callee; /**< the callee called here */
	call_entry *next;   /**< for linking the next one */
};

/**
 * environment for inlining small irgs
 */
typedef struct _inline_env_t {
	struct obstack obst;  /**< an obstack where call_entries are allocated on. */
	call_entry *head;     /**< the head of the call entry list */
	call_entry *tail;     /**< the tail of the call entry list */
} inline_env_t;
/**
 * Returns the irg called from a Call node.  If the irg is not
 * known, NULL is returned.
 */
static ir_graph *get_call_called_irg(ir_node *call) {
	ir_node *addr;
	ir_graph *called_irg = NULL;

	addr = get_Call_ptr(call);
	if (is_SymConst(addr) && get_SymConst_kind(addr) == symconst_addr_ent) {
		called_irg = get_entity_irg(get_SymConst_entity(addr));
	}

	return called_irg;
}
/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env) {
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);
		if (called_irg) {
			/* The Call node calls a locally defined method.  Remember to inline. */
			inline_env_t *ienv  = env;
			call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
			entry->call   = call;
			entry->callee = called_irg;
			entry->next   = NULL;

			if (ienv->tail == NULL)
				ienv->head = entry;
			else
				ienv->tail->next = entry;
			ienv->tail = entry;
		}
	}
}
/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size) {
	ir_graph *rem = current_ir_graph;
	inline_env_t env;
	call_entry *entry;
	DEBUG_ONLY(firm_dbg_module_t *dbg;)

	if (!(get_opt_optimize() && get_opt_inline())) return;

	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");

	current_ir_graph = irg;
	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We can not inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	env.head = env.tail = NULL;
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (env.head != NULL) {
		/* There are calls to inline */
		collect_phiprojs(irg);
		for (entry = env.head; entry != NULL; entry = entry->next) {
			ir_graph *callee = entry->callee;
			if (((_obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst)) < size) ||
			    (get_irg_inline_property(callee) >= irg_inline_forced)) {
				inline_method(entry->call, callee);
			}
		}
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}
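/*
 * Usage sketch (illustrative): the size threshold is compared against the
 * obstack memory used by the callee's graph, so the value below is merely an
 * arbitrary example.
 */
#if 0
static void example_inline_small(ir_graph *irg) {
	inline_small_irgs(irg, 50 /* hypothetical size threshold */);
}
#endif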
/**
 * Environment for inlining irgs.
 */
typedef struct {
	int n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	int n_nodes_orig;        /**< for statistics */
	call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
	call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph. */
	int n_call_nodes;        /**< Number of Call nodes in the graph. */
	int n_call_nodes_orig;   /**< for statistics */
	int n_callers;           /**< Number of known graphs that call this graph. */
	int n_callers_orig;      /**< for statistics */
	int got_inline;          /**< Set, if at least one call inside this graph was inlined. */
} inline_irg_env;
/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(struct obstack *obst) {
	inline_irg_env *env    = obstack_alloc(obst, sizeof(*env));
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->call_head         = NULL;
	env->call_tail         = NULL;
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	return env;
}
typedef struct walker_env {
	struct obstack *obst; /**< the obstack for allocations. */
	inline_irg_env *x;    /**< the inline environment */
	int ignore_runtime;   /**< the ignore runtime flag */
} wenv_t;
/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx) {
	wenv_t         *env = ctx;
	inline_irg_env *x   = env->x;
	ir_op          *op  = get_irn_op(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (op != op_Proj && op != op_Tuple && op != op_Sync) {
		++x->n_nodes;
		++x->n_nodes_orig;
	}

	if (op != op_Call) return;

	/* check, if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst(symc) && get_SymConst_kind(symc) == symconst_addr_ent) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee) {
		inline_irg_env *callee_env = get_irg_link(callee);
		/* count all static callers */
		++callee_env->n_callers;
		++callee_env->n_callers_orig;

		/* link it in the list of possible inlinable entries */
		entry = obstack_alloc(env->obst, sizeof(*entry));
		entry->call   = call;
		entry->callee = callee;
		entry->next   = NULL;
		if (x->call_tail == NULL)
			x->call_head = entry;
		else
			x->call_tail->next = entry;
		x->call_tail = entry;
	}
}
/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leave.
 */
INLINE static int is_leave(ir_graph *irg) {
	inline_irg_env *env = get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is smaller than size
 * in the irg's environment.
 */
INLINE static int is_smaller(ir_graph *callee, int size) {
	inline_irg_env *env = get_irg_link(callee);
	return env->n_nodes < size;
}
/**
 * Append the nodes of the list src to the nodes of the list in environment dst.
 */
static void append_call_list(struct obstack *obst, inline_irg_env *dst, call_entry *src) {
	call_entry *entry, *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph.  Luckily the inliner leaves this information
	   in the link field. */
	for (entry = src; entry != NULL; entry = entry->next) {
		nentry = obstack_alloc(obst, sizeof(*nentry));
		nentry->call   = get_irn_link(entry->call);
		nentry->callee = entry->callee;
		nentry->next   = NULL;
		dst->call_tail->next = nentry;
		dst->call_tail       = nentry;
	}
}
/**
 * Inlines small leave methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_runtime) {
	inline_irg_env   *env;
	ir_graph         *irg;
	int              i, n_irgs;
	ir_graph         *rem;
	int              did_inline;
	wenv_t           wenv;
	call_entry       *entry, *tail;
	const call_entry *centry;
	struct obstack   obst;
	DEBUG_ONLY(firm_dbg_module_t *dbg;)

	if (!(get_opt_optimize() && get_opt_inline())) return;

	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
	rem = current_ir_graph;
	obstack_init(&obst);
	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env(&obst));

	/* Precompute information in temporary data structure. */
	wenv.obst           = &obst;
	wenv.ignore_runtime = ignore_runtime;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		assert(get_irg_phase_state(irg) != phase_building);
		free_callee_info(irg);

		wenv.x = get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}
	/* -- and now inline. -- */

	/* Inline leaves recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			ir_node *call;
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env = (inline_irg_env *)get_irg_link(current_ir_graph);

			tail = NULL;
			for (entry = env->call_head; entry != NULL; entry = entry->next) {
				ir_graph *callee;

				if (env->n_nodes > maxsize) break;

				call   = entry->call;
				callee = entry->callee;

				if (is_leave(callee) && is_smaller(callee, leavesize)) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						/* Do some statistics */
						inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						if (tail != NULL)
							tail->next = entry->next;
						else
							env->call_head = entry->next;
						continue;
					}
				}
				tail = entry;
			}
			env->call_tail = tail;
		}
	} while (did_inline);
	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		ir_node *call;
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env = (inline_irg_env *)get_irg_link(current_ir_graph);

		/* note that the list of possible calls is updated during the process */
		tail = NULL;
		for (entry = env->call_head; entry != NULL; entry = entry->next) {
			ir_graph *callee;

			call   = entry->call;
			callee = entry->callee;

			if (((is_smaller(callee, size) && (env->n_nodes < maxsize)) ||    /* small function */
			     (get_irg_inline_property(callee) >= irg_inline_forced))) {
				if (!phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				if (inline_method(call, callee)) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* callee was inlined.  Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(&obst, env, callee_env->call_head);
					env->n_call_nodes += callee_env->n_call_nodes;
					env->n_nodes += callee_env->n_nodes;
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside callee
					   are now called once more */
					for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
						inline_irg_env *penv = get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					if (tail != NULL)
						tail->next = entry->next;
					else
						env->call_head = entry->next;
					continue;
				}
			}
			tail = entry;
		}
		env->call_tail = tail;
	}
	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = (inline_irg_env *)get_irg_link(irg);

		if (env->got_inline) {
			/* this irg got calls inlined */
			set_irg_outs_inconsistent(irg);
			set_irg_doms_inconsistent(irg);

			optimize_graph_df(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers))
			DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			    env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			    env->n_callers_orig, env->n_callers,
			    get_entity_name(get_irg_entity(irg))));
	}

	obstack_free(&obst, NULL);
	current_ir_graph = rem;
}
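/*
 * Usage sketch (illustrative): the thresholds are measured in graph nodes
 * (see is_smaller() above); the numbers below are made up for the example.
 */
#if 0
static void example_inline_leaves(void) {
	/*                     maxsize, leavesize, size, ignore_runtime */
	inline_leave_functions(750,     32,        16,   0);
}
#endif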
/*******************************************************************/
/* Code Placement.  Pins all floating nodes to a block where they  */
/* will be executed only if needed.                                */
/*******************************************************************/
/**
 * Returns non-zero, if a block is not reachable from Start.
 *
 * @param block  the block to test
 */
static int
is_Block_unreachable(ir_node *block) {
	return is_Block_dead(block) || get_Block_dom_depth(block) < 0;
}
/**
 * Find the earliest correct block for node n.  --- Place n into the
 * same Block as its dominance-deepest Input.
 *
 * We have to avoid calls to get_nodes_block() here
 * because the graph is floating.
 *
 * move_out_of_loops() expects that place_floats_early() has placed
 * all "living" nodes into a living block.  That's why we must
 * move nodes in dead blocks with "live" successors into a valid
 * block.
 * We move them just into the same block as their successor (or,
 * in case of a Phi, into the effective use block).  For Phi successors,
 * this may still be a dead block, but then there is no real use, as
 * the control flow will be dead later.
 *
 * @param n         the node to be placed
 * @param worklist  a worklist, predecessors of non-floating nodes are placed here
 */
static void
place_floats_early(ir_node *n, waitq *worklist) {
	int i, irn_arity;

	/* we must not run into an infinite loop */
	assert(irn_not_visited(n));
	mark_irn_visited(n);

	/* Place floating nodes. */
	if (get_irn_pinned(n) == op_pin_state_floats) {
		ir_node *curr_block = get_irn_n(n, -1);
		int in_dead_block   = is_Block_unreachable(curr_block);
		int depth           = 0;
		ir_node *b          = NULL;   /* The block to place this node in */

		assert(is_no_Block(n));

		if (is_irn_start_block_placed(n)) {
			/* These nodes will not be placed by the loop below. */
			b     = get_irg_start_block(current_ir_graph);
			depth = 1;
		}

		/* find the block for this node. */
		irn_arity = get_irn_arity(n);
		for (i = 0; i < irn_arity; i++) {
			ir_node *pred = get_irn_n(n, i);
			ir_node *pred_block;

			if ((irn_not_visited(pred))
			    && (get_irn_pinned(pred) == op_pin_state_floats)) {

				/*
				 * If the current node is NOT in a dead block, but one of its
				 * predecessors is, we must move the predecessor to a live block.
				 * Such a thing can happen, if global CSE chose a node from a dead block.
				 * We move it simply to our block.
				 * Note that neither Phi nor End nodes are floating, so we don't
				 * need to handle them here.
				 */
				if (! in_dead_block) {
					if (get_irn_pinned(pred) == op_pin_state_floats &&
					    is_Block_unreachable(get_irn_n(pred, -1)))
						set_nodes_block(pred, curr_block);
				}
				place_floats_early(pred, worklist);
			}

			/*
			 * A node in the Bad block must stay in the Bad block,
			 * so don't compute a new block for it.
			 */
			if (in_dead_block)
				continue;

			/* Because all loops contain at least one op_pin_state_pinned node, now all
			   our inputs are either op_pin_state_pinned or place_early() has already
			   been finished on them.  We do not have any unfinished inputs! */
			pred_block = get_irn_n(pred, -1);
			if ((!is_Block_dead(pred_block)) &&
			    (get_Block_dom_depth(pred_block) > depth)) {
				b     = pred_block;
				depth = get_Block_dom_depth(pred_block);
			}
			/* Avoid that the node is placed in the Start block */
			if ((depth == 1) && (get_Block_dom_depth(get_irn_n(n, -1)) > 1)
			    && get_irg_phase_state(current_ir_graph) != phase_backend) {
				b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
				assert(b != get_irg_start_block(current_ir_graph));
				depth = 2;
			}
		}
		if (b)
			set_nodes_block(n, b);
	}
	/*
	 * Add predecessors of non floating nodes and non-floating predecessors
	 * of floating nodes to worklist and fix their blocks if they are in dead block.
	 */
	irn_arity = get_irn_arity(n);

	if (get_irn_op(n) == op_End) {
		/*
		 * Simplest case: End node.  Predecessors are keep-alives,
		 * no need to move out of dead block.
		 */
		for (i = -1; i < irn_arity; ++i) {
			ir_node *pred = get_irn_n(n, i);
			if (irn_not_visited(pred))
				waitq_put(worklist, pred);
		}
	} else if (is_Block(n)) {
		/*
		 * Blocks: Predecessors are control flow, no need to move
		 * them out of dead block.
		 */
		for (i = irn_arity - 1; i >= 0; --i) {
			ir_node *pred = get_irn_n(n, i);
			if (irn_not_visited(pred))
				waitq_put(worklist, pred);
		}
	} else if (is_Phi(n)) {
		ir_node *pred;
		ir_node *curr_block = get_irn_n(n, -1);
		int in_dead_block   = is_Block_unreachable(curr_block);

		/*
		 * Phi nodes: move nodes from dead blocks into the effective use
		 * of the Phi-input if the Phi is not in a bad block.
		 */
		pred = get_irn_n(n, -1);
		if (irn_not_visited(pred))
			waitq_put(worklist, pred);

		for (i = irn_arity - 1; i >= 0; --i) {
			ir_node *pred = get_irn_n(n, i);

			if (irn_not_visited(pred)) {
				if (! in_dead_block &&
				    get_irn_pinned(pred) == op_pin_state_floats &&
				    is_Block_unreachable(get_irn_n(pred, -1))) {
					set_nodes_block(pred, get_Block_cfgpred_block(curr_block, i));
				}
				waitq_put(worklist, pred);
			}
		}
	} else {
		ir_node *pred;
		ir_node *curr_block = get_irn_n(n, -1);
		int in_dead_block   = is_Block_unreachable(curr_block);

		/*
		 * All other nodes: move nodes from dead blocks into the same block.
		 */
		pred = get_irn_n(n, -1);
		if (irn_not_visited(pred))
			waitq_put(worklist, pred);

		for (i = irn_arity - 1; i >= 0; --i) {
			ir_node *pred = get_irn_n(n, i);

			if (irn_not_visited(pred)) {
				if (! in_dead_block &&
				    get_irn_pinned(pred) == op_pin_state_floats &&
				    is_Block_unreachable(get_irn_n(pred, -1))) {
					set_nodes_block(pred, curr_block);
				}
				waitq_put(worklist, pred);
			}
		}
	}
}
/**
 * Floating nodes form subgraphs that begin at nodes as Const, Load,
 * Start, Call and that end at op_pin_state_pinned nodes as Store, Call.
 * place_early() places all floating nodes reachable from its argument through
 * floating nodes and adds all beginnings at op_pin_state_pinned nodes to the
 * worklist.
 *
 * @param worklist  a worklist, used for the algorithm, empty on in/output
 */
static void place_early(waitq *worklist) {
	assert(worklist);
	inc_irg_visited(current_ir_graph);

	/* this inits the worklist */
	place_floats_early(get_irg_end(current_ir_graph), worklist);

	/* Work the content of the worklist. */
	while (!waitq_empty(worklist)) {
		ir_node *n = waitq_get(worklist);
		if (irn_not_visited(n))
			place_floats_early(n, worklist);
	}

	set_irg_outs_inconsistent(current_ir_graph);
	set_irg_pinned(current_ir_graph, op_pin_state_pinned);
}
/**
 * Compute the deepest common ancestor of block and dca.
 */
static ir_node *calc_dca(ir_node *dca, ir_node *block) {
	assert(block);

	/* we do not want to place nodes in dead blocks */
	if (is_Block_dead(block))
		return dca;

	/* We found a first legal placement. */
	if (!dca) return block;

	/* Find a placement that dominates both, dca and block. */
	while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
		block = get_Block_idom(block);

	while (get_Block_dom_depth(dca) > get_Block_dom_depth(block)) {
		dca = get_Block_idom(dca);
	}

	while (block != dca) {
		block = get_Block_idom(block); dca = get_Block_idom(dca);
	}

	return dca;
}
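/*
 * Worked example (illustrative): let the idom chain be
 * Start <- A <- B <- C with dominance depths 0..3.  calc_dca(B, C) first
 * walks the deeper block C up to B's depth, yielding B; the remaining loops
 * terminate immediately with block == dca == B.  calc_dca(NULL, C) simply
 * returns C as the first legal placement.
 */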
/** Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
 * I.e., DCA is the block where we might place PRODUCER.
 * A data flow edge points from producer to consumer.
 */
static ir_node *
consumer_dom_dca(ir_node *dca, ir_node *consumer, ir_node *producer) {
	ir_node *block = NULL;

	/* Compute the latest block into which we can place a node so that it is
	   before consumer. */
	if (get_irn_op(consumer) == op_Phi) {
		/* our consumer is a Phi-node, the effective use is in all those
		   blocks through which the Phi-node reaches producer */
		int i, irn_arity;
		ir_node *phi_block = get_nodes_block(consumer);
		irn_arity = get_irn_arity(consumer);

		for (i = 0; i < irn_arity; i++) {
			if (get_irn_n(consumer, i) == producer) {
				ir_node *new_block = get_nodes_block(get_Block_cfgpred(phi_block, i));

				if (! is_Block_unreachable(new_block))
					block = calc_dca(block, new_block);
			}
		}

		if (! block)
			block = get_irn_n(producer, -1);
	} else {
		assert(is_no_Block(consumer));
		block = get_nodes_block(consumer);
	}

	/* Compute the deepest common ancestor of block and dca. */
	return calc_dca(dca, block);
}
/* FIXME: the name clashes here with the function from ana/field_temperature.c
 * please rename. */
static INLINE int get_irn_loop_depth(ir_node *n) {
	return get_loop_depth(get_irn_loop(n));
}
/**
 * Move n to a block with less loop depth than its current block.  The
 * new block must be dominated by early.
 *
 * @param n      the node that should be moved
 * @param early  the earliest block we can move n to
 */
static void move_out_of_loops(ir_node *n, ir_node *early) {
	ir_node *best, *dca;

	/* Find the region deepest in the dominator tree dominating
	   dca with the least loop nesting depth, but still dominated
	   by our early placement. */
	dca  = get_nodes_block(n);
	best = dca;
	while (dca != early) {
		dca = get_Block_idom(dca);
		if (!dca || is_Bad(dca)) break; /* may be Bad if not reachable from Start */
		if (get_irn_loop_depth(dca) < get_irn_loop_depth(best)) {
			best = dca;
		}
	}
	if (best != get_nodes_block(n)) {
		/* debug output
		printf("Moving out of loop: "); DDMN(n);
		printf(" Outermost block: "); DDMN(early);
		printf(" Best block: "); DDMN(best);
		printf(" Innermost block: "); DDMN(get_nodes_block(n));
		*/
		set_nodes_block(n, best);
	}
}
/**
 * Find the latest legal block for N and place N into the
 * `optimal' Block between the latest and earliest legal block.
 * The `optimal' block is the dominance-deepest block of those
 * with the least loop-nesting-depth.  This places N out of as many
 * loops as possible and then makes it as control dependent as
 * possible.
 *
 * @param n         the node to be placed
 * @param worklist  a worklist, all successors of non-floating nodes are
 *                  placed here
 */
static void place_floats_late(ir_node *n, pdeq *worklist) {
	int i;
	ir_node *early_blk;

	assert(irn_not_visited(n)); /* no multiple placement */

	mark_irn_visited(n);

	/* no need to place block nodes, control nodes are already placed. */
	if ((get_irn_op(n) != op_Block) &&
	    (!is_cfop(n)) &&
	    (get_irn_mode(n) != mode_X)) {
		/* Remember the early_blk placement of this block to move it
		   out of loop no further than the early_blk placement. */
		early_blk = get_irn_n(n, -1);

		/*
		 * BEWARE: Here we also get code, that is live, but
		 * was in a dead block.  If the node is live, but because
		 * of CSE in a dead block, we still might need it.
		 */

		/* Assure that our users are all placed, except the Phi-nodes.
		   --- Each data flow cycle contains at least one Phi-node.  We
		   have to break the `user has to be placed before the
		   producer' dependence cycle and the Phi-nodes are the
		   place to do so, because we need to base our placement on the
		   final region of our users, which is OK with Phi-nodes, as they
		   are op_pin_state_pinned, and they never have to be placed after a
		   producer of one of their inputs in the same block anyway. */
		for (i = get_irn_n_outs(n) - 1; i >= 0; --i) {
			ir_node *succ = get_irn_out(n, i);
			if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
				place_floats_late(succ, worklist);
		}

		if (! is_Block_dead(early_blk)) {
			/* do only move things that were not dead */
			ir_op *op = get_irn_op(n);

			/* We have to determine the final block of this node... except for
			   constants and Projs */
			if ((get_irn_pinned(n) == op_pin_state_floats) &&
			    (op != op_Const)    &&
			    (op != op_SymConst) &&
			    (op != op_Proj))
			{
				ir_node *dca = NULL;  /* deepest common ancestor in the
				                         dominator tree of all nodes'
				                         blocks depending on us; our final
				                         placement has to dominate DCA. */
				for (i = get_irn_n_outs(n) - 1; i >= 0; --i) {
					ir_node *succ = get_irn_out(n, i);
					ir_node *succ_blk;

					if (get_irn_op(succ) == op_End) {
						/*
						 * This consumer is the End node, a keep alive edge.
						 * This is not a real consumer, so we ignore it
						 */
						continue;
					}

					/* ignore if succ is in dead code */
					succ_blk = get_irn_n(succ, -1);
					if (is_Block_unreachable(succ_blk))
						continue;
					dca = consumer_dom_dca(dca, succ, n);
				}
				if (dca != NULL) {
					set_nodes_block(n, dca);
					move_out_of_loops(n, early_blk);
				}
			}
		}
	}

	/* Add successors of all non-floating nodes on list. (Those of floating
	   nodes are placed already and therefore are marked.) */
	for (i = 0; i < get_irn_n_outs(n); i++) {
		ir_node *succ = get_irn_out(n, i);
		if (irn_not_visited(get_irn_out(n, i))) {
			pdeq_putr(worklist, succ);
		}
	}
}
/**
 * Place floating nodes on the given worklist as late as possible using
 * the dominance tree.
 *
 * @param worklist  the worklist containing the nodes to place
 */
static void place_late(waitq *worklist) {
	assert(worklist);
	inc_irg_visited(current_ir_graph);

	/* This fills the worklist initially. */
	place_floats_late(get_irg_start_block(current_ir_graph), worklist);

	/* And now empty the worklist again... */
	while (!waitq_empty(worklist)) {
		ir_node *n = waitq_get(worklist);
		if (irn_not_visited(n))
			place_floats_late(n, worklist);
	}
}
/* Code Placement. */
void place_code(ir_graph *irg) {
	waitq *worklist;
	ir_graph *rem = current_ir_graph;

	current_ir_graph = irg;

	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);

	if (1 || get_irg_loopinfo_state(irg) != loopinfo_consistent) {
		free_loop_information(irg);
		construct_backedges(irg);
	}

	/* Place all floating nodes as early as possible.  This guarantees
	   a legal code placement. */
	worklist = new_waitq();
	place_early(worklist);

	/* place_early() invalidates the outs, place_late needs them. */
	compute_irg_outs(irg);

	/* Now move the nodes down in the dominator tree.  This reduces the
	   unnecessary executions of the node. */
	place_late(worklist);

	set_irg_outs_inconsistent(current_ir_graph);
	set_irg_loopinfo_inconsistent(current_ir_graph);
	del_waitq(worklist);
	current_ir_graph = rem;
}
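/*
 * Usage sketch (illustrative): code placement relies on consistent dominance
 * information, and splitting critical edges first gives the placer blocks to
 * move code into.  A typical driver might look roughly like this.
 */
#if 0
static void example_place_code(ir_graph *irg) {
	remove_critical_cf_edges(irg); /* see below */
	compute_doms(irg);             /* place_code needs dominance depths */
	place_code(irg);
}
#endif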
/**
 * Called by walker of remove_critical_cf_edges().
 *
 * Places an empty block on an edge between a block with multiple
 * predecessors and a block with multiple successors.
 *
 * @param n   IR node
 * @param env Environment of walker.  The changed field.
 */
static void walk_critical_cf_edges(ir_node *n, void *env) {
	int arity, i;
	ir_node *pre, *block, *jmp;
	int *changed = env;
	ir_graph *irg = get_irn_irg(n);

	/* Block has multiple predecessors */
	arity = get_irn_arity(n);
	if (arity > 1) {
		if (n == get_irg_end_block(irg))
			return;  /* No use to add a block here. */

		for (i = 0; i < arity; ++i) {
			ir_op *cfop;

			pre = get_irn_n(n, i);
			cfop = get_irn_op(skip_Proj(pre));
			/* Predecessor has multiple successors.  Insert new control flow edge but
			   ignore exception edges. */
			if (! is_op_fragile(cfop) && is_op_forking(cfop)) {
				/* set predecessor of new block */
				block = new_r_Block(irg, 1, &pre);
				/* insert new jmp node to new block */
				jmp = new_r_Jmp(irg, block);
				/* set successor of new block */
				set_irn_n(n, i, jmp);
				*changed = 1;
			} /* predecessor has multiple successors */
		} /* for all predecessors */
	} /* n is a multi-entry block */
}
void remove_critical_cf_edges(ir_graph *irg) {
	int changed = 0;

	irg_block_walk_graph(irg, NULL, walk_critical_cf_edges, &changed);
	if (changed) {
		/* control flow changed */
		set_irg_outs_inconsistent(irg);
		set_irg_extblk_inconsistent(irg);
		set_irg_doms_inconsistent(irg);
		set_irg_loopinfo_inconsistent(irg);
	}
}