2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Implements several optimizations for IA32.
23 * @author Christian Wuerdig
33 #include "firm_types.h"
43 #include "../benode_t.h"
44 #include "../besched_t.h"
46 #include "ia32_new_nodes.h"
47 #include "bearch_ia32_t.h"
48 #include "gen_ia32_regalloc_if.h"
49 #include "ia32_transform.h"
50 #include "ia32_dbg_stat.h"
51 #include "ia32_util.h"
53 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
55 //#define AGGRESSIVE_AM
58 IA32_AM_CAND_NONE = 0, /**< no addressmode possible with irn inputs */
59 IA32_AM_CAND_LEFT = 1, /**< addressmode possible with left input */
60 IA32_AM_CAND_RIGHT = 2, /**< addressmode possible with right input */
61 IA32_AM_CAND_BOTH = 3 /**< addressmode possible with both inputs */
64 typedef int is_op_func_t(const ir_node *n);
65 typedef ir_node *load_func_t(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
68 * checks if a node represents the NOREG value
70 static INLINE int be_is_NoReg(ia32_code_gen_t *cg, const ir_node *irn) {
	/* True iff irn is one of the code generator's "no register" placeholder
	 * nodes (gp, xmm or vfp variant). NOTE(review): closing brace not visible
	 * in this extract. */
71 	return irn == cg->noreg_gp || irn == cg->noreg_xmm || irn == cg->noreg_vfp;
74 /********************************************************************************************************
75 * _____ _ _ ____ _ _ _ _ _
76 * | __ \ | | | | / __ \ | | (_) (_) | | (_)
77 * | |__) |__ ___ _ __ | |__ ___ | | ___ | | | |_ __ | |_ _ _ __ ___ _ ______ _| |_ _ ___ _ __
78 * | ___/ _ \/ _ \ '_ \| '_ \ / _ \| |/ _ \ | | | | '_ \| __| | '_ ` _ \| |_ / _` | __| |/ _ \| '_ \
79 * | | | __/ __/ |_) | | | | (_) | | __/ | |__| | |_) | |_| | | | | | | |/ / (_| | |_| | (_) | | | |
80 * |_| \___|\___| .__/|_| |_|\___/|_|\___| \____/| .__/ \__|_|_| |_| |_|_/___\__,_|\__|_|\___/|_| |_|
83 ********************************************************************************************************/
86 * NOTE: THESE PEEPHOLE OPTIMIZATIONS MUST BE CALLED AFTER SCHEDULING AND REGISTER ALLOCATION.
89 // only optimize up to 48 stores behind IncSPs
90 #define MAXPUSH_OPTIMIZE 48
93 * Tries to create pushs from IncSP,Store combinations
/*
 * Folds an IncSP followed by Stores (addressed relative to that stack
 * pointer) into ia32 Push instructions, then rewires users of the IncSP.
 *
 * NOTE(review): the original line numbers embedded in this extract are
 * non-contiguous — several statements (loop breaks, braces, declarations)
 * are missing here. Verify control flow against the complete source.
 */
95 static void ia32_create_Pushs(ir_node *irn, ia32_code_gen_t *cg) {
99 ir_node *stores[MAXPUSH_OPTIMIZE];
100 ir_node *block = get_nodes_block(irn);
101 ir_graph *irg = cg->irg;
103 ir_mode *spmode = get_irn_mode(irn);
105 memset(stores, 0, sizeof(stores));
	/* only called on IncSP nodes */
107 assert(be_is_IncSP(irn));
109 offset = be_get_IncSP_offset(irn);
114 * We first walk the schedule after the IncSP node as long as we find
115 * suitable stores that could be transformed to a push.
116 * We save them into the stores array which is sorted by the frame offset/4
117 * attached to the node
119 for(node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
124 // it has to be a store
125 if(!is_ia32_Store(node))
128 // it has to use our sp value
129 if(get_irn_n(node, 0) != irn)
131 // store has to be attached to NoMem
132 mem = get_irn_n(node, 3);
	/* index-scaled address mode stores can't become simple pushes */
137 if( (get_ia32_am_flavour(node) & ia32_am_IS) != 0)
140 offset = get_ia32_am_offs_int(node);
	/* each push moves the sp by 4 bytes, hence slot = offset / 4 */
142 storeslot = offset / 4;
143 if(storeslot >= MAXPUSH_OPTIMIZE)
146 // storing into the same slot twice is bad (and shouldn't happen...)
147 if(stores[storeslot] != NULL)
150 // storing at half-slots is bad
154 stores[storeslot] = node;
	/* replay the collected stores, highest slot first, as Push nodes
	 * threaded through the sp value that precedes the IncSP */
157 curr_sp = get_irn_n(irn, 0);
159 // walk the stores in inverse order and create pushs for them
160 i = (offset / 4) - 1;
161 if(i >= MAXPUSH_OPTIMIZE) {
162 i = MAXPUSH_OPTIMIZE - 1;
165 for( ; i >= 0; --i) {
166 const arch_register_t *spreg;
168 ir_node *val, *mem, *mem_proj;
169 ir_node *store = stores[i];
170 ir_node *noreg = ia32_new_NoReg_gp(cg);
	/* gaps in the slot array stop the conversion run */
172 if(store == NULL || is_Bad(store))
175 val = get_irn_n(store, 2);
176 mem = get_irn_n(store, 3);
177 spreg = arch_get_irn_register(cg->arch_env, curr_sp);
179 push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, val, curr_sp, mem);
181 set_ia32_am_support(push, ia32_am_Source, ia32_am_unary);
182 copy_ia32_Immop_attr(push, store);
184 sched_add_before(irn, push);
186 // create stackpointer proj
187 curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
188 arch_set_irn_register(cg->arch_env, curr_sp, spreg);
190 // create memory proj
191 mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
193 // use the memproj now
194 exchange(store, mem_proj);
196 // we can remove the store now
	/* shrink the IncSP by the bytes now covered by Pushs */
202 be_set_IncSP_offset(irn, offset);
204 // can we remove the IncSP now?
206 const ir_edge_t *edge, *next;
208 foreach_out_edge_safe(irn, edge, next) {
209 ir_node *arg = get_edge_src_irn(edge);
210 int pos = get_edge_src_pos(edge);
212 set_irn_n(arg, pos, curr_sp);
215 set_irn_n(irn, 0, new_Bad());
	/* otherwise keep the IncSP but rebase it on the last Push's sp proj */
218 set_irn_n(irn, 0, curr_sp);
223 * Tries to optimize two following IncSP.
/*
 * Merges two consecutive IncSP nodes into one by summing their offsets,
 * provided the predecessor IncSP has no other users and neither offset is
 * one of the symbolic frame-size markers.
 * NOTE(review): extract is missing lines (braces/returns); confirm against
 * the full source.
 */
225 static void ia32_optimize_IncSP(ir_node *node)
230 ir_node *pred = be_get_IncSP_pred(node);
233 if(!be_is_IncSP(pred))
	/* predecessor must be exclusively consumed by this IncSP */
236 if(get_irn_n_edges(pred) > 1)
239 pred_offs = be_get_IncSP_offset(pred);
240 curr_offs = be_get_IncSP_offset(node);
	/* the EXPAND/SHRINK markers are placeholders, not real byte offsets;
	 * only the matching EXPAND+SHRINK pair may be folded */
242 if(pred_offs == BE_STACK_FRAME_SIZE_EXPAND) {
243 if(curr_offs != BE_STACK_FRAME_SIZE_SHRINK) {
247 } else if(pred_offs == BE_STACK_FRAME_SIZE_SHRINK) {
248 if(curr_offs != BE_STACK_FRAME_SIZE_EXPAND) {
252 } else if(curr_offs == BE_STACK_FRAME_SIZE_EXPAND
253 || curr_offs == BE_STACK_FRAME_SIZE_SHRINK) {
256 offs = curr_offs + pred_offs;
259 be_set_IncSP_offset(node, offs);
261 /* rewire dependency edges */
262 predpred = be_get_IncSP_pred(pred);
263 edges_reroute_kind(pred, predpred, EDGE_KIND_DEP, current_ir_graph);
	/* bypass the now-dead predecessor IncSP */
266 be_set_IncSP_pred(node, predpred);
272 * Performs Peephole Optimizations.
/*
 * Walker callback: applies the peephole optimizations to a single node.
 * env carries the ia32 code generator.
 */
274 static void ia32_peephole_optimize_node(ir_node *node, void *env) {
275 ia32_code_gen_t *cg = env;
277 if (be_is_IncSP(node)) {
278 ia32_optimize_IncSP(node);
	/* Push creation is gated by the IA32_OPT_PUSHARGS codegen option */
280 if (cg->opt & IA32_OPT_PUSHARGS)
281 ia32_create_Pushs(node, cg);
/* Performs the peephole optimizations on the whole graph (post walker).
 * NOTE(review): closing brace not visible in this extract. */
285 void ia32_peephole_optimization(ir_graph *irg, ia32_code_gen_t *cg) {
286 irg_walk_graph(irg, ia32_peephole_optimize_node, NULL, cg);
289 /******************************************************************
291 * /\ | | | | | \/ | | |
292 * / \ __| | __| |_ __ ___ ___ ___| \ / | ___ __| | ___
293 * / /\ \ / _` |/ _` | '__/ _ \/ __/ __| |\/| |/ _ \ / _` |/ _ \
294 * / ____ \ (_| | (_| | | | __/\__ \__ \ | | | (_) | (_| | __/
295 * /_/ \_\__,_|\__,_|_| \___||___/___/_| |_|\___/ \__,_|\___|
297 ******************************************************************/
/* Returns 1 if irn is an ia32 node marked commutative, 0 otherwise
 * (non-ia32 nodes are never considered commutative here). */
304 static int node_is_ia32_comm(const ir_node *irn) {
305 return is_ia32_irn(irn) ? is_ia32_commutative(irn) : 0;
/* Counts the out edges (users) of irn by iterating them.
 * NOTE(review): the counter declaration/increment and return are missing
 * from this extract. */
308 static int ia32_get_irn_n_edges(const ir_node *irn) {
309 const ir_edge_t *edge;
312 foreach_out_edge(irn, edge) {
320 * Determines if pred is a Proj and if is_op_func returns true for it's predecessor.
322 * @param pred The node to be checked
323 * @param is_op_func The check-function
324 * @return 1 if conditions are fulfilled, 0 otherwise
/* Applies the given predicate to pred.
 * NOTE(review): despite the doc comment above mentioning Proj handling,
 * the visible body only forwards to is_op_func — confirm nothing is
 * missing from this extract. */
326 static int pred_is_specific_node(const ir_node *pred, is_op_func_t *is_op_func) {
327 return is_op_func(pred);
331 * Determines if pred is a Proj and if is_op_func returns true for it's predecessor
332 * and if the predecessor is in block bl.
334 * @param bl The block
335 * @param pred The node to be checked
336 * @param is_op_func The check-function
337 * @return 1 if conditions are fulfilled, 0 otherwise
/* Checks that pred is a Proj whose predecessor lies in block bl and
 * satisfies is_op_func. NOTE(review): the is_Proj guard and returns are
 * missing from this extract. */
339 static int pred_is_specific_nodeblock(const ir_node *bl, const ir_node *pred,
340 int (*is_op_func)(const ir_node *n))
343 pred = get_Proj_pred(pred);
344 if ((bl == get_nodes_block(pred)) && is_op_func(pred)) {
353 * Checks if irn is a candidate for address calculation. We avoid transforming
354 * adds to leas if they have a load as pred, because then we can use AM mode
357 * - none of the operand must be a Load within the same block OR
358 * - all Loads must have more than one user OR
360 * @param block The block the Loads must/mustnot be in
361 * @param irn The irn to check
362 * return 1 if irn is a candidate, 0 otherwise
/*
 * Decides whether irn (an Add/Sub) should be turned into a LEA. Without
 * AGGRESSIVE_AM the transformation is avoided when an operand is a
 * same-block Load with a single user, because source address mode would
 * then be the better choice.
 * NOTE(review): extract is missing lines (user-count checks, returns);
 * confirm against the full source.
 */
364 static int is_addr_candidate(const ir_node *irn)
366 #ifndef AGGRESSIVE_AM
367 const ir_node *block = get_nodes_block(irn);
368 ir_node *left, *right;
	/* operands 2/3 are the data inputs of an ia32 binop */
371 left = get_irn_n(irn, 2);
372 right = get_irn_n(irn, 3);
374 if (pred_is_specific_nodeblock(block, left, is_ia32_Ld)) {
375 n = ia32_get_irn_n_edges(left);
376 /* load with only one user: don't create LEA */
381 if (pred_is_specific_nodeblock(block, right, is_ia32_Ld)) {
382 n = ia32_get_irn_n_edges(right);
393 * Checks if irn is a candidate for address mode.
396 * - at least one operand has to be a Load within the same block AND
397 * - the load must not have other users than the irn AND
398 * - the irn must not have a frame entity set
400 * @param cg The ia32 code generator
401 * @param h The height information of the irg
402 * @param block The block the Loads must/mustnot be in
403 * @param irn The irn to check
404 * @return 0 if irn is no candidate, 1 if left load can be used, 2 if right one, 3 for both
/*
 * Classifies irn's operands for address mode folding: returns a
 * combination of IA32_AM_CAND_LEFT / IA32_AM_CAND_RIGHT, or
 * IA32_AM_CAND_NONE. Load/Store-like nodes and nodes that already carry a
 * frame entity are excluded. The left and right operand are checked with
 * the same criteria (same-block Load, single user unless AGGRESSIVE_AM,
 * >= 16 bit load mode for binary ops, no dependency cycle via heights).
 * NOTE(review): extract is missing lines; confirm against the full source.
 */
406 static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
407 ir_node *in, *load, *other, *left, *right;
408 int is_cand = 0, cand;
412 if (is_ia32_Ld(irn) || is_ia32_St(irn) ||
413 is_ia32_vfild(irn) || is_ia32_vfist(irn) ||
414 is_ia32_xStoreSimple(irn))
417 if(get_ia32_frame_ent(irn) != NULL)
418 return IA32_AM_CAND_NONE;
420 left = get_irn_n(irn, 2);
421 arity = get_irn_arity(irn);
422 is_binary = get_ia32_am_arity(irn) == ia32_am_binary;
425 right = get_irn_n(irn, 3);
427 assert(get_ia32_am_arity(irn) == ia32_am_unary);
	/* --- check the left operand --- */
434 if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) {
435 #ifndef AGGRESSIVE_AM
437 n = ia32_get_irn_n_edges(in);
438 is_cand = (n == 1) ? 1 : is_cand; /* load with more than one user: no AM */
443 load = get_Proj_pred(in);
446 /* 8bit Loads are not supported (for binary ops),
447 * they cannot be used with every register */
448 if (get_ia32_am_arity(irn) == ia32_am_binary &&
449 get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
453 /* If there is a data dependency of other irn from load: cannot use AM */
454 if (is_cand && is_binary && get_nodes_block(other) == block) {
455 other = skip_Proj(other);
456 is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
457 /* this could happen in loops */
458 is_cand = heights_reachable_in_block(h, load, irn) ? 0 : is_cand;
462 cand = is_cand ? IA32_AM_CAND_LEFT : IA32_AM_CAND_NONE;
	/* --- same checks for the right operand --- */
466 if (pred_is_specific_nodeblock(block, in, is_ia32_Ld)) {
467 #ifndef AGGRESSIVE_AM
469 n = ia32_get_irn_n_edges(in);
470 is_cand = (n == 1) ? 1 : is_cand; /* load with more than one user: no AM */
475 load = get_Proj_pred(in);
478 /* 8bit Loads are not supported, they cannot be used with every register */
479 /* 8bit Loads are not supported (for binary ops),
480 * they cannot be used with every register */
481 if (get_ia32_am_arity(irn) == ia32_am_binary &&
482 get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
486 /* If there is a data dependency of other irn from load: cannot use load */
487 if (is_cand && is_binary && get_nodes_block(other) == block) {
488 other = skip_Proj(other);
489 is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
490 /* this could happen in loops */
491 is_cand = heights_reachable_in_block(h, load, irn) ? 0 : is_cand;
495 cand = is_cand ? (cand | IA32_AM_CAND_RIGHT) : cand;
497 /* if the irn has a frame entity: we do not use address mode */
497 /* if the irn has a frame entity: we do not use address mode */
502 * Compares the base and index addr and the load/store entities
503 * and returns 1 if they are equal.
/*
 * Returns 1 iff load and store address the same location: same base and
 * index inputs, same frame entity, same symconst (and sign), same offset
 * and same load/store mode. NOTE(review): the early `return 0;` lines and
 * final `return 1;` are missing from this extract.
 */
505 static int load_store_addr_is_equal(const ir_node *load, const ir_node *store,
506 const ir_node *addr_b, const ir_node *addr_i)
508 if(get_irn_n(load, 0) != addr_b)
510 if(get_irn_n(load, 1) != addr_i)
513 if(get_ia32_frame_ent(load) != get_ia32_frame_ent(store))
516 if(get_ia32_am_sc(load) != get_ia32_am_sc(store))
518 if(is_ia32_am_sc_sign(load) != is_ia32_am_sc_sign(store))
520 if(get_ia32_am_offs_int(load) != get_ia32_am_offs_int(store))
522 if(get_ia32_ls_mode(load) != get_ia32_ls_mode(store))
/* Bit flags describing which attributes of an existing LEA can be
 * assimilated into a newly built LEA (see do_new_lea). */
528 typedef enum _ia32_take_lea_attr {
529 IA32_LEA_ATTR_NONE = 0,
530 IA32_LEA_ATTR_BASE = (1 << 0),
531 IA32_LEA_ATTR_INDEX = (1 << 1),
532 IA32_LEA_ATTR_OFFS = (1 << 2),
533 IA32_LEA_ATTR_SCALE = (1 << 3),
534 IA32_LEA_ATTR_AMSC = (1 << 4),
535 IA32_LEA_ATTR_FENT = (1 << 5)
536 } ia32_take_lea_attr;
539 * Decides if we have to keep the LEA operand or if we can assimilate it.
/*
 * Decides which attributes of the operand LEA `lea` can be folded into a
 * new LEA built for `irn`. Returns an ia32_take_lea_attr bitmask, or
 * IA32_LEA_ATTR_NONE when the LEA must be kept (conflicting frame
 * entities, two symconsts, or incompatible base/index shapes).
 * NOTE(review): extract is missing lines (ret_val init, some braces and
 * the final return); confirm against the full source.
 */
541 static int do_new_lea(ir_node *irn, ir_node *base, ir_node *index, ir_node *lea,
542 int have_am_sc, ia32_code_gen_t *cg)
544 ir_entity *irn_ent = get_ia32_frame_ent(irn);
545 ir_entity *lea_ent = get_ia32_frame_ent(lea);
547 int is_noreg_base = be_is_NoReg(cg, base);
548 int is_noreg_index = be_is_NoReg(cg, index);
549 ia32_am_flavour_t am_flav = get_ia32_am_flavour(lea);
551 /* If the Add and the LEA both have a different frame entity set: keep */
552 if (irn_ent && lea_ent && (irn_ent != lea_ent))
553 return IA32_LEA_ATTR_NONE;
554 else if (! irn_ent && lea_ent)
555 ret_val |= IA32_LEA_ATTR_FENT;
557 /* If the Add and the LEA both have already an address mode symconst: keep */
558 if (have_am_sc && get_ia32_am_sc(lea))
559 return IA32_LEA_ATTR_NONE;
560 else if (get_ia32_am_sc(lea))
561 ret_val |= IA32_LEA_ATTR_AMSC;
563 /* Check the different base-index combinations */
565 if (! is_noreg_base && ! is_noreg_index) {
566 /* Assimilate if base is the lea and the LEA is just a Base + Offset calculation */
567 if ((base == lea) && ! (am_flav & ia32_I ? 1 : 0)) {
568 if (am_flav & ia32_O)
569 ret_val |= IA32_LEA_ATTR_OFFS;
571 ret_val |= IA32_LEA_ATTR_BASE;
574 return IA32_LEA_ATTR_NONE;
576 else if (! is_noreg_base && is_noreg_index) {
577 /* Base is set but index not */
579 /* Base points to LEA: assimilate everything */
580 if (am_flav & ia32_O)
581 ret_val |= IA32_LEA_ATTR_OFFS;
582 if (am_flav & ia32_S)
583 ret_val |= IA32_LEA_ATTR_SCALE;
584 if (am_flav & ia32_I)
585 ret_val |= IA32_LEA_ATTR_INDEX;
587 ret_val |= IA32_LEA_ATTR_BASE;
589 else if (am_flav & ia32_B ? 0 : 1) {
590 /* Base is not the LEA but the LEA is an index only calculation: assimilate */
591 if (am_flav & ia32_O)
592 ret_val |= IA32_LEA_ATTR_OFFS;
593 if (am_flav & ia32_S)
594 ret_val |= IA32_LEA_ATTR_SCALE;
596 ret_val |= IA32_LEA_ATTR_INDEX;
599 return IA32_LEA_ATTR_NONE;
601 else if (is_noreg_base && ! is_noreg_index) {
602 /* Index is set but not base */
604 /* Index points to LEA: assimilate everything */
605 if (am_flav & ia32_O)
606 ret_val |= IA32_LEA_ATTR_OFFS;
607 if (am_flav & ia32_S)
608 ret_val |= IA32_LEA_ATTR_SCALE;
609 if (am_flav & ia32_B)
610 ret_val |= IA32_LEA_ATTR_BASE;
612 ret_val |= IA32_LEA_ATTR_INDEX;
614 else if (am_flav & ia32_I ? 0 : 1) {
615 /* Index is not the LEA but the LEA is a base only calculation: assimilate */
616 if (am_flav & ia32_O)
617 ret_val |= IA32_LEA_ATTR_OFFS;
618 if (am_flav & ia32_S)
619 ret_val |= IA32_LEA_ATTR_SCALE;
621 ret_val |= IA32_LEA_ATTR_BASE;
624 return IA32_LEA_ATTR_NONE;
	/* reaching here with neither base nor index set is a logic error */
627 assert(0 && "There must have been set base or index");
634 * Adds res before irn into schedule if irn was scheduled.
635 * @param irn The schedule point
636 * @param res The node to be scheduled
/* Schedules res directly before irn, but only if irn itself is part of a
 * schedule (no-op otherwise). */
638 static INLINE void try_add_to_sched(ir_node *irn, ir_node *res) {
639 if (sched_is_scheduled(irn))
640 sched_add_before(irn, res);
644 * Removes node from schedule if it is not used anymore. If irn is a mode_T node
645 * all it's Projs are removed as well.
646 * @param irn The irn to be removed from schedule
/*
 * Removes node (and, for mode_T nodes, its Projs) from the schedule when
 * it has no users left. NOTE(review): the per-Proj handling and the
 * sched_remove call are missing from this extract.
 */
648 static INLINE void try_kill(ir_node *node)
650 if(get_irn_mode(node) == mode_T) {
651 const ir_edge_t *edge, *next;
652 foreach_out_edge_safe(node, edge, next) {
653 ir_node *proj = get_edge_src_irn(edge);
	/* bail out while the node still has users */
658 if(get_irn_n_edges(node) != 0)
661 if (sched_is_scheduled(node)) {
669 * Folds Add or Sub to LEA if possible
/*
 * Folds an ia32 Add or Sub into a LEA when profitable, assimilating
 * immediate constants, symconsts, Shl-based scaled indices and operand
 * LEAs into the new node's address-mode attributes. Returns the created
 * LEA (callers use it for logging/statistics).
 *
 * NOTE(review): this extract is heavily sampled — many lines (variable
 * declarations, else branches, braces, the final return) are missing.
 * Do not rely on the visible control flow; verify against the full file.
 */
671 static ir_node *fold_addr(ia32_code_gen_t *cg, ir_node *irn) {
672 ir_graph *irg = get_irn_irg(irn);
673 dbg_info *dbg_info = get_irn_dbg_info(irn);
674 ir_node *block = get_nodes_block(irn);
676 ir_node *shift = NULL;
677 ir_node *lea_o = NULL;
687 ir_entity *am_sc = NULL;
688 ir_entity *lea_ent = NULL;
689 ir_node *noreg = ia32_new_NoReg_gp(cg);
690 ir_node *left, *right, *temp;
691 ir_node *base, *index;
692 int consumed_left_shift;
693 ia32_am_flavour_t am_flav;
695 if (is_ia32_Add(irn))
698 left = get_irn_n(irn, 2);
699 right = get_irn_n(irn, 3);
701 /* "normalize" arguments in case of add with two operands */
702 if (isadd && ! be_is_NoReg(cg, right)) {
703 /* put LEA == ia32_am_O as right operand */
704 if (is_ia32_Lea(left) && get_ia32_am_flavour(left) == ia32_am_O) {
705 set_irn_n(irn, 2, right);
706 set_irn_n(irn, 3, left);
712 /* put LEA != ia32_am_O as left operand */
713 if (is_ia32_Lea(right) && get_ia32_am_flavour(right) != ia32_am_O) {
714 set_irn_n(irn, 2, right);
715 set_irn_n(irn, 3, left);
721 /* put SHL as left operand iff left is NOT a LEA */
722 if (! is_ia32_Lea(left) && pred_is_specific_node(right, is_ia32_Shl)) {
723 set_irn_n(irn, 2, right);
724 set_irn_n(irn, 3, left);
737 /* check for operation with immediate */
738 if (is_ia32_ImmConst(irn)) {
739 tarval *tv = get_ia32_Immop_tarval(irn);
741 DBG((dbg, LEVEL_1, "\tfound op with imm const"));
743 offs_cnst = get_tarval_long(tv);
	/* symconst immediates are only folded for Add, not Sub */
746 else if (isadd && is_ia32_ImmSymConst(irn)) {
747 DBG((dbg, LEVEL_1, "\tfound op with imm symconst"));
751 am_sc = get_ia32_Immop_symconst(irn);
752 am_sc_sign = is_ia32_am_sc_sign(irn);
755 /* determine the operand which needs to be checked */
756 temp = be_is_NoReg(cg, right) ? left : right;
758 /* check if right operand is AMConst (LEA with ia32_am_O) */
759 /* but we can only eat it up if there is no other symconst */
760 /* because the linker won't accept two symconsts */
761 if (! have_am_sc && is_ia32_Lea(temp) && get_ia32_am_flavour(temp) == ia32_am_O) {
762 DBG((dbg, LEVEL_1, "\tgot op with LEA am_O"));
764 offs_lea = get_ia32_am_offs_int(temp);
765 am_sc = get_ia32_am_sc(temp);
766 am_sc_sign = is_ia32_am_sc_sign(temp);
773 else if (temp == right)
778 /* default for add -> make right operand to index */
781 consumed_left_shift = -1;
783 DBG((dbg, LEVEL_1, "\tgot LEA candidate with index %+F\n", index));
785 /* determine the operand which needs to be checked */
787 if (is_ia32_Lea(left)) {
789 consumed_left_shift = 0;
792 /* check for SHL 1,2,3 */
793 if (pred_is_specific_node(temp, is_ia32_Shl)) {
794 ir_node *right = get_irn_n(temp, n_ia32_Shl_right);
796 if (is_ia32_Immediate(right)) {
797 const ia32_immediate_attr_t *attr
798 = get_ia32_immediate_attr_const(right);
799 long shiftval = attr->offset;
802 index = get_irn_n(temp, 2);
803 consumed_left_shift = consumed_left_shift < 0 ? 1 : 0;
807 DBG((dbg, LEVEL_1, "\tgot scaled index %+F\n", index));
	/* --- determine the base operand --- */
813 if (! be_is_NoReg(cg, index)) {
814 /* if we have index, but left == right -> no base */
818 else if (consumed_left_shift == 1) {
819 /* -> base is right operand */
820 base = (right == lea_o) ? noreg : right;
825 /* Try to assimilate a LEA as left operand */
826 if (is_ia32_Lea(left) && (get_ia32_am_flavour(left) != ia32_am_O)) {
827 /* check if we can assimilate the LEA */
828 int take_attr = do_new_lea(irn, base, index, left, have_am_sc, cg);
830 if (take_attr == IA32_LEA_ATTR_NONE) {
831 DBG((dbg, LEVEL_1, "\tleave old LEA, creating new one\n"));
834 DBG((dbg, LEVEL_1, "\tgot LEA as left operand ... assimilating\n"));
835 lea = left; /* for statistics */
837 if (take_attr & IA32_LEA_ATTR_OFFS)
838 offs = get_ia32_am_offs_int(left);
840 if (take_attr & IA32_LEA_ATTR_AMSC) {
841 am_sc = get_ia32_am_sc(left);
843 am_sc_sign = is_ia32_am_sc_sign(left);
846 if (take_attr & IA32_LEA_ATTR_SCALE)
847 scale = get_ia32_am_scale(left);
849 if (take_attr & IA32_LEA_ATTR_BASE)
850 base = get_irn_n(left, 0);
852 if (take_attr & IA32_LEA_ATTR_INDEX)
853 index = get_irn_n(left, 1);
855 if (take_attr & IA32_LEA_ATTR_FENT)
856 lea_ent = get_ia32_frame_ent(left);
860 /* ok, we can create a new LEA */
862 res = new_rd_ia32_Lea(dbg_info, irg, block, base, index);
863 /* we don't want stuff before the barrier... */
864 if(be_is_NoReg(cg, base) && be_is_NoReg(cg, index)) {
865 add_irn_dep(res, get_irg_frame(irg));
868 /* add the old offset of a previous LEA */
869 add_ia32_am_offs_int(res, offs);
871 /* add the new offset */
873 add_ia32_am_offs_int(res, offs_cnst);
874 add_ia32_am_offs_int(res, offs_lea);
876 /* either lea_O-cnst, -cnst or -lea_O */
877 if (offs_cnst != 0) {
878 add_ia32_am_offs_int(res, offs_lea);
879 add_ia32_am_offs_int(res, -offs_cnst);
881 add_ia32_am_offs_int(res, offs_lea);
885 /* set the address mode symconst */
887 set_ia32_am_sc(res, am_sc);
889 set_ia32_am_sc_sign(res);
892 /* copy the frame entity (could be set in case of Add */
893 /* which was a FrameAddr) */
894 if (lea_ent != NULL) {
895 set_ia32_frame_ent(res, lea_ent);
896 set_ia32_use_frame(res);
898 set_ia32_frame_ent(res, get_ia32_frame_ent(irn));
899 if(is_ia32_use_frame(irn))
900 set_ia32_use_frame(res);
904 set_ia32_am_scale(res, scale);
907 /* determine new am flavour */
908 if (offs || offs_cnst || offs_lea || have_am_sc) {
911 if (! be_is_NoReg(cg, base)) {
914 if (! be_is_NoReg(cg, index)) {
920 set_ia32_am_flavour(res, am_flav);
922 set_ia32_op_type(res, ia32_AddrModeS);
924 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, irn));
926 DBG((dbg, LEVEL_1, "\tLEA [%+F + %+F * %d + %d]\n", base, index, scale, get_ia32_am_offs_int(res)));
928 assert(irn && "Couldn't find result proj");
930 /* get the result Proj of the Add/Sub */
931 try_add_to_sched(irn, res);
933 /* exchange the old op with the new LEA */
937 /* we will exchange it, report here before the Proj is created */
938 if (shift && lea && lea_o) {
942 DBG_OPT_LEA4(irn, lea_o, lea, shift, res);
943 } else if (shift && lea) {
946 DBG_OPT_LEA3(irn, lea, shift, res);
947 } else if (shift && lea_o) {
950 DBG_OPT_LEA3(irn, lea_o, shift, res);
951 } else if (lea && lea_o) {
954 DBG_OPT_LEA3(irn, lea_o, lea, res);
957 DBG_OPT_LEA2(irn, shift, res);
960 DBG_OPT_LEA2(irn, lea, res);
963 DBG_OPT_LEA2(irn, lea_o, res);
965 DBG_OPT_LEA1(irn, res);
974 * Merges a Load/Store node with a LEA.
975 * @param irn The Load/Store node
/*
 * Folds a LEA into a Load/Store that uses it as base: copies the LEA's
 * address-mode attributes (offset, scale, flavour, symconst, frame entity)
 * onto irn and rewires base/index to the LEA's inputs.
 * NOTE(review): extract is missing lines (an early return and braces);
 * confirm against the full source.
 */
978 static void merge_loadstore_lea(ir_node *irn, ir_node *lea) {
979 ir_entity *irn_ent = get_ia32_frame_ent(irn);
980 ir_entity *lea_ent = get_ia32_frame_ent(lea);
982 /* If the irn and the LEA both have a different frame entity set: do not merge */
983 if (irn_ent != NULL && lea_ent != NULL && (irn_ent != lea_ent))
985 else if (irn_ent == NULL && lea_ent != NULL) {
986 set_ia32_frame_ent(irn, lea_ent);
987 set_ia32_use_frame(irn);
990 /* get the AM attributes from the LEA */
991 add_ia32_am_offs_int(irn, get_ia32_am_offs_int(lea));
992 set_ia32_am_scale(irn, get_ia32_am_scale(lea));
993 set_ia32_am_flavour(irn, get_ia32_am_flavour(lea));
995 set_ia32_am_sc(irn, get_ia32_am_sc(lea));
996 if (is_ia32_am_sc_sign(lea))
997 set_ia32_am_sc_sign(irn)
999 set_ia32_op_type(irn, is_ia32_Ld(irn) ? ia32_AddrModeS : ia32_AddrModeD);
1001 /* set base and index */
1002 set_irn_n(irn, 0, get_irn_n(lea, 0));
1003 set_irn_n(irn, 1, get_irn_n(lea, 1));
1007 /* clear remat flag */
	/* address-mode nodes must not be rematerialized */
1008 set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
1010 if (is_ia32_Ld(irn))
1011 DBG_OPT_LOAD_LEA(lea, irn);
1013 DBG_OPT_STORE_LEA(lea, irn);
1018 * Sets new_right index of irn to right and new_left index to left.
1019 * Also exchange left and right
/*
 * Swaps the operands of a commutative irn: writes *right into input
 * new_right and *left into input new_left, and inverts the stored
 * condition code. NOTE(review): the pointer swap of *left/*right is not
 * visible in this extract; confirm against the full source.
 */
1021 static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right,
1022 int new_left, int new_right)
1026 assert(is_ia32_commutative(irn));
1028 set_irn_n(irn, new_right, *right);
1029 set_irn_n(irn, new_left, *left);
1035 /* this is only needed for Compares, but currently ALL nodes
1036 * have this attribute :-) */
1037 set_ia32_pncode(irn, get_inversed_pnc(get_ia32_pncode(irn)));
1041 * Performs address calculation optimization (create LEAs if possible)
/*
 * LEA creation/merging driver for a single node: turns Add/Sub candidates
 * into LEAs via fold_addr, and folds a LEA feeding a Load/Store into that
 * Load/Store via merge_loadstore_lea.
 * NOTE(review): extract is missing lines (returns, declarations, braces);
 * confirm against the full source.
 */
1043 static void optimize_lea(ia32_code_gen_t *cg, ir_node *irn) {
1044 if (! is_ia32_irn(irn))
1047 /* Following cases can occur: */
1048 /* - Sub (l, imm) -> LEA [base - offset] */
1049 /* - Sub (l, r == LEA with ia32_am_O) -> LEA [base - offset] */
1050 /* - Add (l, imm) -> LEA [base + offset] */
1051 /* - Add (l, r == LEA with ia32_am_O) -> LEA [base + offset] */
1052 /* - Add (l == LEA with ia32_am_O, r) -> LEA [base + offset] */
1053 /* - Add (l, r) -> LEA [base + index * scale] */
1054 /* with scale > 1 iff l/r == shl (1,2,3) */
1055 if (is_ia32_Sub(irn) || is_ia32_Add(irn)) {
1058 if(!is_addr_candidate(irn))
1061 DBG((dbg, LEVEL_1, "\tfound address calculation candidate %+F ... ", irn));
1062 res = fold_addr(cg, irn);
1065 DB((dbg, LEVEL_1, "transformed into %+F\n", res));
1067 DB((dbg, LEVEL_1, "not transformed\n"));
1068 } else if (is_ia32_Ld(irn) || is_ia32_St(irn)) {
1069 /* - Load -> LEA into Load } TODO: If the LEA is used by more than one Load/Store */
1070 /* - Store -> LEA into Store } it might be better to keep the LEA */
1071 ir_node *left = get_irn_n(irn, 0);
1073 if (is_ia32_Lea(left)) {
1074 const ir_edge_t *edge, *ne;
1077 /* merge all Loads/Stores connected to this LEA with the LEA */
1078 foreach_out_edge_safe(left, edge, ne) {
1079 src = get_edge_src_irn(edge);
	/* only fold into users that take the LEA as base (input 0) */
1081 if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src))) {
1082 DBG((dbg, LEVEL_1, "\nmerging %+F into %+F\n", left, irn));
1083 if (! is_ia32_got_lea(src))
1084 merge_loadstore_lea(src, left);
1085 set_ia32_got_lea(src);
/*
 * Drops a Conv_I2I feeding a Store when the store mode is at least as
 * narrow as the conv mode (the store only keeps the low bits anyway), by
 * rewiring the store's value input to the conv's input.
 * NOTE(review): extract is missing lines (returns, declarations, the
 * kill of the dead conv); confirm against the full source.
 */
1092 static void optimize_conv_store(ir_node *node)
1096 ir_mode *store_mode;
1098 if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
	/* input 2 is the stored value */
1101 pred = get_irn_n(node, 2);
1102 if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
1105 /* the store only stores the lower bits, so we only need the conv
1106 * it it shrinks the mode */
1107 conv_mode = get_ia32_ls_mode(pred);
1108 store_mode = get_ia32_ls_mode(node);
1109 if(get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
1112 set_irn_n(node, 2, get_irn_n(pred, 2));
1113 if(get_irn_n_edges(pred) == 0) {
/*
 * Removes a Conv_I2I directly after a Load when the load already produces
 * a mode at least as wide; a pure sign/zero-extension difference is fixed
 * by retyping the load's ls_mode (only when the conv is the load's sole
 * user). NOTE(review): extract is missing lines (returns, declarations,
 * braces); confirm against the full source.
 */
1118 static void optimize_load_conv(ir_node *node)
1120 ir_node *pred, *predpred;
1124 if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
1127 pred = get_irn_n(node, 2);
1131 predpred = get_Proj_pred(pred);
1132 if(!is_ia32_Load(predpred))
1135 /* the load is sign extending the upper bits, so we only need the conv
1136 * if it shrinks the mode */
1137 load_mode = get_ia32_ls_mode(predpred);
1138 conv_mode = get_ia32_ls_mode(node);
1139 if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
1142 if(get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
1143 /* change the load if it has only 1 user */
1144 if(get_irn_n_edges(pred) == 1) {
1146 if(get_mode_sign(conv_mode)) {
1147 newmode = find_signed_mode(load_mode);
1149 newmode = find_unsigned_mode(load_mode);
1151 assert(newmode != NULL);
	/* retype the load so it extends with the sign the conv wanted */
1152 set_ia32_ls_mode(predpred, newmode);
1154 /* otherwise we have to keep the conv */
	/* conv is now redundant: replace it by its input */
1160 exchange(node, pred);
/*
 * Removes the second of two back-to-back Conv_I2I nodes when it does not
 * shrink the mode and is not an unsigned upconv of a signed value.
 * NOTE(review): extract is missing lines (returns, declarations);
 * confirm against the full source.
 */
1163 static void optimize_conv_conv(ir_node *node)
1169 if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
	/* both conv variants must agree on the value-input index */
1172 assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
1173 pred = get_irn_n(node, n_ia32_Conv_I2I_val);
1174 if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
1177 /* we know that after a conv, the upper bits are sign extended
1178 * so we only need the 2nd conv if it shrinks the mode */
1179 conv_mode = get_ia32_ls_mode(node);
1180 pred_mode = get_ia32_ls_mode(pred);
1181 if(get_mode_size_bits(conv_mode) < get_mode_size_bits(pred_mode))
1184 /* we can't eliminate an upconv signed->unsigned */
1185 if (get_mode_size_bits(conv_mode) != get_mode_size_bits(pred_mode) &&
1186 !get_mode_sign(conv_mode) && get_mode_sign(pred_mode))
1190 exchange(node, pred);
/* Walker callback: runs the conv/store/load simplifications and the LEA
 * optimization on one node. env carries the ia32 code generator. */
1193 static void optimize_node(ir_node *node, void *env)
1195 ia32_code_gen_t *cg = env;
1197 optimize_load_conv(node);
1198 optimize_conv_store(node);
1199 optimize_conv_conv(node);
1200 optimize_lea(cg, node);
1204 * Checks for address mode patterns and performs the
1205 * necessary transformations.
1206 * This function is called by a walker.
1208 static void optimize_am(ir_node *irn, void *env) {
1209 ia32_am_opt_env_t *am_opt_env = env;
1210 ia32_code_gen_t *cg = am_opt_env->cg;
1211 ir_graph *irg = get_irn_irg(irn);
1212 heights_t *h = am_opt_env->h;
1213 ir_node *block, *left, *right;
1214 ir_node *store, *load, *mem_proj;
1215 ir_node *addr_b, *addr_i;
1216 int need_exchange_on_fail = 0;
1217 ia32_am_type_t am_support;
1218 ia32_am_arity_t am_arity;
1219 ia32_am_cand_t cand;
1220 ia32_am_cand_t orig_cand;
1222 int source_possible;
1224 static const arch_register_req_t dest_out_reg_req_0 = {
1225 arch_register_req_type_none,
1226 NULL, /* regclass */
1227 NULL, /* limit bitset */
1229 -1 /* different pos */
1231 static const arch_register_req_t *dest_am_out_reqs[] = {
1235 if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn))
1237 if (is_ia32_Lea(irn))
1240 am_support = get_ia32_am_support(irn);
1241 am_arity = get_ia32_am_arity(irn);
1242 block = get_nodes_block(irn);
1244 /* fold following patterns:
1245 * - op -> Load into AMop with am_Source
1247 * - op is am_Source capable AND
1248 * - the Load is only used by this op AND
1249 * - the Load is in the same block
1250 * - Store -> op -> Load into AMop with am_Dest
1252 * - op is am_Dest capable AND
1253 * - the Store uses the same address as the Load AND
1254 * - the Load is only used by this op AND
1255 * - the Load and Store are in the same block AND
1256 * - nobody else uses the result of the op
1258 if (am_support == ia32_am_None)
1261 assert(am_arity == ia32_am_unary || am_arity == ia32_am_binary);
1263 cand = is_am_candidate(h, block, irn);
1264 if (cand == IA32_AM_CAND_NONE)
1268 DBG((dbg, LEVEL_1, "\tfound address mode candidate %+F (candleft %d candright %d)... \n", irn,
1269 cand & IA32_AM_CAND_LEFT, cand & IA32_AM_CAND_RIGHT));
1271 left = get_irn_n(irn, 2);
1272 if (am_arity == ia32_am_unary) {
1273 assert(get_irn_arity(irn) >= 4);
1275 assert(cand == IA32_AM_CAND_BOTH);
1277 assert(get_irn_arity(irn) >= 5);
1278 right = get_irn_n(irn, 3);
1281 dest_possible = am_support & ia32_am_Dest ? 1 : 0;
1282 source_possible = am_support & ia32_am_Source ? 1 : 0;
1284 DBG((dbg, LEVEL_2, "\tdest_possible %d source_possible %d ... \n", dest_possible, source_possible));
1286 if (dest_possible) {
1291 /* we should only have 1 user which is a store */
1292 if (ia32_get_irn_n_edges(irn) == 1) {
1293 ir_node *succ = get_edge_src_irn(get_irn_out_edge_first(irn));
1295 if (is_ia32_xStore(succ) || is_ia32_Store(succ)) {
1297 addr_b = get_irn_n(store, 0);
1298 addr_i = get_irn_n(store, 1);
1302 if (store == NULL) {
1303 DBG((dbg, LEVEL_2, "\tno store found, not using dest_mode\n"));
1308 if (dest_possible) {
1309 /* normalize nodes, we need the interesting load on the left side */
1310 if (cand & IA32_AM_CAND_RIGHT) {
1311 load = get_Proj_pred(right);
1312 if (load_store_addr_is_equal(load, store, addr_b, addr_i)
1313 && node_is_ia32_comm(irn)) {
1314 DBG((dbg, LEVEL_2, "\texchanging left/right\n"));
1315 exchange_left_right(irn, &left, &right, 3, 2);
1316 need_exchange_on_fail ^= 1;
1317 if (cand == IA32_AM_CAND_RIGHT)
1318 cand = IA32_AM_CAND_LEFT;
1323 if (dest_possible) {
1324 if(cand & IA32_AM_CAND_LEFT && is_Proj(left)) {
1325 load = get_Proj_pred(left);
1327 #ifndef AGGRESSIVE_AM
1328 /* we have to be the only user of the load */
1329 if (get_irn_n_edges(left) > 1) {
1330 DBG((dbg, LEVEL_2, "\tmatching load has too may users, not using dest_mode\n"));
1335 DBG((dbg, LEVEL_2, "\tno matching load found, not using dest_mode"));
1340 if (dest_possible) {
1341 /* the store has to use the load's memory or the same memory
1343 ir_node *loadmem = get_irn_n(load, 2);
1344 ir_node *storemem = get_irn_n(store, 3);
1345 assert(get_irn_mode(loadmem) == mode_M);
1346 assert(get_irn_mode(storemem) == mode_M);
1347 /* TODO there could be a sync between store and load... */
1348 if(storemem != loadmem && (!is_Proj(storemem) || get_Proj_pred(storemem) != load)) {
1349 DBG((dbg, LEVEL_2, "\tload/store using different memories, not using dest_mode"));
1354 if (dest_possible) {
1355 /* Compare Load and Store address */
1356 if (!load_store_addr_is_equal(load, store, addr_b, addr_i)) {
1357 DBG((dbg, LEVEL_2, "\taddresses not equal, not using dest_mode"));
1362 if (dest_possible) {
1363 ir_mode *lsmode = get_ia32_ls_mode(load);
1364 if(get_mode_size_bits(lsmode) != 32) {
1369 if (dest_possible) {
1370 /* all conditions fulfilled, do the transformation */
1371 assert(cand & IA32_AM_CAND_LEFT);
1373 /* set new base, index and attributes */
1374 set_irn_n(irn, 0, addr_b);
1375 set_irn_n(irn, 1, addr_i);
1376 add_ia32_am_offs_int(irn, get_ia32_am_offs_int(load));
1377 set_ia32_am_scale(irn, get_ia32_am_scale(load));
1378 set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
1379 set_ia32_op_type(irn, ia32_AddrModeD);
1380 set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
1381 set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
1383 set_ia32_am_sc(irn, get_ia32_am_sc(load));
1384 if (is_ia32_am_sc_sign(load))
1385 set_ia32_am_sc_sign(irn);
1387 /* connect to Load memory and disconnect Load */
1388 if (am_arity == ia32_am_binary) {
1390 set_irn_n(irn, 4, get_irn_n(load, 2));
1391 set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
1394 set_irn_n(irn, 3, get_irn_n(load, 2));
1395 set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
1398 /* change node mode and out register requirements */
1399 set_irn_mode(irn, mode_M);
1400 set_ia32_out_req_all(irn, dest_am_out_reqs);
1402 /* connect the memory Proj of the Store to the op */
1403 edges_reroute(store, irn, irg);
1405 /* clear remat flag */
1406 set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
1410 DBG_OPT_AM_D(load, store, irn);
1412 DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
1413 need_exchange_on_fail = 0;
1414 source_possible = 0;
1417 if (source_possible) {
1418 /* normalize ops, we need the load on the right */
1419 if(cand == IA32_AM_CAND_LEFT) {
1420 if(node_is_ia32_comm(irn)) {
1421 exchange_left_right(irn, &left, &right, 3, 2);
1422 need_exchange_on_fail ^= 1;
1423 cand = IA32_AM_CAND_RIGHT;
1425 source_possible = 0;
1430 if (source_possible) {
1431 /* all conditions fulfilled, do transform */
1432 assert(cand & IA32_AM_CAND_RIGHT);
1433 load = get_Proj_pred(right);
1435 if(get_irn_n_edges(right) > 1) {
1436 source_possible = 0;
1438 /* TODO: this isn't really needed, but the code below is buggy
1439 as heights won't get recomputed when the graph is reconstructed
1440 so we can only transform loads with the result proj only */
1441 if(get_irn_n_edges(load) > 1) {
1442 source_possible = 0;
1446 if (source_possible) {
1447 ir_mode *ls_mode = get_ia32_ls_mode(load);
1448 if(get_mode_size_bits(ls_mode) != 32 || ls_mode == mode_D)
1449 source_possible = 0;
1453 if (source_possible) {
1454 addr_b = get_irn_n(load, 0);
1455 addr_i = get_irn_n(load, 1);
1457 /* set new base, index and attributes */
1458 set_irn_n(irn, 0, addr_b);
1459 set_irn_n(irn, 1, addr_i);
1460 add_ia32_am_offs_int(irn, get_ia32_am_offs_int(load));
1461 set_ia32_am_scale(irn, get_ia32_am_scale(load));
1462 set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
1463 set_ia32_op_type(irn, ia32_AddrModeS);
1464 set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
1466 /* set ls_mode if not already present (conv nodes already have ls_mode
1468 if(get_ia32_ls_mode(irn) == NULL) {
1469 set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
1472 set_ia32_am_sc(irn, get_ia32_am_sc(load));
1473 if (is_ia32_am_sc_sign(load))
1474 set_ia32_am_sc_sign(irn);
1476 /* clear remat flag */
1477 set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
1479 if (is_ia32_use_frame(load)) {
1480 if(get_ia32_frame_ent(load) == NULL) {
1481 set_ia32_need_stackent(irn);
1483 set_ia32_use_frame(irn);
1486 /* connect to Load memory and disconnect Load */
1487 if (am_arity == ia32_am_binary) {
1489 right = ia32_get_admissible_noreg(cg, irn, 3);
1490 set_irn_n(irn, 3, right);
1491 set_irn_n(irn, 4, get_irn_n(load, n_ia32_Load_mem));
1494 right = ia32_get_admissible_noreg(cg, irn, 2);
1495 set_irn_n(irn, 2, right);
1496 set_irn_n(irn, 3, get_irn_n(load, n_ia32_Load_mem));
1499 DBG_OPT_AM_S(load, irn);
1501 /* If Load has a memory Proj, connect it to the op */
1502 mem_proj = ia32_get_proj_for_mode(load, mode_M);
1503 if (mem_proj != NULL) {
1505 ir_mode *mode = get_irn_mode(irn);
1507 if(mode != mode_T) {
1508 res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
1509 get_nodes_block(irn),
1510 new_Unknown(mode_T), mode, 0);
1511 set_irn_mode(irn, mode_T);
1512 edges_reroute(irn, res_proj, irg);
1513 set_Proj_pred(res_proj, irn);
1515 set_Proj_pred(mem_proj, irn);
1516 set_Proj_proj(mem_proj, 1);
1518 /* hacky: we need some proj number which is not used yet... */
1519 set_Proj_proj(mem_proj, -1);
1520 set_Proj_pred(mem_proj, irn);
1525 need_exchange_on_fail = 0;
1527 /* immediates are only allowed on the right side */
1528 if(is_ia32_Immediate(left)) {
1529 exchange_left_right(irn, &left, &right, 3, 2);
1532 DB((dbg, LEVEL_1, "merged with %+F into source AM\n", load));
1535 /* was exchanged but optimize failed: exchange back */
1536 if (need_exchange_on_fail) {
1537 exchange_left_right(irn, &left, &right, 3, 2);
1542 * Performs conv and address mode optimization.
1544 void ia32_optimize_graph(ia32_code_gen_t *cg) {
/* Driver for the per-graph IA32 peephole passes: depending on the
 * IA32_OPT_LEA / IA32_OPT_DOAM bits in cg->opt it runs the LEA
 * optimization walker and/or the address-mode (AM) walker over cg->irg.
 * The two passes run in separate walks (see the comment below). */
1545 /* if we are supposed to do AM or LEA optimization: recalculate edges */
1546 if (! (cg->opt & (IA32_OPT_DOAM | IA32_OPT_LEA))) {
1547 /* no optimizations at all */
/* NOTE(review): the early exit for the "no optimizations" case is not
 * visible in this excerpt -- presumably a plain `return;` plus closing
 * brace here; confirm against the full source. */
1551 /* beware: we cannot optimize LEA and AM in one run because */
1552 /* LEA optimization adds new nodes to the irg which */
1553 /* invalidates the phase data */
1555 if (cg->opt & IA32_OPT_LEA) {
1556 irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg);
/* Dump the graph after the LEA pass for debugging/inspection. */
1560 be_dump(cg->irg, "-lea", dump_ir_block_graph_sched);
1562 /* hack for now, so these don't get created during optimize, because then
1563 * they will be unknown to the heights module
/* Pre-create the NoReg pseudo-register nodes (gp/fp/vfp) up front so
 * they already exist before the heights information is computed below. */
1565 ia32_new_NoReg_gp(cg);
1566 ia32_new_NoReg_fp(cg);
1567 ia32_new_NoReg_vfp(cg);
1569 if (cg->opt & IA32_OPT_DOAM) {
1570 /* we need height information for am optimization */
1571 heights_t *h = heights_new(cg->irg);
1572 ia32_am_opt_env_t env;
/* NOTE(review): the initialization of `env` (its cg/heights fields) and
 * the matching heights_free() appear to be missing from this excerpt --
 * verify against the full source before editing this function. */
1577 irg_walk_blkwise_graph(cg->irg, NULL, optimize_am, &env);
1583 void ia32_init_optimize(void)
/* One-time module initialization: registers the firm debug module handle
 * `dbg` (declared at file scope under DEBUG_ONLY) that the DBG()/DB()
 * statements throughout this file write to. */
1585 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize");