/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License Agreement
 * provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "firm_types.h"

#include "../benode_t.h"
#include "../besched_t.h"
#include "../bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_common_transform.h"
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_util.h"
#include "ia32_architecture.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static const arch_env_t *arch_env;
static ia32_code_gen_t  *cg;
static void copy_mark(const ir_node *old, ir_node *new)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(new);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(new);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(new);
}
/**
 * Returns non-zero if the given node produces a zero flag.
 *
 * @param node  the node to check
 * @param pn    if >= 0, the projection number of the used result
 */
static int produces_zero_flag(ir_node *node, int pn)
{
	ir_node                     *count;
	const ia32_immediate_attr_t *imm_attr;

	if (!is_ia32_irn(node))
		return 0;

	if (pn >= 0) {
		if (pn != pn_ia32_res)
			return 0;
	}

	switch (get_ia32_irn_opcode(node)) {
	case iro_ia32_Add:  case iro_ia32_Adc:
	case iro_ia32_And:  case iro_ia32_Or:   case iro_ia32_Xor:
	case iro_ia32_Sub:  case iro_ia32_Sbb:
	case iro_ia32_Neg:  case iro_ia32_Inc:  case iro_ia32_Dec:
		/* these set the zero flag according to their result */
		return 1;

	case iro_ia32_ShlD:
	case iro_ia32_ShrD:
		assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
		count = get_irn_n(node, n_ia32_ShlD_count);
		goto check_shift_amount;

	case iro_ia32_Shl:
	case iro_ia32_Shr:
	case iro_ia32_Sar:
		assert(n_ia32_Shl_count == n_ia32_Shr_count
				&& n_ia32_Shl_count == n_ia32_Sar_count);
		count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
		/* when the shift count is zero the flags are not affected, so we can
		 * only do this for constants != 0 */
		if (!is_ia32_Immediate(count))
			return 0;

		imm_attr = get_ia32_immediate_attr_const(count);
		if (imm_attr->symconst != NULL)
			return 0;

		/* only the lowest 5 bits of the shift amount are used on ia32 */
		if ((imm_attr->offset & 0x1f) == 0)
			return 0;
		return 1;

	default:
		break;
	}
	return 0;
}
/**
 * If the given node does not have mode_T, creates a mode_T version (with a
 * result Proj).
 *
 * @param node  the node to change
 *
 * @return the new mode_T node (if the mode was changed) or node itself
 */
static ir_node *turn_into_mode_t(ir_node *node)
{
	ir_node               *block;
	ir_node               *res_proj;
	ir_node               *new_node;
	const arch_register_t *reg;

	if (get_irn_mode(node) == mode_T)
		return node;

	assert(get_irn_mode(node) == mode_Iu);

	new_node = exact_copy(node);
	set_irn_mode(new_node, mode_T);

	block    = get_nodes_block(new_node);
	res_proj = new_r_Proj(current_ir_graph, block, new_node, mode_Iu,
	                      pn_ia32_res);

	reg = arch_get_irn_register(arch_env, node);
	arch_set_irn_register(arch_env, res_proj, reg);

	sched_add_before(node, new_node);
	be_peephole_exchange(node, res_proj);
	return new_node;
}
/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
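/* Editor's illustration (not from the original sources): at the instruction
 * level this turns e.g.
 *     cmpl $0, %eax
 * into
 *     testl %eax, %eax
 * which needs no immediate byte and sets ZF/SF identically for a compare
 * against zero. */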
static void peephole_ia32_Cmp(ir_node *const node)
{
	ir_node                     *right;
	ia32_immediate_attr_t const *imm;
	dbg_info                    *dbgi;
	ir_graph                    *irg;
	ir_node                     *block;
	ir_node                     *noreg;
	ir_node                     *nomem;
	ir_node                     *op;
	ia32_attr_t           const *attr;
	int                          ins_permuted;
	int                          cmp_unsigned;
	ir_node                     *test;
	arch_register_t       const *reg;
	ir_edge_t             const *edge;
	ir_edge_t             const *tmp;

	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	imm = get_ia32_immediate_attr_const(right);
	if (imm->symconst != NULL || imm->offset != 0)
		return;

	dbgi         = get_irn_dbg_info(node);
	irg          = current_ir_graph;
	block        = get_nodes_block(node);
	noreg        = ia32_new_NoReg_gp(cg);
	nomem        = get_irg_no_mem(irg);
	op           = get_irn_n(node, n_ia32_Cmp_left);
	attr         = get_irn_generic_attr(node);
	ins_permuted = attr->data.ins_permuted;
	cmp_unsigned = attr->data.cmp_unsigned;

	if (is_ia32_Cmp(node)) {
		test = new_rd_ia32_Test(dbgi, irg, block, noreg, noreg, nomem,
		                        op, op, ins_permuted, cmp_unsigned);
	} else {
		test = new_rd_ia32_Test8Bit(dbgi, irg, block, noreg, noreg, nomem,
		                            op, op, ins_permuted, cmp_unsigned);
	}
	set_ia32_ls_mode(test, get_ia32_ls_mode(node));

	reg = arch_get_irn_register(arch_env, node);
	arch_set_irn_register(arch_env, test, reg);

	foreach_out_edge_safe(node, edge, tmp) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}
/**
 * Peephole optimization for Test instructions.
 * We can remove the Test if a zero flag was produced which is still live.
 */
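/* Editor's illustration (not from the original sources): e.g.
 *     andl %ebx, %eax    ; already sets ZF according to its result
 *     testl %eax, %eax   ; redundant
 *     je   somewhere
 * can drop the testl and let the jump use the flags of the andl, provided
 * nothing in between clobbers the flags. */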
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	ir_node         *flags_proj;
	ir_node         *block;
	ir_mode         *flags_mode;
	int              pn = -1;
	ir_node         *schedpoint;
	const ir_edge_t *edge;

	assert(n_ia32_Test_left == n_ia32_Test8Bit_left
			&& n_ia32_Test_right == n_ia32_Test8Bit_right);

	/* we need a test for 0 */
	if (left != right)
		return;

	block = get_nodes_block(node);
	if (get_nodes_block(left) != block)
		return;

	if (is_Proj(left)) {
		pn   = get_Proj_proj(left);
		left = get_Proj_pred(left);
	}

	/* happens rarely, but if it does the code below would panic */
	if (is_ia32_Unknown_GP(left))
		return;

	/* walk the schedule up and abort when we find left or some other node
	 * that destroys the flags */
	schedpoint = sched_prev(node);
	while (schedpoint != left) {
		if (arch_irn_is(arch_env, schedpoint, modify_flags))
			return;
		if (schedpoint == block)
			panic("couldn't find left");
		schedpoint = sched_prev(schedpoint);
	}

	/* make sure only Lg/Eq tests are used */
	foreach_out_edge(node, edge) {
		ir_node *user = get_edge_src_irn(edge);
		int      pnc  = get_ia32_condcode(user);

		if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg) {
			return;
		}
	}

	if (!produces_zero_flag(left, pn))
		return;

	left = turn_into_mode_t(left);

	flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
	flags_proj = new_r_Proj(current_ir_graph, block, left, flags_mode,
	                        pn_ia32_flags);
	arch_set_irn_register(arch_env, flags_proj, &ia32_flags_regs[REG_EFLAGS]);

	assert(get_irn_mode(node) != mode_T);

	be_peephole_exchange(node, flags_proj);
}
/**
 * AMD Athlon works faster when RET is not the destination of a conditional
 * jump or directly preceded by another jump instruction. This can be avoided
 * by placing a Rep prefix before the return.
 */
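/* Editor's note: the classic encoding of this workaround is the two-byte
 * "rep; ret" sequence (the rep prefix is architecturally ignored before ret);
 * how the padded return is actually emitted is decided by the emitter, see
 * be_Return_set_emit_pop below. */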
static void peephole_ia32_Return(ir_node *node) {
	ir_node *block, *irn;

	if (!ia32_cg_config.use_pad_return)
		return;

	block = get_nodes_block(node);

	/* check if this return is the first on the block */
	sched_foreach_reverse_from(node, irn) {
		switch (get_irn_opcode(irn)) {
		case beo_Return:
			/* the return node itself, ignore */
			continue;
		case beo_Barrier:
			/* ignore the barrier, no code is generated for it */
			continue;
		case beo_IncSP:
			/* arg, IncSP 0 nodes might occur, ignore these */
			if (be_get_IncSP_offset(irn) == 0)
				continue;
			return;
		case iro_Phi:
			continue;
		default:
			return;
		}
	}

	/* ensure that the 3-byte return is generated */
	be_Return_set_emit_pop(node, 1);
}
/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE	48
/**
 * Tries to create Pushs from IncSP, Store combinations.
 * The Stores are replaced by Pushs; the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
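/* Editor's illustration (not from the original sources): conceptually
 *     subl $8, %esp          ; IncSP 8
 *     movl %eax, 4(%esp)     ; Store into slot 1
 *     movl %ebx, 0(%esp)     ; Store into slot 0
 * becomes
 *     pushl %eax
 *     pushl %ebx
 * and the IncSP shrinks by the bytes now covered by Pushs. */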
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int              i;
	int              maxslot;
	int              inc_ofs;
	ir_node         *node;
	ir_node         *stores[MAXPUSH_OPTIMIZE];
	ir_node         *block;
	ir_node         *push;
	ir_node         *curr_sp;
	ir_mode         *spmode;
	ir_graph        *irg;
	ir_node         *first_push = NULL;
	ir_edge_t const *edge;
	ir_edge_t const *next;

	memset(stores, 0, sizeof(stores));

	assert(be_is_IncSP(irn));

	inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them into the stores array which is sorted by the frame offset/4
	 * attached to the node.
	 */
	maxslot = -1;
	for (node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
		ir_node *mem;
		int      offset;
		int      storeslot;

		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* the Store has to be attached to NoMem */
		mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}
	curr_sp = irn;

	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushs for them */
	block  = get_nodes_block(irn);
	spmode = get_irn_mode(irn);
	irg    = current_ir_graph;
	for (; i >= 0; --i) {
		const arch_register_t *spreg;
		ir_node *val, *mem, *mem_proj;
		ir_node *store = stores[i];
		ir_node *noreg = ia32_new_NoReg_gp(cg);

		val   = get_irn_n(store, n_ia32_unary_op);
		mem   = get_irn_n(store, n_ia32_mem);
		spreg = arch_get_irn_register(cg->arch_env, curr_sp);

		push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, mem, val, curr_sp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(curr_sp, push);

		/* create stackpointer Proj */
		curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(cg->arch_env, curr_sp, spreg);

		/* create memory Proj */
		mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);

		/* use the memproj now */
		be_peephole_exchange(store, mem_proj);

		inc_ofs -= 4;
	}

	/* rewire all users of the old sp value to the last Push's stack Proj */
	foreach_out_edge_safe(irn, edge, next) {
		ir_node *const src = get_edge_src_irn(edge);
		int      const pos = get_edge_src_pos(edge);

		if (src == first_push)
			continue;

		set_irn_n(src, pos, curr_sp);
	}

	be_set_IncSP_offset(irn, inc_ofs);
}
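/* peephole_store_incsp below refers to create_push, which is defined further
 * down in this file; a forward declaration keeps the file self-contained
 * (editor's addition). */
static ir_node *create_push(dbg_info *dbgi, ir_graph *irg, ir_node *block,
                            ir_node *stack, ir_node *schedpoint);

/**
 * Tries to turn a Store to the bottom slot of a frame just allocated by an
 * IncSP into a Push. (Note: this optimisation is currently not registered
 * in ia32_peephole_optimization.)
 */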
static void peephole_store_incsp(ir_node *store)
{
	dbg_info *dbgi;
	ir_node  *node;
	ir_node  *block, *noreg, *val, *push;
	ir_node  *mem;
	ir_graph *irg     = current_ir_graph;
	ir_node  *am_base = get_irn_n(store, n_ia32_Store_base);

	if (!be_is_IncSP(am_base)
			|| get_nodes_block(am_base) != get_nodes_block(store))
		return;
	mem = get_irn_n(store, n_ia32_Store_mem);
	if (!is_ia32_NoReg_GP(get_irn_n(store, n_ia32_Store_index))
			|| !is_NoMem(mem))
		return;

	int incsp_offset = be_get_IncSP_offset(am_base);
	if (incsp_offset <= 0)
		return;

	/* we have to be at offset 0 */
	int my_offset = get_ia32_am_offs_int(store);
	if (my_offset != 0) {
		/* TODO here: find out whether there is a store with offset 0 before
		 * us and whether we can move it down to our place */
		return;
	}
	ir_mode *ls_mode = get_ia32_ls_mode(store);
	int my_store_size = get_mode_size_bytes(ls_mode);

	if (my_offset + my_store_size > incsp_offset)
		return;

	/* correctness checking:
	 * - no one else must write to that stackslot
	 *   (because after translation the incsp won't allocate it anymore)
	 */
	sched_foreach_reverse_from(store, node) {
		int i, arity;

		if (node == am_base)
			break;

		/* make sure no one else can use the space on the stack */
		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			ir_node *pred = get_irn_n(node, i);
			if (pred != am_base)
				continue;

			if (i == n_ia32_base &&
					(get_ia32_op_type(node) == ia32_AddrModeS
					 || get_ia32_op_type(node) == ia32_AddrModeD)) {
				int      node_offset  = get_ia32_am_offs_int(node);
				ir_mode *node_ls_mode = get_ia32_ls_mode(node);
				int      node_size    = get_mode_size_bytes(node_ls_mode);
				/* overlapping with our position? abort */
				if (node_offset < my_offset + my_store_size
						&& node_offset + node_size >= my_offset)
					return;
				/* otherwise it's fine */
				continue;
			}

			/* strange use of esp: abort */
			return;
		}
	}
	/* all ok, change to push */
	dbgi  = get_irn_dbg_info(store);
	block = get_nodes_block(store);
	noreg = ia32_new_NoReg_gp(cg);
	val   = get_irn_n(store, n_ia32_Store_val);

	push = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, mem,
	                        val, am_base);
	(void)push;
	create_push(dbgi, current_ir_graph, block, am_base, store);
}
/**
 * Returns true if a mode can be stored in the GP register set.
 */
static INLINE int mode_needs_gp_reg(ir_mode *mode) {
	if (mode == mode_fpcw)
		return 0;
	if (get_mode_size_bits(mode) > 32)
		return 0;
	return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
}
/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops; the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
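/* Editor's illustration (not from the original sources): conceptually
 *     movl 0(%esp), %ebx     ; Load from slot 0
 *     movl 4(%esp), %eax     ; Load from slot 1
 *     addl $8, %esp          ; IncSP -8
 * becomes
 *     popl %ebx
 *     popl %eax
 * and the IncSP shrinks by the bytes now covered by Pops. */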
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	int       i, maxslot, inc_ofs, ofs;
	ir_node  *node, *pred_sp, *block;
	ir_node  *loads[MAXPUSH_OPTIMIZE];
	ir_graph *irg = current_ir_graph;
	unsigned  regmask  = 0;
	unsigned  copymask = ~0;

	memset(loads, 0, sizeof(loads));
	assert(be_is_IncSP(irn));

	inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them into the loads array which is sorted by the frame offset/4
	 * attached to the node.
	 */
	maxslot = -1;
	pred_sp = be_get_IncSP_pred(irn);
	for (node = sched_prev(irn); !sched_is_end(node); node = sched_prev(node)) {
		int offset;
		int loadslot;
		const arch_register_t *sreg, *dreg;
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				if (!mode_needs_gp_reg(get_irn_mode(node))) {
					/* not a GP copy, ignore */
					continue;
				}
				dreg = arch_get_irn_register(arch_env, node);
				sreg = arch_get_irn_register(arch_env, be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, ie none of our future Pops will overwrite
				 * it */
				regmask  |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* we can handle only GP loads */
		if (!mode_needs_gp_reg(get_ia32_ls_mode(node)))
			break;

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* it should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* loading from half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			break;
		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		dreg = arch_get_irn_register(arch_env, node);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}
	/* find the first slot */
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	ofs     = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = be_new_IncSP(esp, irg, block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node *load = loads[i];
		ir_node *mem, *pop;
		const ir_edge_t *edge, *tmp;
		const arch_register_t *reg;

		mem = get_irn_n(load, n_ia32_mem);
		reg = arch_get_irn_register(arch_env, load);

		pop = new_rd_ia32_Pop(get_irn_dbg_info(load), irg, block, mem, pred_sp);
		arch_set_irn_register(arch_env, pop, reg);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
		arch_set_irn_register(arch_env, pred_sp, esp);

		sched_add_before(irn, pop);

		/* rewire the Load Projs to the Pop */
		foreach_out_edge_safe(load, edge, tmp) {
			ir_node *proj = get_edge_src_irn(edge);

			set_Proj_pred(proj, pop);
		}

		/* we can remove the Load now */
		sched_remove(load);
		kill_node(load);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}
/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(void)
{
	int i;

	for (i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_gp_regs[i];
		if (arch_register_type_is(reg, ignore))
			continue;

		if (be_peephole_get_value(CLASS_ia32_gp, i) == NULL)
			return &ia32_gp_regs[i];
	}

	return NULL;
}
/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param irg         the graph
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_graph *irg, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	ir_node *pop;
	ir_node *keep;
	ir_node *val;
	ir_node *in[1];

	pop = new_rd_ia32_Pop(dbgi, irg, block, new_NoMem(), stack);

	stack = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
	arch_set_irn_register(arch_env, stack, esp);
	val = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_res);
	arch_set_irn_register(arch_env, val, reg);

	sched_add_before(schedpoint, pop);

	in[0] = val;
	keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], irg, block, 1, in);
	sched_add_before(schedpoint, keep);

	return stack;
}
/**
 * Creates a Push instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param irg         the graph
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 *
 * @return the new stack value
 */
static ir_node *create_push(dbg_info *dbgi, ir_graph *irg, ir_node *block,
                            ir_node *stack, ir_node *schedpoint)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];

	ir_node *val   = ia32_new_Unknown_gp(cg);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *nomem = get_irg_no_mem(irg);
	ir_node *push  = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, nomem, val, stack);
	sched_add_before(schedpoint, push);

	stack = new_r_Proj(irg, block, push, mode_Iu, pn_ia32_Push_stack);
	arch_set_irn_register(arch_env, stack, esp);

	return stack;
}
/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
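/* Editor's illustration (not from the original sources): when the config
 * flags say plain esp arithmetic should be avoided, e.g.
 *     addl $4, %esp   becomes   popl %ecx      (needs a free register)
 *     subl $8, %esp   becomes   two pushes of don't-care values
 */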
static void peephole_be_IncSP(ir_node *node)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	const arch_register_t *reg;
	ir_graph              *irg = current_ir_graph;
	dbg_info              *dbgi;
	ir_node               *block;
	ir_node               *stack;
	int                    offset;

	/* first optimize incsp->incsp combinations */
	node = be_peephole_IncSP_IncSP(node);

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	if (arch_get_irn_register(arch_env, node) != esp)
		return;

	/* replace IncSP -4 by Pop freereg when possible */
	offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	if (offset < 0) {
		/* we need a free register for pop */
		reg = get_free_gp_reg();
		if (reg == NULL)
			return;

		dbgi  = get_irn_dbg_info(node);
		block = get_nodes_block(node);
		stack = be_get_IncSP_pred(node);

		stack = create_pop(dbgi, irg, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, irg, block, stack, node, reg);
		}
	} else {
		dbgi  = get_irn_dbg_info(node);
		block = get_nodes_block(node);
		stack = be_get_IncSP_pred(node);
		stack = create_push(dbgi, irg, block, stack, node);

		if (offset == +8) {
			stack = create_push(dbgi, irg, block, stack, node);
		}
	}

	be_peephole_exchange(node, stack);
}
/**
 * Peephole optimisation for ia32_Consts.
 */
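/* Editor's illustration (not from the original sources):
 *     movl $0, %eax      ; 5 bytes
 * becomes
 *     xorl %eax, %eax    ; 2 bytes
 * xor clobbers the flags, hence the flags-liveness check below. */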
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);
	const arch_register_t       *reg;
	ir_graph                    *irg = current_ir_graph;
	ir_node                     *block;
	dbg_info                    *dbgi;
	ir_node                     *produceval;
	ir_node                     *xor;
	ir_node                     *noreg;

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->offset != 0 || attr->symconst != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no one must be using them */
	if (be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
		return;

	reg = arch_get_irn_register(arch_env, node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	/* create xor(produceval, produceval) */
	block = get_nodes_block(node);
	dbgi  = get_irn_dbg_info(node);
	produceval = new_rd_ia32_ProduceVal(dbgi, irg, block);
	arch_set_irn_register(arch_env, produceval, reg);

	noreg = ia32_new_NoReg_gp(cg);
	xor = new_rd_ia32_Xor(dbgi, irg, block, noreg, noreg, new_NoMem(),
	                      produceval, produceval);
	arch_set_irn_register(arch_env, xor, reg);

	sched_add_before(node, produceval);
	sched_add_before(node, xor);

	copy_mark(node, xor);
	be_peephole_exchange(node, xor);
}
static INLINE int is_noreg(ia32_code_gen_t *cg, const ir_node *node)
{
	return node == cg->noreg_gp;
}
static ir_node *create_immediate_from_int(ia32_code_gen_t *cg, int val)
{
	ir_graph *irg         = current_ir_graph;
	ir_node  *start_block = get_irg_start_block(irg);
	ir_node  *immediate   = new_rd_ia32_Immediate(NULL, irg, start_block, NULL,
	                                              0, val);
	arch_set_irn_register(cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);

	return immediate;
}
static ir_node *create_immediate_from_am(ia32_code_gen_t *cg,
                                         const ir_node *node)
{
	ir_graph  *irg     = get_irn_irg(node);
	ir_node   *block   = get_nodes_block(node);
	int        offset  = get_ia32_am_offs_int(node);
	int        sc_sign = is_ia32_am_sc_sign(node);
	ir_entity *entity  = get_ia32_am_sc(node);
	ir_node   *res;

	res = new_rd_ia32_Immediate(NULL, irg, block, entity, sc_sign, offset);
	arch_set_irn_register(cg->arch_env, res, &ia32_gp_regs[REG_GP_NOREG]);
	return res;
}
static int is_am_one(const ir_node *node)
{
	int        offset = get_ia32_am_offs_int(node);
	ir_entity *entity = get_ia32_am_sc(node);

	return offset == 1 && entity == NULL;
}

static int is_am_minus_one(const ir_node *node)
{
	int        offset = get_ia32_am_offs_int(node);
	ir_entity *entity = get_ia32_am_sc(node);

	return offset == -1 && entity == NULL;
}
/**
 * Transforms a LEA into an Add or SHL if possible.
 */
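/* Editor's illustration (not from the original sources): when the LEA writes
 * to the same register as its base or index, e.g.
 *     leal 4(%eax), %eax       becomes   addl $4, %eax
 *     leal (%eax,%ebx), %eax   becomes   addl %ebx, %eax
 *     leal (,%eax,4), %eax     becomes   shll $2, %eax
 * The ALU forms are shorter but clobber the flags, hence the check below. */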
static void peephole_ia32_Lea(ir_node *node)
{
	const arch_env_t      *arch_env = cg->arch_env;
	ir_graph              *irg      = current_ir_graph;
	ir_node               *base, *index;
	const arch_register_t *base_reg;
	const arch_register_t *index_reg;
	const arch_register_t *out_reg;
	int                    scale;
	int                    has_immediates;
	ir_node               *op1, *op2;
	dbg_info              *dbgi;
	ir_node               *block, *res, *noreg, *nomem;

	assert(is_ia32_Lea(node));

	/* we can only do this if we are allowed to clobber the flags */
	if (be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
		return;

	base  = get_irn_n(node, n_ia32_Lea_base);
	index = get_irn_n(node, n_ia32_Lea_index);

	if (is_noreg(cg, base)) {
		base     = NULL;
		base_reg = NULL;
	} else {
		base_reg = arch_get_irn_register(arch_env, base);
	}
	if (is_noreg(cg, index)) {
		index     = NULL;
		index_reg = NULL;
	} else {
		index_reg = arch_get_irn_register(arch_env, index);
	}

	if (base == NULL && index == NULL) {
		/* we shouldn't construct these in the first place... */
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimisation warning: found immediate only lea\n");
#endif
		return;
	}

	out_reg = arch_get_irn_register(arch_env, node);
	scale   = get_ia32_am_scale(node);
	assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
	/* check if we have immediate values (frame entities should already be
	 * expressed in the offsets) */
	if (get_ia32_am_offs_int(node) != 0 || get_ia32_am_sc(node) != NULL) {
		has_immediates = 1;
	} else {
		has_immediates = 0;
	}

	/* we can transform leas where the out register is the same as either the
	 * base or index register back to an Add or Shl */
	if (out_reg == base_reg) {
		if (index == NULL) {
#ifdef DEBUG_libfirm
			if (!has_immediates) {
				ir_fprintf(stderr, "Optimisation warning: found lea which is "
				           "just a copy\n");
			}
#endif
			op1 = base;
			goto make_add_immediate;
		}
		if (scale == 0 && !has_immediates) {
			op1 = base;
			op2 = index;
			goto make_add;
		}
		/* can't create an add */
		return;
	} else if (out_reg == index_reg) {
		if (base == NULL) {
			if (has_immediates && scale == 0) {
				op1 = index;
				goto make_add_immediate;
			} else if (!has_immediates && scale > 0) {
				op1 = index;
				op2 = create_immediate_from_int(cg, scale);
				goto make_shl;
			} else if (!has_immediates) {
#ifdef DEBUG_libfirm
				ir_fprintf(stderr, "Optimisation warning: found lea which is "
				           "just a copy\n");
#endif
			}
		} else if (scale == 0 && !has_immediates) {
			op1 = index;
			op2 = base;
			goto make_add;
		}
		/* can't create an add */
		return;
	} else {
		/* can't create an add */
		return;
	}

make_add_immediate:
	if (ia32_cg_config.use_incdec) {
		if (is_am_one(node)) {
			dbgi  = get_irn_dbg_info(node);
			block = get_nodes_block(node);
			res   = new_rd_ia32_Inc(dbgi, irg, block, op1);
			arch_set_irn_register(arch_env, res, out_reg);
			goto exchange;
		}
		if (is_am_minus_one(node)) {
			dbgi  = get_irn_dbg_info(node);
			block = get_nodes_block(node);
			res   = new_rd_ia32_Dec(dbgi, irg, block, op1);
			arch_set_irn_register(arch_env, res, out_reg);
			goto exchange;
		}
	}
	op2 = create_immediate_from_am(cg, node);

make_add:
	dbgi  = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	noreg = ia32_new_NoReg_gp(cg);
	nomem = new_NoMem();
	res   = new_rd_ia32_Add(dbgi, irg, block, noreg, noreg, nomem, op1, op2);
	arch_set_irn_register(arch_env, res, out_reg);
	set_ia32_commutative(res);
	goto exchange;

make_shl:
	dbgi  = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	noreg = ia32_new_NoReg_gp(cg);
	nomem = new_NoMem();
	res   = new_rd_ia32_Shl(dbgi, irg, block, op1, op2);
	arch_set_irn_register(arch_env, res, out_reg);

exchange:
	SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, node));

	/* add the new ADD/SHL to the schedule */
	DBG_OPT_LEA2ADD(node, res);

	/* exchange the Add and the LEA */
	sched_add_before(node, res);
	copy_mark(node, res);
	be_peephole_exchange(node, res);
}
/**
 * Splits an Imul mem, imm into a Load mem and Imul reg, imm if possible.
 */
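/* Editor's illustration (not from the original sources): assuming %ecx is
 * free,
 *     imull $3, 8(%ebp), %eax
 * is split into
 *     movl 8(%ebp), %ecx
 *     imull $3, %ecx, %eax
 * for cores where the memory, immediate form of imul is slow. */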
static void peephole_ia32_Imul_split(ir_node *imul) {
	const ir_node         *right = get_irn_n(imul, n_ia32_IMul_right);
	const arch_register_t *reg;
	ir_node               *load, *block, *base, *index, *mem, *res, *noreg;
	dbg_info              *dbgi;
	ir_graph              *irg;

	if (!is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) {
		/* not a memory, immediate form: ignore */
		return;
	}
	/* we need a free register */
	reg = get_free_gp_reg();
	if (reg == NULL)
		return;

	/* fine, we can rebuild it */
	dbgi  = get_irn_dbg_info(imul);
	block = get_nodes_block(imul);
	irg   = current_ir_graph;
	base  = get_irn_n(imul, n_ia32_IMul_base);
	index = get_irn_n(imul, n_ia32_IMul_index);
	mem   = get_irn_n(imul, n_ia32_IMul_mem);
	load  = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);

	/* copy all attributes */
	set_irn_pinned(load, get_irn_pinned(imul));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, get_ia32_ls_mode(imul));

	set_ia32_am_scale(load, get_ia32_am_scale(imul));
	set_ia32_am_sc(load, get_ia32_am_sc(imul));
	set_ia32_am_offs_int(load, get_ia32_am_offs_int(imul));
	if (is_ia32_am_sc_sign(imul))
		set_ia32_am_sc_sign(load);
	if (is_ia32_use_frame(imul))
		set_ia32_use_frame(load);
	set_ia32_frame_ent(load, get_ia32_frame_ent(imul));

	sched_add_before(imul, load);

	mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M);
	res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);

	arch_set_irn_register(arch_env, res, reg);
	be_peephole_new_node(res);

	set_irn_n(imul, n_ia32_IMul_mem, mem);
	noreg = get_irn_n(imul, n_ia32_IMul_left);
	set_irn_n(imul, n_ia32_IMul_left, res);
	set_ia32_op_type(imul, ia32_Normal);
}
/**
 * Replace xorps r,r and xorpd r,r by pxor r,r
 */
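/* Editor's note: all three instructions set the register to zero; pxor is
 * used when ia32_cg_config.use_pxor is set (see the registration in
 * ia32_peephole_optimization below). */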
static void peephole_ia32_xZero(ir_node *xor) {
	set_irn_op(xor, op_ia32_xPzero);
}
/**
 * Register a peephole optimisation function.
 */
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) {
	assert(op->ops.generic == NULL);
	op->ops.generic = (op_func)func;
}
/* Perform peephole-optimizations. */
void ia32_peephole_optimization(ia32_code_gen_t *new_cg)
{
	cg       = new_cg;
	arch_env = cg->arch_env;

	/* register peephole optimisations */
	clear_irp_opcodes_generic_func();
	register_peephole_optimisation(op_ia32_Const,    peephole_ia32_Const);
	register_peephole_optimisation(op_be_IncSP,      peephole_be_IncSP);
	register_peephole_optimisation(op_ia32_Lea,      peephole_ia32_Lea);
	register_peephole_optimisation(op_ia32_Cmp,      peephole_ia32_Cmp);
	register_peephole_optimisation(op_ia32_Cmp8Bit,  peephole_ia32_Cmp);
	register_peephole_optimisation(op_ia32_Test,     peephole_ia32_Test);
	register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
	register_peephole_optimisation(op_be_Return,     peephole_ia32_Return);
	if (!ia32_cg_config.use_imul_mem_imm32)
		register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
	if (ia32_cg_config.use_pxor)
		register_peephole_optimisation(op_ia32_xZero, peephole_ia32_xZero);

	be_peephole_opt(cg->birg);
}
/**
 * Removes the node from the schedule if it is not used anymore. If the node
 * has mode_T, all its Projs are removed as well.
 *
 * @param node  The node to be removed from the schedule
 */
static INLINE void try_kill(ir_node *node)
{
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge, *next;
		foreach_out_edge_safe(node, edge, next) {
			ir_node *proj = get_edge_src_irn(edge);
			try_kill(proj);
		}
	}

	if (get_irn_n_edges(node) != 0)
		return;

	if (sched_is_scheduled(node)) {
		sched_remove(node);
	}

	kill_node(node);
}
static void optimize_conv_store(ir_node *node)
{
	ir_node *pred;
	ir_node *pred_proj;
	ir_mode *conv_mode;
	ir_mode *store_mode;

	if (!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
		return;

	assert(n_ia32_Store_val == n_ia32_Store8Bit_val);
	pred_proj = get_irn_n(node, n_ia32_Store_val);
	if (is_Proj(pred_proj)) {
		pred = get_Proj_pred(pred_proj);
	} else {
		pred = pred_proj;
	}
	if (!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
		return;
	if (get_ia32_op_type(pred) != ia32_Normal)
		return;

	/* the store only stores the lower bits, so we only need the conv
	 * if it shrinks the mode */
	conv_mode  = get_ia32_ls_mode(pred);
	store_mode = get_ia32_ls_mode(node);
	if (get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
		return;

	set_irn_n(node, n_ia32_Store_val, get_irn_n(pred, n_ia32_Conv_I2I_val));
	if (get_irn_n_edges(pred_proj) == 0) {
		kill_node(pred_proj);
		if (pred != pred_proj)
			kill_node(pred);
	}
}
static void optimize_load_conv(ir_node *node)
{
	ir_node *pred, *predpred;
	ir_mode *load_mode;
	ir_mode *conv_mode;

	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
		return;

	assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
	pred = get_irn_n(node, n_ia32_Conv_I2I_val);
	if (!is_Proj(pred))
		return;

	predpred = get_Proj_pred(pred);
	if (!is_ia32_Load(predpred))
		return;

	/* the load is sign extending the upper bits, so we only need the conv
	 * if it shrinks the mode */
	load_mode = get_ia32_ls_mode(predpred);
	conv_mode = get_ia32_ls_mode(node);
	if (get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
		return;

	if (get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
		/* change the load if it has only 1 user */
		if (get_irn_n_edges(pred) == 1) {
			ir_mode *newmode;
			if (get_mode_sign(conv_mode)) {
				newmode = find_signed_mode(load_mode);
			} else {
				newmode = find_unsigned_mode(load_mode);
			}
			assert(newmode != NULL);
			set_ia32_ls_mode(predpred, newmode);
		} else {
			/* otherwise we have to keep the conv */
			return;
		}
	}

	/* kill the conv */
	exchange(node, pred);
}
static void optimize_conv_conv(ir_node *node)
{
	ir_node *pred_proj, *pred, *result_conv;
	ir_mode *pred_mode, *conv_mode;
	int      conv_mode_bits;
	int      pred_mode_bits;

	if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
		return;

	assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
	pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
	if (is_Proj(pred_proj))
		pred = get_Proj_pred(pred_proj);
	else
		pred = pred_proj;

	if (!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
		return;

	/* we know that after a conv, the upper bits are sign extended,
	 * so we only need the 2nd conv if it shrinks the mode */
	conv_mode      = get_ia32_ls_mode(node);
	conv_mode_bits = get_mode_size_bits(conv_mode);
	pred_mode      = get_ia32_ls_mode(pred);
	pred_mode_bits = get_mode_size_bits(pred_mode);

	if (conv_mode_bits == pred_mode_bits
			&& get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
		result_conv = pred_proj;
	} else if (conv_mode_bits <= pred_mode_bits) {
		/* if the 2nd conv is smaller than the first conv, then we can always
		 * take the 2nd conv */
		if (get_irn_n_edges(pred_proj) == 1) {
			result_conv = pred_proj;
			set_ia32_ls_mode(pred, conv_mode);

			/* Argh: We must change the opcode to 8bit AND copy the register
			 * constraints */
			if (get_mode_size_bits(conv_mode) == 8) {
				set_irn_op(pred, op_ia32_Conv_I2I8Bit);
				set_ia32_in_req_all(pred, get_ia32_in_req_all(node));
			}
		} else {
			/* we don't want to end up with 2 loads, so we better do nothing */
			if (get_irn_mode(pred) == mode_T) {
				return;
			}

			result_conv = exact_copy(pred);
			set_ia32_ls_mode(result_conv, conv_mode);

			/* Argh: We must change the opcode to 8bit AND copy the register
			 * constraints */
			if (get_mode_size_bits(conv_mode) == 8) {
				set_irn_op(result_conv, op_ia32_Conv_I2I8Bit);
				set_ia32_in_req_all(result_conv, get_ia32_in_req_all(node));
			}
		}
	} else {
		/* if both convs have the same sign, then we can take the smaller one */
		if (get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
			result_conv = pred_proj;
		} else {
			/* no optimisation possible if the smaller conv is sign-extend */
			if (mode_is_signed(pred_mode)) {
				return;
			}
			/* we can take the smaller conv if it is unsigned */
			result_conv = pred_proj;
		}
	}

	/* kill the conv */
	exchange(node, result_conv);

	if (get_irn_n_edges(pred_proj) == 0) {
		kill_node(pred_proj);
		if (pred != pred_proj)
			kill_node(pred);
	}
	optimize_conv_conv(result_conv);
}
static void optimize_node(ir_node *node, void *env)
{
	(void) env;

	optimize_load_conv(node);
	optimize_conv_store(node);
	optimize_conv_conv(node);
}
/**
 * Performs conv and address mode optimizations.
 */
void ia32_optimize_graph(ia32_code_gen_t *cg)
{
	irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg);

	if (cg->dump)
		be_dump(cg->irg, "-opt", dump_ir_block_graph_sched);
}
void ia32_init_optimize(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize");
}