2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Implements several optimizations for IA32.
23 * @author Matthias Braun, Christian Wuerdig
34 #include "firm_types.h"
46 #include "../benode_t.h"
47 #include "../besched_t.h"
48 #include "../bepeephole.h"
50 #include "ia32_new_nodes.h"
51 #include "ia32_optimize.h"
52 #include "bearch_ia32_t.h"
53 #include "gen_ia32_regalloc_if.h"
54 #include "ia32_common_transform.h"
55 #include "ia32_transform.h"
56 #include "ia32_dbg_stat.h"
57 #include "ia32_util.h"
58 #include "ia32_architecture.h"
/* Module-wide state for the ia32 peephole pass.
 * arch_env is assigned in ia32_peephole_optimization() below; cg is
 * presumably assigned there too (the assignment line was lost in this
 * extraction — TODO confirm against the full source). */
60 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
62 static const arch_env_t *arch_env;
63 static ia32_code_gen_t *cg;
65 static void copy_mark(const ir_node *old, ir_node *new)
67 if (is_ia32_is_reload(old))
68 set_ia32_is_reload(new);
69 if (is_ia32_is_spill(old))
70 set_ia32_is_spill(new);
71 if (is_ia32_is_remat(old))
72 set_ia32_is_remat(new);
/* NOTE(review): this listing is an extraction with dropped lines (the inline
 * numbering jumps, e.g. 95 -> 110): the switch's case labels, the early
 * returns and the function's tail are missing from this view. */
76 * Returns non-zero if the given node produces
79 * @param node the node to check
80 * @param pn if >= 0, the projection number of the used result
82 static int produces_zero_flag(ir_node *node, int pn)
85 const ia32_immediate_attr_t *imm_attr;
87 if (!is_ia32_irn(node))
91 if (pn != pn_ia32_res)
95 switch (get_ia32_irn_opcode(node)) {
/* ShlD/ShrD share the same count-operand index, so a single read suffices */
110 assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
111 count = get_irn_n(node, n_ia32_ShlD_count);
112 goto check_shift_amount;
/* Shl/Shr/Sar likewise share the count-operand index */
117 assert(n_ia32_Shl_count == n_ia32_Shr_count
118 && n_ia32_Shl_count == n_ia32_Sar_count);
119 count = get_irn_n(node, n_ia32_Shl_count);
121 /* when shift count is zero the flags are not affected, so we can only
122 * do this for constants != 0 */
123 if (!is_ia32_Immediate(count))
126 imm_attr = get_ia32_immediate_attr_const(count);
/* a symconst means the amount is not a known integer: cannot prove != 0 */
127 if (imm_attr->symconst != NULL)
/* ia32 only uses the low 5 bits of the count; 0 mod 32 leaves flags alone */
129 if ((imm_attr->offset & 0x1f) == 0)
/* NOTE(review): gapped extraction — the local declarations, the early
 * return for the mode_T case, the Proj-number argument of new_r_Proj and
 * the final return are on lines missing from this view. */
140 * If the given node has not mode_T, creates a mode_T version (with a result Proj).
142 * @param node the node to change
144 * @return the new mode_T node (if the mode was changed) or node itself
146 static ir_node *turn_into_mode_t(ir_node *node)
153 if(get_irn_mode(node) == mode_T)
156 assert(get_irn_mode(node) == mode_Iu);
/* clone the node and retype the clone to mode_T */
158 new_node = exact_copy(node);
159 set_irn_mode(new_node, mode_T);
161 block = get_nodes_block(new_node);
162 res_proj = new_r_Proj(current_ir_graph, block, new_node, mode_Iu,
/* the result Proj inherits the register assigned to the original node */
165 reg = arch_get_irn_register(arch_env, node);
166 arch_set_irn_register(arch_env, res_proj, reg);
168 sched_add_before(node, new_node);
169 be_peephole_exchange(node, res_proj);
/* NOTE(review): gapped extraction — several local declarations, the early
 * returns after the guards, and the `} else {` between the Test and
 * Test8Bit constructions are on missing lines. */
174 * Replace Cmp(x, 0) by a Test(x, x)
176 static void peephole_ia32_Cmp(ir_node *const node)
179 ia32_immediate_attr_t const *imm;
186 ia32_attr_t const *attr;
190 arch_register_t const *reg;
191 ir_edge_t const *edge;
192 ir_edge_t const *tmp;
/* only plain (non address-mode) compares are handled */
194 if (get_ia32_op_type(node) != ia32_Normal)
/* the right operand must be an immediate ... */
197 right = get_irn_n(node, n_ia32_Cmp_right);
198 if (!is_ia32_Immediate(right))
/* ... and that immediate must be exactly 0 (no symconst, no offset) */
201 imm = get_ia32_immediate_attr_const(right);
202 if (imm->symconst != NULL || imm->offset != 0)
205 dbgi = get_irn_dbg_info(node);
206 irg = current_ir_graph;
207 block = get_nodes_block(node);
208 noreg = ia32_new_NoReg_gp(cg);
209 nomem = get_irg_no_mem(irg);
210 op = get_irn_n(node, n_ia32_Cmp_left);
211 attr = get_irn_generic_attr(node);
212 ins_permuted = attr->data.ins_permuted;
213 cmp_unsigned = attr->data.cmp_unsigned;
/* build Test(x, x), keeping the 8bit variant for Cmp8Bit */
215 if (is_ia32_Cmp(node)) {
216 test = new_rd_ia32_Test(dbgi, irg, block, noreg, noreg, nomem,
217 op, op, ins_permuted, cmp_unsigned);
219 test = new_rd_ia32_Test8Bit(dbgi, irg, block, noreg, noreg, nomem,
220 op, op, ins_permuted, cmp_unsigned);
222 set_ia32_ls_mode(test, get_ia32_ls_mode(node));
/* the Test takes over the register of the Cmp */
224 reg = arch_get_irn_register(arch_env, node);
225 arch_set_irn_register(arch_env, test, reg);
/* redirect all users (flag consumers) to the new Test */
227 foreach_out_edge_safe(node, edge, tmp) {
228 ir_node *const user = get_edge_src_irn(edge);
231 exchange(user, test);
234 sched_add_before(node, test);
235 copy_mark(node, test);
236 be_peephole_exchange(node, test);
/* NOTE(review): gapped extraction — guards' returns, some declarations and
 * the Proj-number argument of new_r_Proj are on missing lines. */
240 * Peephole optimization for Test instructions.
241 * We can remove the Test, if a zero flags was produced which is still
244 static void peephole_ia32_Test(ir_node *node)
246 ir_node *left = get_irn_n(node, n_ia32_Test_left);
247 ir_node *right = get_irn_n(node, n_ia32_Test_right);
253 const ir_edge_t *edge;
255 assert(n_ia32_Test_left == n_ia32_Test8Bit_left
256 && n_ia32_Test_right == n_ia32_Test8Bit_right);
258 /* we need a test for 0 */
/* the flag producer must live in the same block, else we cannot reuse it */
262 block = get_nodes_block(node);
263 if(get_nodes_block(left) != block)
/* unwrap a Proj to reach the real producer, remembering which result */
267 pn = get_Proj_proj(left);
268 left = get_Proj_pred(left);
271 /* happens rarely, but if it does code will panic' */
272 if (is_ia32_Unknown_GP(left))
275 /* walk schedule up and abort when we find left or some other node destroys
277 schedpoint = sched_prev(node);
278 while(schedpoint != left) {
279 schedpoint = sched_prev(schedpoint);
/* any flag-modifying instruction between producer and Test kills the idea */
280 if(arch_irn_is(arch_env, schedpoint, modify_flags))
282 if(schedpoint == block)
283 panic("couldn't find left");
286 /* make sure only Lg/Eq tests are used */
287 foreach_out_edge(node, edge) {
288 ir_node *user = get_edge_src_irn(edge);
289 int pnc = get_ia32_condcode(user);
291 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg) {
/* the producer must actually set the zero flag for its result */
296 if(!produces_zero_flag(left, pn))
299 left = turn_into_mode_t(left);
/* reroute flag consumers to a flags Proj of the producer */
301 flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
302 flags_proj = new_r_Proj(current_ir_graph, block, left, flags_mode,
304 arch_set_irn_register(arch_env, flags_proj, &ia32_flags_regs[REG_EFLAGS]);
306 assert(get_irn_mode(node) != mode_T);
308 be_peephole_exchange(node, flags_proj);
/* NOTE(review): gapped extraction — the case labels of the switch and the
 * loop/function tails are on missing lines. */
312 * AMD Athlon works faster when RET is not destination of
313 * conditional jump or directly preceded by other jump instruction.
314 * Can be avoided by placing a Rep prefix before the return.
316 static void peephole_ia32_Return(ir_node *node) {
317 ir_node *block, *irn;
/* only pad returns when the target CPU config asks for it */
319 if (!ia32_cg_config.use_pad_return)
322 block = get_nodes_block(node);
324 /* check if this return is the first on the block */
325 sched_foreach_reverse_from(node, irn) {
326 switch (get_irn_opcode(irn)) {
328 /* the return node itself, ignore */
333 /* ignore no code generated */
336 /* arg, IncSP 0 nodes might occur, ignore these */
337 if (be_get_IncSP_offset(irn) == 0)
347 /* ensure, that the 3 byte return is generated */
348 be_Return_set_emit_pop(node, 1);
351 /* only optimize up to 48 stores behind IncSPs */
352 #define MAXPUSH_OPTIMIZE 48
/* NOTE(review): gapped extraction — several declarations, guard returns and
 * loop-control lines are missing from this view. */
355 * Tries to create Push's from IncSP, Store combinations.
356 * The Stores are replaced by Push's, the IncSP is modified
357 * (possibly into IncSP 0, but not removed).
359 static void peephole_IncSP_Store_to_push(ir_node *irn)
365 ir_node *stores[MAXPUSH_OPTIMIZE];
370 ir_node *first_push = NULL;
371 ir_edge_t const *edge;
372 ir_edge_t const *next;
374 memset(stores, 0, sizeof(stores));
376 assert(be_is_IncSP(irn));
378 inc_ofs = be_get_IncSP_offset(irn);
383 * We first walk the schedule after the IncSP node as long as we find
384 * suitable Stores that could be transformed to a Push.
385 * We save them into the stores array which is sorted by the frame offset/4
386 * attached to the node
389 for (node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
394 /* it has to be a Store */
395 if (!is_ia32_Store(node))
398 /* it has to use our sp value */
399 if (get_irn_n(node, n_ia32_base) != irn)
401 /* Store has to be attached to NoMem */
402 mem = get_irn_n(node, n_ia32_mem);
406 /* unfortunately we can't support the full AMs possible for push at the
407 * moment. TODO: fix this */
408 if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
411 offset = get_ia32_am_offs_int(node);
412 /* we should NEVER access uninitialized stack BELOW the current SP */
415 /* storing at half-slots is bad */
416 if ((offset & 3) != 0)
/* only offsets inside the IncSP window and the stores array are usable */
419 if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
421 storeslot = offset >> 2;
423 /* storing into the same slot twice is bad (and shouldn't happen...) */
424 if (stores[storeslot] != NULL)
427 stores[storeslot] = node;
428 if (storeslot > maxslot)
/* find the first contiguous run of filled slots starting at slot 0 */
434 for (i = -1; i < maxslot; ++i) {
435 if (stores[i + 1] == NULL)
439 /* walk through the Stores and create Pushs for them */
440 block = get_nodes_block(irn);
441 spmode = get_irn_mode(irn);
/* iterate from the highest contiguous slot down to slot 0, since Push
 * grows the stack downwards */
443 for (; i >= 0; --i) {
444 const arch_register_t *spreg;
446 ir_node *val, *mem, *mem_proj;
447 ir_node *store = stores[i];
448 ir_node *noreg = ia32_new_NoReg_gp(cg);
450 val = get_irn_n(store, n_ia32_unary_op);
451 mem = get_irn_n(store, n_ia32_mem);
452 spreg = arch_get_irn_register(cg->arch_env, curr_sp);
454 push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, mem, val, curr_sp);
455 copy_mark(store, push);
457 if (first_push == NULL)
460 sched_add_after(curr_sp, push);
462 /* create stackpointer Proj */
463 curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
464 arch_set_irn_register(cg->arch_env, curr_sp, spreg);
466 /* create memory Proj */
467 mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
469 /* use the memproj now */
470 be_peephole_exchange(store, mem_proj);
/* reroute all other users of the IncSP to the last Push's stack Proj */
475 foreach_out_edge_safe(irn, edge, next) {
476 ir_node *const src = get_edge_src_irn(edge);
477 int const pos = get_edge_src_pos(edge);
479 if (src == first_push)
482 set_irn_n(src, pos, curr_sp);
/* shrink the IncSP by the space now claimed by the Pushs */
485 be_set_IncSP_offset(irn, inc_ofs);
/* Tries to turn a Store that writes into freshly IncSP-allocated stack space
 * into a Push.
 * NOTE(review): heavily gapped extraction — declarations, many guard
 * returns and the function tail are on missing lines. */
489 static void peephole_store_incsp(ir_node *store)
498 ir_node *am_base = get_irn_n(store, n_ia32_Store_base);
/* the store must address via an IncSP in the same block */
499 if (!be_is_IncSP(am_base)
500 || get_nodes_block(am_base) != get_nodes_block(store))
502 mem = get_irn_n(store, n_ia32_Store_mem);
503 if (!is_ia32_NoReg_GP(get_irn_n(store, n_ia32_Store_index))
/* the IncSP must actually allocate space (positive offset) */
507 int incsp_offset = be_get_IncSP_offset(am_base);
508 if (incsp_offset <= 0)
511 /* we have to be at offset 0 */
512 int my_offset = get_ia32_am_offs_int(store);
513 if (my_offset != 0) {
514 /* TODO here: find out wether there is a store with offset 0 before
515 * us and wether we can move it down to our place */
518 ir_mode *ls_mode = get_ia32_ls_mode(store);
519 int my_store_size = get_mode_size_bytes(ls_mode);
/* the store must fit entirely inside the space the IncSP allocates */
521 if (my_offset + my_store_size > incsp_offset)
524 /* correctness checking:
525 - noone else must write to that stackslot
526 (because after translation incsp won't allocate it anymore)
528 sched_foreach_reverse_from(store, node) {
534 /* make sure noone else can use the space on the stack */
535 arity = get_irn_arity(node);
536 for (i = 0; i < arity; ++i) {
537 ir_node *pred = get_irn_n(node, i);
541 if (i == n_ia32_base &&
542 (get_ia32_op_type(node) == ia32_AddrModeS
543 || get_ia32_op_type(node) == ia32_AddrModeD)) {
544 int node_offset = get_ia32_am_offs_int(node);
545 ir_mode *node_ls_mode = get_ia32_ls_mode(node);
/* NOTE(review): BUG — get_mode_size_bytes() is handed the ir_node
 * instead of node_ls_mode; this should read
 * get_mode_size_bytes(node_ls_mode). Verify against upstream. */
546 int node_size = get_mode_size_bytes(node);
547 /* overlapping with our position? abort */
548 if (node_offset < my_offset + my_store_size
549 && node_offset + node_size >= my_offset)
551 /* otherwise it's fine */
555 /* strange use of esp: abort */
560 /* all ok, change to push */
561 dbgi = get_irn_dbg_info(store);
562 block = get_nodes_block(store);
563 noreg = ia32_new_NoReg_gp(cg);
566 push = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, mem,
568 create_push(dbgi, current_ir_graph, block, am_base, store);
573 * Return true if a mode can be stored in the GP register set
575 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
/* NOTE(review): the bodies of the two guards below were lost in extraction;
 * presumably both `return 0;` (fpcw and >32-bit modes do not fit a GP
 * register) — TODO confirm against the full source. */
576 if (mode == mode_fpcw)
578 if (get_mode_size_bits(mode) > 32)
580 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
/* NOTE(review): gapped extraction — declarations, `continue`/`break`
 * statements and guard returns are on missing lines throughout. */
584 * Tries to create Pops from Load, IncSP combinations.
585 * The Loads are replaced by Pops, the IncSP is modified
586 * (possibly into IncSP 0, but not removed).
588 static void peephole_Load_IncSP_to_pop(ir_node *irn)
590 const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
591 int i, maxslot, inc_ofs, ofs;
592 ir_node *node, *pred_sp, *block;
593 ir_node *loads[MAXPUSH_OPTIMIZE];
/* regmask tracks registers a future Pop will define; copymask tracks which
 * of those may still be touched by a skipped Copy */
595 unsigned regmask = 0;
596 unsigned copymask = ~0;
598 memset(loads, 0, sizeof(loads));
599 assert(be_is_IncSP(irn));
/* a Pop frees stack, so we look at IncSPs that deallocate (offset < 0) */
601 inc_ofs = -be_get_IncSP_offset(irn);
606 * We first walk the schedule before the IncSP node as long as we find
607 * suitable Loads that could be transformed to a Pop.
608 * We save them into the stores array which is sorted by the frame offset/4
609 * attached to the node
612 pred_sp = be_get_IncSP_pred(irn);
613 for (node = sched_prev(irn); !sched_is_end(node); node = sched_prev(node)) {
616 const arch_register_t *sreg, *dreg;
618 /* it has to be a Load */
619 if (!is_ia32_Load(node)) {
620 if (be_is_Copy(node)) {
621 if (!mode_needs_gp_reg(get_irn_mode(node))) {
622 /* not a GP copy, ignore */
625 dreg = arch_get_irn_register(arch_env, node);
626 sreg = arch_get_irn_register(arch_env, be_get_Copy_op(node));
627 if (regmask & copymask & (1 << sreg->index)) {
630 if (regmask & copymask & (1 << dreg->index)) {
633 /* we CAN skip Copies if neither the destination nor the source
634 * is not in our regmask, ie none of our future Pop will overwrite it */
635 regmask |= (1 << dreg->index) | (1 << sreg->index);
636 copymask &= ~((1 << dreg->index) | (1 << sreg->index));
642 /* we can handle only GP loads */
643 if (!mode_needs_gp_reg(get_ia32_ls_mode(node)))
646 /* it has to use our predecessor sp value */
647 if (get_irn_n(node, n_ia32_base) != pred_sp) {
648 /* it would be ok if this load does not use a Pop result,
649 * but we do not check this */
653 /* should have NO index */
654 if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
657 offset = get_ia32_am_offs_int(node);
658 /* we should NEVER access uninitialized stack BELOW the current SP */
661 /* storing at half-slots is bad */
662 if ((offset & 3) != 0)
665 if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
667 /* ignore those outside the possible windows */
668 if (offset > inc_ofs - 4)
670 loadslot = offset >> 2;
672 /* loading from the same slot twice is bad (and shouldn't happen...) */
673 if (loads[loadslot] != NULL)
676 dreg = arch_get_irn_register(arch_env, node);
677 if (regmask & (1 << dreg->index)) {
678 /* this register is already used */
681 regmask |= 1 << dreg->index;
683 loads[loadslot] = node;
684 if (loadslot > maxslot)
691 /* find the first slot */
692 for (i = maxslot; i >= 0; --i) {
693 ir_node *load = loads[i];
/* ofs = stack space not covered by the generated Pops */
699 ofs = inc_ofs - (maxslot + 1) * 4;
702 /* create a new IncSP if needed */
703 block = get_nodes_block(irn);
706 pred_sp = be_new_IncSP(esp, irg, block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
707 sched_add_before(irn, pred_sp);
710 /* walk through the Loads and create Pops for them */
711 for (++i; i <= maxslot; ++i) {
712 ir_node *load = loads[i];
714 const ir_edge_t *edge, *tmp;
715 const arch_register_t *reg;
717 mem = get_irn_n(load, n_ia32_mem);
718 reg = arch_get_irn_register(arch_env, load);
720 pop = new_rd_ia32_Pop(get_irn_dbg_info(load), irg, block, mem, pred_sp);
721 arch_set_irn_register(arch_env, pop, reg);
723 copy_mark(load, pop);
725 /* create stackpointer Proj */
726 pred_sp = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
727 arch_set_irn_register(arch_env, pred_sp, esp);
729 sched_add_before(irn, pop);
/* reroute the Load's Projs onto the Pop */
732 foreach_out_edge_safe(load, edge, tmp) {
733 ir_node *proj = get_edge_src_irn(edge);
735 set_Proj_pred(proj, pop);
738 /* we can remove the Load now */
743 be_set_IncSP_offset(irn, -ofs);
744 be_set_IncSP_pred(irn, pred_sp);
/* NOTE(review): gapped extraction — the loop-variable declaration, the
 * `continue` for ignored registers and the final `return NULL;` are on
 * missing lines. */
749 * Find a free GP register if possible, else return NULL.
751 static const arch_register_t *get_free_gp_reg(void)
755 for(i = 0; i < N_ia32_gp_REGS; ++i) {
756 const arch_register_t *reg = &ia32_gp_regs[i];
/* skip reserved registers (esp etc.) */
757 if(arch_register_type_is(reg, ignore))
/* a register with no live value at the current peephole point is free */
760 if(be_peephole_get_value(CLASS_ia32_gp, i) == NULL)
761 return &ia32_gp_regs[i];
/* NOTE(review): gapped extraction — local declarations, the keep-input
 * array setup and the `return stack;` are on missing lines. */
768 * Creates a Pop instruction before the given schedule point.
770 * @param dbgi debug info
771 * @param irg the graph
772 * @param block the block
773 * @param stack the previous stack value
774 * @param schedpoint the new node is added before this node
775 * @param reg the register to pop
777 * @return the new stack value
779 static ir_node *create_pop(dbg_info *dbgi, ir_graph *irg, ir_node *block,
780 ir_node *stack, ir_node *schedpoint,
781 const arch_register_t *reg)
783 const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
789 pop = new_rd_ia32_Pop(dbgi, irg, block, new_NoMem(), stack);
/* stack Proj carries esp; value Proj carries the requested register */
791 stack = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
792 arch_set_irn_register(arch_env, stack, esp);
793 val = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_res);
794 arch_set_irn_register(arch_env, val, reg);
796 sched_add_before(schedpoint, pop);
/* keep the popped value alive so it is not removed as dead code */
799 keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], irg, block, 1, in);
800 sched_add_before(schedpoint, keep);
/* NOTE(review): gapped extraction — the opening brace and the trailing
 * `return stack;` are on missing lines. */
806 * Creates a Push instruction before the given schedule point.
808 * @param dbgi debug info
809 * @param irg the graph
810 * @param block the block
811 * @param stack the previous stack value
812 * @param schedpoint the new node is added before this node
 * (stale `@param reg` removed: create_push takes no register argument —
 * it pushes an Unknown value, only the stack adjustment matters)
815 * @return the new stack value
817 static ir_node *create_push(dbg_info *dbgi, ir_graph *irg, ir_node *block,
818 ir_node *stack, ir_node *schedpoint)
820 const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
822 ir_node *val = ia32_new_Unknown_gp(cg);
823 ir_node *noreg = ia32_new_NoReg_gp(cg);
824 ir_node *nomem = get_irg_no_mem(irg);
825 ir_node *push = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, nomem, val, stack);
826 sched_add_before(schedpoint, push);
828 stack = new_r_Proj(irg, block, push, mode_Iu, pn_ia32_Push_stack);
829 arch_set_irn_register(arch_env, stack, esp);
/* NOTE(review): gapped extraction — local declarations, several returns and
 * the `if` lines choosing between 4- and 8-byte adjustment are missing. */
835 * Optimize an IncSp by replacing it with Push/Pop.
837 static void peephole_be_IncSP(ir_node *node)
839 const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
840 const arch_register_t *reg;
841 ir_graph *irg = current_ir_graph;
847 /* first optimize incsp->incsp combinations */
848 node = be_peephole_IncSP_IncSP(node);
850 /* transform IncSP->Store combinations to Push where possible */
851 peephole_IncSP_Store_to_push(node);
853 /* transform Load->IncSP combinations to Pop where possible */
854 peephole_Load_IncSP_to_pop(node);
856 if (arch_get_irn_register(arch_env, node) != esp)
859 /* replace IncSP -4 by Pop freereg when possible */
860 offset = be_get_IncSP_offset(node);
/* bail out unless the CPU config says Push/Pop beats add/sub esp here */
861 if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
862 (offset != -4 || ia32_cg_config.use_add_esp_4) &&
863 (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
864 (offset != +8 || ia32_cg_config.use_sub_esp_8))
868 /* we need a free register for pop */
869 reg = get_free_gp_reg();
/* negative offsets: deallocate via one or two Pops */
873 dbgi = get_irn_dbg_info(node);
874 block = get_nodes_block(node);
875 stack = be_get_IncSP_pred(node);
877 stack = create_pop(dbgi, irg, block, stack, node, reg);
880 stack = create_pop(dbgi, irg, block, stack, node, reg);
/* positive offsets: allocate via one or two Pushs */
883 dbgi = get_irn_dbg_info(node);
884 block = get_nodes_block(node);
885 stack = be_get_IncSP_pred(node);
886 stack = create_push(dbgi, irg, block, stack, node);
889 stack = create_push(dbgi, irg, block, stack, node);
893 be_peephole_exchange(node, stack);
/* NOTE(review): gapped extraction — local declarations and the returns after
 * the first three guards are on missing lines. */
897 * Peephole optimisation for ia32_Const's
899 static void peephole_ia32_Const(ir_node *node)
901 const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);
902 const arch_register_t *reg;
903 ir_graph *irg = current_ir_graph;
910 /* try to transform a mov 0, reg to xor reg reg */
911 if (attr->offset != 0 || attr->symconst != NULL)
/* user explicitly asked for mov 0 (e.g. for code size/timing) */
913 if (ia32_cg_config.use_mov_0)
915 /* xor destroys the flags, so no-one must be using them */
916 if (be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
919 reg = arch_get_irn_register(arch_env, node);
920 assert(be_peephole_get_reg_value(reg) == NULL);
922 /* create xor(produceval, produceval) */
923 block = get_nodes_block(node);
924 dbgi = get_irn_dbg_info(node);
925 produceval = new_rd_ia32_ProduceVal(dbgi, irg, block);
926 arch_set_irn_register(arch_env, produceval, reg);
928 noreg = ia32_new_NoReg_gp(cg);
929 xor = new_rd_ia32_Xor(dbgi, irg, block, noreg, noreg, new_NoMem(),
930 produceval, produceval);
931 arch_set_irn_register(arch_env, xor, reg);
933 sched_add_before(node, produceval);
934 sched_add_before(node, xor);
936 copy_mark(node, xor);
937 be_peephole_exchange(node, xor);
940 static INLINE int is_noreg(ia32_code_gen_t *cg, const ir_node *node)
942 return node == cg->noreg_gp;
/* Build an ia32 Immediate node for the plain integer @p val, placed in the
 * start block and tagged with the GP NoReg register.
 * NOTE(review): gapped extraction — the continuation line carrying the
 * remaining new_rd_ia32_Immediate() arguments and the return are missing. */
945 static ir_node *create_immediate_from_int(ia32_code_gen_t *cg, int val)
947 ir_graph *irg = current_ir_graph;
948 ir_node *start_block = get_irg_start_block(irg);
949 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block, NULL,
951 arch_set_irn_register(cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
/* Build an ia32 Immediate node replicating the address-mode constant part
 * (offset + symconst) of @p node.
 * NOTE(review): gapped extraction — the signature continuation line
 * (presumably `const ir_node *node)`), the `res` declaration and the
 * return are missing from this view. */
956 static ir_node *create_immediate_from_am(ia32_code_gen_t *cg,
959 ir_graph *irg = get_irn_irg(node);
960 ir_node *block = get_nodes_block(node);
961 int offset = get_ia32_am_offs_int(node);
962 int sc_sign = is_ia32_am_sc_sign(node);
963 ir_entity *entity = get_ia32_am_sc(node);
966 res = new_rd_ia32_Immediate(NULL, irg, block, entity, sc_sign, offset);
967 arch_set_irn_register(cg->arch_env, res, &ia32_gp_regs[REG_GP_NOREG]);
971 static int is_am_one(const ir_node *node)
973 int offset = get_ia32_am_offs_int(node);
974 ir_entity *entity = get_ia32_am_sc(node);
976 return offset == 1 && entity == NULL;
979 static int is_am_minus_one(const ir_node *node)
981 int offset = get_ia32_am_offs_int(node);
982 ir_entity *entity = get_ia32_am_sc(node);
984 return offset == -1 && entity == NULL;
/* NOTE(review): heavily gapped extraction — many declarations, returns,
 * else-branches, labels (make_add_immediate, make_add, make_shl appear to be
 * goto targets) and assignments are on missing lines in this view. */
988 * Transforms a LEA into an Add or SHL if possible.
990 static void peephole_ia32_Lea(ir_node *node)
992 const arch_env_t *arch_env = cg->arch_env;
993 ir_graph *irg = current_ir_graph;
996 const arch_register_t *base_reg;
997 const arch_register_t *index_reg;
998 const arch_register_t *out_reg;
1009 assert(is_ia32_Lea(node));
1011 /* we can only do this if are allowed to globber the flags */
1012 if(be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
1015 base = get_irn_n(node, n_ia32_Lea_base);
1016 index = get_irn_n(node, n_ia32_Lea_index);
/* normalize: treat NoReg operands as absent (NULL) */
1018 if(is_noreg(cg, base)) {
1022 base_reg = arch_get_irn_register(arch_env, base);
1024 if(is_noreg(cg, index)) {
1028 index_reg = arch_get_irn_register(arch_env, index);
1031 if(base == NULL && index == NULL) {
1032 /* we shouldn't construct these in the first place... */
1033 #ifdef DEBUG_libfirm
1034 ir_fprintf(stderr, "Optimisation warning: found immediate only lea\n");
1039 out_reg = arch_get_irn_register(arch_env, node);
1040 scale = get_ia32_am_scale(node);
1041 assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
1042 /* check if we have immediates values (frame entities should already be
1043 * expressed in the offsets) */
1044 if(get_ia32_am_offs_int(node) != 0 || get_ia32_am_sc(node) != NULL) {
1050 /* we can transform leas where the out register is the same as either the
1051 * base or index register back to an Add or Shl */
1052 if(out_reg == base_reg) {
1054 #ifdef DEBUG_libfirm
1055 if(!has_immediates) {
1056 ir_fprintf(stderr, "Optimisation warning: found lea which is "
1061 goto make_add_immediate;
1063 if(scale == 0 && !has_immediates) {
1068 /* can't create an add */
1070 } else if(out_reg == index_reg) {
1072 if(has_immediates && scale == 0) {
1074 goto make_add_immediate;
1075 } else if(!has_immediates && scale > 0) {
/* lea reg, [reg*2^scale] == shl reg, scale */
1077 op2 = create_immediate_from_int(cg, scale);
1079 } else if(!has_immediates) {
1080 #ifdef DEBUG_libfirm
1081 ir_fprintf(stderr, "Optimisation warning: found lea which is "
1085 } else if(scale == 0 && !has_immediates) {
1090 /* can't create an add */
1093 /* can't create an add */
/* make_add_immediate: prefer inc/dec over add +/-1 when configured */
1098 if(ia32_cg_config.use_incdec) {
1099 if(is_am_one(node)) {
1100 dbgi = get_irn_dbg_info(node);
1101 block = get_nodes_block(node);
1102 res = new_rd_ia32_Inc(dbgi, irg, block, op1);
1103 arch_set_irn_register(arch_env, res, out_reg);
1106 if(is_am_minus_one(node)) {
1107 dbgi = get_irn_dbg_info(node);
1108 block = get_nodes_block(node);
1109 res = new_rd_ia32_Dec(dbgi, irg, block, op1);
1110 arch_set_irn_register(arch_env, res, out_reg);
1114 op2 = create_immediate_from_am(cg, node);
/* make_add: build the replacement Add */
1117 dbgi = get_irn_dbg_info(node);
1118 block = get_nodes_block(node);
1119 noreg = ia32_new_NoReg_gp(cg);
1120 nomem = new_NoMem();
1121 res = new_rd_ia32_Add(dbgi, irg, block, noreg, noreg, nomem, op1, op2);
1122 arch_set_irn_register(arch_env, res, out_reg);
1123 set_ia32_commutative(res);
/* make_shl: build the replacement Shl */
1127 dbgi = get_irn_dbg_info(node);
1128 block = get_nodes_block(node);
1129 noreg = ia32_new_NoReg_gp(cg);
1130 nomem = new_NoMem();
1131 res = new_rd_ia32_Shl(dbgi, irg, block, op1, op2);
1132 arch_set_irn_register(arch_env, res, out_reg);
1136 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(cg, node));
1138 /* add new ADD/SHL to schedule */
1139 DBG_OPT_LEA2ADD(node, res);
1141 /* exchange the Add and the LEA */
1142 sched_add_before(node, res);
1143 copy_mark(node, res);
1144 be_peephole_exchange(node, res);
/* NOTE(review): gapped extraction — declarations (dbgi, irg), the returns
 * after the two guards and the function tail are on missing lines. */
1148 * Split a Imul mem, imm into a Load mem and Imul reg, imm if possible.
1150 static void peephole_ia32_Imul_split(ir_node *imul) {
1151 const ir_node *right = get_irn_n(imul, n_ia32_IMul_right);
1152 const arch_register_t *reg;
1153 ir_node *load, *block, *base, *index, *mem, *res, *noreg;
/* only the (memory source, immediate) form is worth splitting */
1157 if (! is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) {
1158 /* no memory, imm form ignore */
1161 /* we need a free register */
1162 reg = get_free_gp_reg();
1166 /* fine, we can rebuild it */
1167 dbgi = get_irn_dbg_info(imul);
1168 block = get_nodes_block(imul);
1169 irg = current_ir_graph;
1170 base = get_irn_n(imul, n_ia32_IMul_base);
1171 index = get_irn_n(imul, n_ia32_IMul_index);
1172 mem = get_irn_n(imul, n_ia32_IMul_mem);
1173 load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);
1175 /* copy all attributes */
1176 set_irn_pinned( load, get_irn_pinned(imul));
1177 set_ia32_op_type( load, ia32_AddrModeS);
1178 ia32_copy_am_attrs(load, imul);
/* strip the address-mode info off the IMul; the Load carries it now */
1180 set_ia32_am_offs_int( imul, 0);
1181 set_ia32_am_sc( imul, NULL);
1182 set_ia32_am_scale( imul, 0);
1183 clear_ia32_am_sc_sign(imul);
1185 sched_add_before(imul, load);
1187 mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M);
1188 res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);
1190 arch_set_irn_register(arch_env, res, reg);
1191 be_peephole_new_node(res);
/* rewire the IMul: memory from the Load, left operand = loaded value */
1193 set_irn_n(imul, n_ia32_IMul_mem, mem);
1194 noreg = get_irn_n(imul, n_ia32_IMul_left);
1195 set_irn_n(imul, n_ia32_IMul_base, noreg);
1196 set_irn_n(imul, n_ia32_IMul_index, noreg);
1197 set_irn_n(imul, n_ia32_IMul_left, res);
1198 set_ia32_op_type(imul, ia32_Normal);
1202 * Replace xorps r,r and xorpd r,r by pxor r,r
1204 static void peephole_ia32_xZero(ir_node *xor) {
/* same operand layout, only the opcode changes, so swapping the op suffices */
1205 set_irn_op(xor, op_ia32_xPzero);
1209 * Register a peephole optimisation function.
 * Stores @p func in the opcode's generic function slot; the assert guards
 * against double registration for the same opcode.
1211 static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) {
1212 assert(op->ops.generic == NULL);
1213 op->ops.generic = (op_func)func;
1216 /* Perform peephole-optimizations. */
1217 void ia32_peephole_optimization(ia32_code_gen_t *new_cg)
/* NOTE(review): the line assigning the module-global `cg` (presumably
 * `cg = new_cg;`) was lost in extraction — arch_env below reads cg. */
1220 arch_env = cg->arch_env;
1222 /* register peephole optimisations */
1223 clear_irp_opcodes_generic_func();
1224 register_peephole_optimisation(op_ia32_Const, peephole_ia32_Const);
1225 register_peephole_optimisation(op_be_IncSP, peephole_be_IncSP);
1226 register_peephole_optimisation(op_ia32_Lea, peephole_ia32_Lea);
1227 register_peephole_optimisation(op_ia32_Cmp, peephole_ia32_Cmp);
1228 register_peephole_optimisation(op_ia32_Cmp8Bit, peephole_ia32_Cmp);
1229 register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
1230 register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
1231 register_peephole_optimisation(op_be_Return, peephole_ia32_Return);
/* IMul splitting only pays off when imul mem,imm32 is slow on the target */
1232 if (! ia32_cg_config.use_imul_mem_imm32)
1233 register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
1234 if (ia32_cg_config.use_pxor)
1235 register_peephole_optimisation(op_ia32_xZero, peephole_ia32_xZero);
1237 be_peephole_opt(cg->birg);
/* NOTE(review): gapped extraction — the per-Proj liveness checks inside the
 * loop, the returns and the actual kill/unschedule calls are on missing
 * lines. */
1241 * Removes node from schedule if it is not used anymore. If irn is a mode_T node
1242 * all it's Projs are removed as well.
1243 * @param irn The irn to be removed from schedule
1245 static INLINE void try_kill(ir_node *node)
1247 if(get_irn_mode(node) == mode_T) {
1248 const ir_edge_t *edge, *next;
1249 foreach_out_edge_safe(node, edge, next) {
1250 ir_node *proj = get_edge_src_irn(edge);
/* node still has users: do not remove it */
1255 if(get_irn_n_edges(node) != 0)
1258 if (sched_is_scheduled(node)) {
/* Fold a widening (or same-size) Conv that feeds a Store: the Store only
 * writes the low bits, so the Conv can be bypassed.
 * NOTE(review): gapped extraction — the returns after the guards and the
 * else-branch of the Proj unwrapping are on missing lines. */
1265 static void optimize_conv_store(ir_node *node)
1270 ir_mode *store_mode;
1272 if(!is_ia32_Store(node) && !is_ia32_Store8Bit(node))
1275 assert(n_ia32_Store_val == n_ia32_Store8Bit_val);
1276 pred_proj = get_irn_n(node, n_ia32_Store_val);
1277 if(is_Proj(pred_proj)) {
1278 pred = get_Proj_pred(pred_proj);
1282 if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
1284 if(get_ia32_op_type(pred) != ia32_Normal)
1287 /* the store only stores the lower bits, so we only need the conv
1288 * if it shrinks the mode */
1289 conv_mode = get_ia32_ls_mode(pred);
1290 store_mode = get_ia32_ls_mode(node);
/* shrinking conv: bail out (the missing return presumably lives here) */
1291 if(get_mode_size_bits(conv_mode) < get_mode_size_bits(store_mode))
/* bypass the conv: store its input directly */
1294 set_irn_n(node, n_ia32_Store_val, get_irn_n(pred, n_ia32_Conv_I2I_val));
1295 if(get_irn_n_edges(pred_proj) == 0) {
1296 kill_node(pred_proj);
1297 if(pred != pred_proj)
/* Fold a Conv that consumes a Load result: the Load already extends to the
 * needed width, so the Conv is redundant (possibly after flipping the
 * Load's signedness).
 * NOTE(review): gapped extraction — guard returns, the is_Proj check and
 * some declarations are on missing lines. */
1302 static void optimize_load_conv(ir_node *node)
1304 ir_node *pred, *predpred;
1308 if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
1311 assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
1312 pred = get_irn_n(node, n_ia32_Conv_I2I_val);
1316 predpred = get_Proj_pred(pred);
1317 if(!is_ia32_Load(predpred))
1320 /* the load is sign extending the upper bits, so we only need the conv
1321 * if it shrinks the mode */
1322 load_mode = get_ia32_ls_mode(predpred);
1323 conv_mode = get_ia32_ls_mode(node);
1324 if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
1327 if(get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
1328 /* change the load if it has only 1 user */
1329 if(get_irn_n_edges(pred) == 1) {
/* retype the Load to the conv's signedness, keeping its size */
1331 if(get_mode_sign(conv_mode)) {
1332 newmode = find_signed_mode(load_mode);
1334 newmode = find_unsigned_mode(load_mode);
1336 assert(newmode != NULL);
1337 set_ia32_ls_mode(predpred, newmode);
1339 /* otherwise we have to keep the conv */
/* the conv is now redundant: forward its input to all users */
1345 exchange(node, pred);
/* Collapse chains of two integer Convs into one where the first conv
 * already provides all bits the second needs.
 * NOTE(review): gapped extraction — guard returns, the else of the is_Proj
 * branch and several closing braces are on missing lines. */
1348 static void optimize_conv_conv(ir_node *node)
1350 ir_node *pred_proj, *pred, *result_conv;
1351 ir_mode *pred_mode, *conv_mode;
1355 if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
1358 assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
1359 pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
1360 if(is_Proj(pred_proj))
1361 pred = get_Proj_pred(pred_proj);
1365 if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
1368 /* we know that after a conv, the upper bits are sign extended
1369 * so we only need the 2nd conv if it shrinks the mode */
1370 conv_mode = get_ia32_ls_mode(node);
1371 conv_mode_bits = get_mode_size_bits(conv_mode);
1372 pred_mode = get_ia32_ls_mode(pred);
1373 pred_mode_bits = get_mode_size_bits(pred_mode);
/* identical conversion: the first conv's result can be used directly */
1375 if(conv_mode_bits == pred_mode_bits
1376 && get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
1377 result_conv = pred_proj;
1378 } else if(conv_mode_bits <= pred_mode_bits) {
1379 /* if 2nd conv is smaller then first conv, then we can always take the
/* sole user: shrink the first conv in place */
1381 if(get_irn_n_edges(pred_proj) == 1) {
1382 result_conv = pred_proj;
1383 set_ia32_ls_mode(pred, conv_mode);
1385 /* Argh:We must change the opcode to 8bit AND copy the register constraints */
1386 if (get_mode_size_bits(conv_mode) == 8) {
1387 set_irn_op(pred, op_ia32_Conv_I2I8Bit);
1388 set_ia32_in_req_all(pred, get_ia32_in_req_all(node));
1391 /* we don't want to end up with 2 loads, so we better do nothing */
1392 if(get_irn_mode(pred) == mode_T) {
/* multiple users: clone the first conv with the smaller mode */
1396 result_conv = exact_copy(pred);
1397 set_ia32_ls_mode(result_conv, conv_mode);
1399 /* Argh:We must change the opcode to 8bit AND copy the register constraints */
1400 if (get_mode_size_bits(conv_mode) == 8) {
1401 set_irn_op(result_conv, op_ia32_Conv_I2I8Bit);
1402 set_ia32_in_req_all(result_conv, get_ia32_in_req_all(node));
1406 /* if both convs have the same sign, then we can take the smaller one */
1407 if(get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
1408 result_conv = pred_proj;
1410 /* no optimisation possible if smaller conv is sign-extend */
1411 if(mode_is_signed(pred_mode)) {
1414 /* we can take the smaller conv if it is unsigned */
1415 result_conv = pred_proj;
1420 exchange(node, result_conv);
/* clean up the now possibly dead first conv */
1422 if(get_irn_n_edges(pred_proj) == 0) {
1423 kill_node(pred_proj);
1424 if(pred != pred_proj)
/* the replacement may itself feed another conv: recurse */
1427 optimize_conv_conv(result_conv);
/* Graph-walker callback: applies the three local conv/store rewrites to
 * every node. The env parameter is unused by the visible calls.
 * NOTE(review): the opening brace and any preamble were lost in
 * extraction. */
1430 static void optimize_node(ir_node *node, void *env)
1434 optimize_load_conv(node);
1435 optimize_conv_store(node);
1436 optimize_conv_conv(node);
1440 * Performs conv and address mode optimization.
1442 void ia32_optimize_graph(ia32_code_gen_t *cg)
/* blockwise walk applying optimize_node to every node of the graph */
1444 irg_walk_blkwise_graph(cg->irg, NULL, optimize_node, cg);
/* NOTE(review): the condition guarding this dump (likely a dump-flag check)
 * was lost in extraction — TODO confirm against the full source. */
1447 be_dump(cg->irg, "-opt", dump_ir_block_graph_sched);
1450 void ia32_init_optimize(void)
1452 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.optimize");