2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
81 #define ULL_BIAS "18446744073709551616"
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
94 static ir_node *initial_fpcw = NULL;
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those created once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Returns true if a node is a Const representing the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Returns true if a node is a Const representing the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Returns true if a node is a Const representing the -1 (all-bits-one) constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 have dedicated x87 load instructions (fldz/fld1, see gen_Const). */
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
/* zero is always cheap to materialize (xorps/pxor-style xZero) */
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* assemble the low 32 bits of the double, little-endian byte order */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 if (be_get_irg_options(env_cg->irg)->pic) {
204 return arch_code_generator_get_pic_base(env_cg);
/* NOTE(review): non-PIC fallthrough (presumably returns noreg_GP) is elided in this excerpt. */
211 * Transforms a Const.
213 static ir_node *gen_Const(ir_node *node)
215 ir_node *old_block = get_nodes_block(node);
216 ir_node *block = be_transform_node(old_block);
217 dbg_info *dbgi = get_irn_dbg_info(node);
218 ir_mode *mode = get_irn_mode(node);
220 assert(is_Const(node));
/* --- float constants: SSE2 path first, x87 path below --- */
222 if (mode_is_float(mode)) {
228 if (ia32_cg_config.use_sse2) {
229 tarval *tv = get_Const_tarval(node);
230 if (tarval_is_null(tv)) {
/* 0.0: cheap xZero node, no memory load needed */
231 load = new_bd_ia32_xZero(dbgi, block);
232 set_ia32_ls_mode(load, mode);
234 #ifdef CONSTRUCT_SSE_CONST
235 } else if (tarval_is_one(tv)) {
/* 1.0: all-ones shifted left then right leaves exactly the bit pattern of 1.0
 * (shift counts 26/55 depend on float vs. double mantissa width) */
236 int cnst = mode == mode_F ? 26 : 55;
237 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
238 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
239 ir_node *pslld, *psrld;
241 load = new_bd_ia32_xAllOnes(dbgi, block);
242 set_ia32_ls_mode(load, mode);
243 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
244 set_ia32_ls_mode(pslld, mode);
245 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
246 set_ia32_ls_mode(psrld, mode);
248 #endif /* CONSTRUCT_SSE_CONST */
249 } else if (mode == mode_F) {
250 /* we can place any 32bit constant by using a movd gp, sse */
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
255 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
256 load = new_bd_ia32_xMovd(dbgi, block, cnst);
257 set_ia32_ls_mode(load, mode);
260 #ifdef CONSTRUCT_SSE_CONST
261 if (mode == mode_D) {
262 unsigned val = get_tarval_sub_bits(tv, 0) |
263 (get_tarval_sub_bits(tv, 1) << 8) |
264 (get_tarval_sub_bits(tv, 2) << 16) |
265 (get_tarval_sub_bits(tv, 3) << 24);
267 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
268 ir_node *cnst, *psllq;
270 /* fine, lower 32bit are zero, produce 32bit value */
271 val = get_tarval_sub_bits(tv, 4) |
272 (get_tarval_sub_bits(tv, 5) << 8) |
273 (get_tarval_sub_bits(tv, 6) << 16) |
274 (get_tarval_sub_bits(tv, 7) << 24);
/* build upper half in a GP register, movd into xmm, then shift into place */
275 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
276 load = new_bd_ia32_xMovd(dbgi, block, cnst);
277 set_ia32_ls_mode(load, mode);
278 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
279 set_ia32_ls_mode(psllq, mode);
284 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a read-only entity */
285 floatent = create_float_const_entity(node);
287 base = get_symconst_base();
288 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
290 set_ia32_op_type(load, ia32_AddrModeS);
291 set_ia32_am_sc(load, floatent);
292 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
293 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity --- */
296 if (is_Const_null(node)) {
297 load = new_bd_ia32_vfldz(dbgi, block);
299 set_ia32_ls_mode(load, mode);
300 } else if (is_Const_one(node)) {
301 load = new_bd_ia32_vfld1(dbgi, block);
303 set_ia32_ls_mode(load, mode);
308 floatent = create_float_const_entity(node);
309 /* create_float_const_ent is smart and sometimes creates
311 ls_mode = get_type_mode(get_entity_type(floatent));
312 base = get_symconst_base();
313 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
315 set_ia32_op_type(load, ia32_AddrModeS);
316 set_ia32_am_sc(load, floatent);
317 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
318 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
321 #ifdef CONSTRUCT_SSE_CONST
323 #endif /* CONSTRUCT_SSE_CONST */
324 SET_IA32_ORIG_NODE(load, node);
326 be_dep_on_frame(load);
328 } else { /* non-float mode */
330 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so get_tarval_long below is safe */
333 tv = tarval_convert_to(tv, mode_Iu);
335 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
337 panic("couldn't convert constant tarval (%+F)", node);
339 val = get_tarval_long(tv);
341 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
342 SET_IA32_ORIG_NODE(cnst, node);
344 be_dep_on_frame(cnst);
350 * Transforms a SymConst.
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
/* float symconsts become a load of the entity (x87 or SSE flavor) */
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
/* integer case: only plain entity addresses are supported */
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
377 SET_IA32_ORIG_NODE(cnst, node);
379 be_dep_on_frame(cnst);
384 * Create a float type for the given mode and cache it.
386 * @param mode the mode for the float type (might be integer mode for SSE2 types)
387 * @param align alignment in bytes (used as index into the per-mode caches, so it must be < 16)
389 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
/* one lazily-filled cache per supported mode, indexed by alignment */
395 if (mode == mode_Iu) {
396 static ir_type *int_Iu[16] = {NULL, };
398 if (int_Iu[align] == NULL) {
399 int_Iu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Iu[align];
404 } else if (mode == mode_Lu) {
405 static ir_type *int_Lu[16] = {NULL, };
407 if (int_Lu[align] == NULL) {
408 int_Lu[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return int_Lu[align];
413 } else if (mode == mode_F) {
414 static ir_type *float_F[16] = {NULL, };
416 if (float_F[align] == NULL) {
417 float_F[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_F[align];
422 } else if (mode == mode_D) {
423 static ir_type *float_D[16] = {NULL, };
425 if (float_D[align] == NULL) {
426 float_D[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_D[align];
/* fallback: any other mode is cached in the extended-precision table */
432 static ir_type *float_E[16] = {NULL, };
434 if (float_E[align] == NULL) {
435 float_E[align] = tp = new_type_primitive(mode);
436 /* set the specified alignment */
437 set_type_alignment_bytes(tp, align);
439 return float_E[align];
444 * Create a float[2] array type for the given atomic type.
446 * @param tp the atomic type (its mode and alignment select the cache slot)
448 static ir_type *ia32_create_float_array(ir_type *tp)
450 ir_mode *mode = get_type_mode(tp);
451 unsigned align = get_type_alignment_bytes(tp);
/* per-mode caches indexed by alignment, same scheme as ia32_create_float_type */
456 if (mode == mode_F) {
457 static ir_type *float_F[16] = {NULL, };
459 if (float_F[align] != NULL)
460 return float_F[align];
461 arr = float_F[align] = new_type_array(1, tp);
462 } else if (mode == mode_D) {
463 static ir_type *float_D[16] = {NULL, };
465 if (float_D[align] != NULL)
466 return float_D[align];
467 arr = float_D[align] = new_type_array(1, tp);
469 static ir_type *float_E[16] = {NULL, };
471 if (float_E[align] != NULL)
472 return float_E[align];
473 arr = float_E[align] = new_type_array(1, tp);
/* fix layout: two elements, element alignment preserved */
475 set_type_alignment_bytes(arr, align);
476 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
477 set_type_state(arr, layout_fixed);
481 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
482 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
484 static const struct {
485 const char *ent_name;
486 const char *cnst_str;
489 } names [ia32_known_const_max] = {
490 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
491 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
492 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
493 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
494 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
/* created entities are cached per constant kind */
496 static ir_entity *ent_cache[ia32_known_const_max];
498 const char *ent_name, *cnst_str;
504 ent_name = names[kct].ent_name;
505 if (! ent_cache[kct]) {
506 cnst_str = names[kct].cnst_str;
/* mode selector from the table: 0 = 32bit, 1 = 64bit, 2 = float */
508 switch (names[kct].mode) {
509 case 0: mode = mode_Iu; break;
510 case 1: mode = mode_Lu; break;
511 default: mode = mode_F; break;
513 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
514 tp = ia32_create_float_type(mode, names[kct].align);
/* the ULL bias is a two-element array: { 0, bias } */
516 if (kct == ia32_ULLBIAS)
517 tp = ia32_create_float_array(tp);
518 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
520 set_entity_ld_ident(ent, get_entity_ident(ent));
521 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
522 set_entity_visibility(ent, ir_visibility_private);
524 if (kct == ia32_ULLBIAS) {
525 ir_initializer_t *initializer = create_initializer_compound(2);
527 set_initializer_compound_value(initializer, 0,
528 create_initializer_tarval(get_mode_null(mode)));
529 set_initializer_compound_value(initializer, 1,
530 create_initializer_tarval(tv));
532 set_entity_initializer(ent, initializer);
534 set_entity_initializer(ent, create_initializer_tarval(tv));
537 /* cache the entry */
538 ent_cache[kct] = ent;
541 return ent_cache[kct];
545 * return true if the node is a Proj(Load) and could be used in source address
546 * mode for another node. Will return only true if the @p other node is not
547 * dependent on the memory of the Load (for binary operations use the other
548 * input here, for unary operations use NULL).
550 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
551 ir_node *other, ir_node *other2, match_flags_t flags)
556 /* float constants are always available */
557 if (is_Const(node)) {
558 ir_mode *mode = get_irn_mode(node);
559 if (mode_is_float(mode)) {
/* cheap-to-materialize constants should not be folded into AM */
560 if (ia32_cg_config.use_sse2) {
561 if (is_simple_sse_Const(node))
564 if (is_simple_x87_Const(node))
567 if (get_irn_n_edges(node) > 1)
/* must be the result Proj of a Load in the same block */
575 load = get_Proj_pred(node);
576 pn = get_Proj_proj(node);
577 if (!is_Load(load) || pn != pn_Load_res)
579 if (get_nodes_block(load) != block)
581 /* we only use address mode if we're the only user of the load */
582 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
584 /* in some edge cases with address mode we might reach the load normally
585 * and through some AM sequence, if it is already materialized then we
586 * can't create an AM node from it */
587 if (be_is_transformed(node))
590 /* don't do AM if other node inputs depend on the load (via mem-proj) */
591 if (other != NULL && prevents_AM(block, load, other))
594 if (other2 != NULL && prevents_AM(block, load, other2))
600 typedef struct ia32_address_mode_t ia32_address_mode_t;
601 struct ia32_address_mode_t {
606 ia32_op_type_t op_type;
610 unsigned commutative : 1;
611 unsigned ins_permuted : 1;
/** Fills @p addr from a pointer and memory input, transforming base/index to backend nodes. */
614 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
616 /* construct load address */
617 memset(addr, 0, sizeof(addr[0]));
618 ia32_create_address_mode(addr, ptr, 0);
/* missing base/index components default to the no-register placeholder */
620 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
621 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
622 addr->mem = be_transform_node(mem);
/** Fills the address-mode struct for @p node, which is either a float Const or a Proj(Load). */
625 static void build_address(ia32_address_mode_t *am, ir_node *node,
626 ia32_create_am_flags_t flags)
628 ia32_address_t *addr = &am->addr;
634 /* floating point immediates */
635 if (is_Const(node)) {
/* float constants are accessed through a global read-only entity */
636 ir_entity *entity = create_float_const_entity(node);
637 addr->base = get_symconst_base();
638 addr->index = noreg_GP;
640 addr->symconst_ent = entity;
642 am->ls_mode = get_type_mode(get_entity_type(entity));
643 am->pinned = op_pin_state_floats;
/* otherwise: fold the Load (node is its result Proj) into the address mode */
647 load = get_Proj_pred(node);
648 ptr = get_Load_ptr(load);
649 mem = get_Load_mem(load);
650 new_mem = be_transform_node(mem);
651 am->pinned = get_irn_pinned(load);
652 am->ls_mode = get_Load_mode(load);
653 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
656 /* construct load address */
657 ia32_create_address_mode(addr, ptr, flags);
659 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
660 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copies scale, symconst, offset and frame information from @p addr onto an ia32 node. */
664 static void set_address(ir_node *node, const ia32_address_t *addr)
666 set_ia32_am_scale(node, addr->scale);
667 set_ia32_am_sc(node, addr->symconst_ent);
668 set_ia32_am_offs_int(node, addr->offset);
669 if (addr->symconst_sign)
670 set_ia32_am_sc_sign(node);
/* NOTE(review): lines elided here — the frame settings below are presumably guarded by addr->use_frame; confirm against full source. */
672 set_ia32_use_frame(node);
673 set_ia32_frame_ent(node, addr->frame_entity);
677 * Apply attributes of a given address mode to a node.
679 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
681 set_address(node, &am->addr);
683 set_ia32_op_type(node, am->op_type);
684 set_ia32_ls_mode(node, am->ls_mode);
685 if (am->pinned == op_pin_state_pinned) {
686 /* beware: some nodes are already pinned and did not allow to change the state */
687 if (get_irn_pinned(node) != op_pin_state_pinned)
688 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): elided guard above — commutativity flag is presumably conditional on am->commutative. */
691 set_ia32_commutative(node);
695 * Check, if a given node is a Down-Conv, ie. an integer Conv
696 * from a mode with more bits to a mode with fewer bits.
697 * Moreover, we return only true if the node has not more than 1 user.
699 * @param node the node
700 * @return non-zero if node is a Down-Conv
702 static int is_downconv(const ir_node *node)
710 /* we only want to skip the conv when we're the only user
711 * (because this test is used in the context of address-mode selection
712 * and we don't want to use address mode for multiple users) */
713 if (get_irn_n_edges(node) > 1)
716 src_mode = get_irn_mode(get_Conv_op(node));
717 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the destination must not be wider */
719 ia32_mode_needs_gp_reg(src_mode) &&
720 ia32_mode_needs_gp_reg(dest_mode) &&
721 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
724 /** Skip all Down-Conv's on a given node and return the resulting node. */
725 ir_node *ia32_skip_downconv(ir_node *node)
727 while (is_downconv(node))
728 node = get_Conv_op(node);
/** Returns true for a single-user integer Conv between modes of equal bit size (signedness-only conversion). */
733 static bool is_sameconv(ir_node *node)
741 /* we only want to skip the conv when we're the only user
742 * (because this test is used in the context of address-mode selection
743 * and we don't want to use address mode for multiple users) */
744 if (get_irn_n_edges(node) > 1)
747 src_mode = get_irn_mode(get_Conv_op(node));
748 dest_mode = get_irn_mode(node);
750 ia32_mode_needs_gp_reg(src_mode) &&
751 ia32_mode_needs_gp_reg(dest_mode) &&
752 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
755 /** Skip all signedness convs */
756 static ir_node *ia32_skip_sameconv(ir_node *node)
758 while (is_sameconv(node))
759 node = get_Conv_op(node);
/** Widens @p node to 32 bit (sign- or zero-extending depending on its mode's signedness). */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
/* tgt_mode selection is elided here; signed modes presumably pick a signed 32bit target */
771 if (mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
/* decode the capability flags once */
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit operations need explicit flag support for address mode */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first, then (if commutative) on op1 */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
847 build_address(am, op2, 0);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if (mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
859 build_address(am, op1, 0);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if (new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
/* operands were swapped so op1 was folded into AM */
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
/* no address mode possible: fall back to normal two-register form */
877 am->op_type = ia32_Normal;
879 if (flags & match_try_am) {
885 mode = get_irn_mode(op2);
886 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
887 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
889 new_op2 = create_upconv(op2, NULL);
890 am->ls_mode = mode_Iu;
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
895 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill the unused address slots with the no-register placeholder */
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do if no Load was folded into this node */
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
/* redirect future transformations of the consumed Load to this node */
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
973 new_node = fix_mem_proj(new_node, &am);
979 * Generic names for the inputs of an ia32 binary op.
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
986 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
/* operands are fetched by the generic l_binop input positions (see asserts above) */
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
/** Returns the (cached) transformed node representing the initial x87 FPU control word. */
1032 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(env_cg->irg),
1039 &ia32_fp_cw_regs[REG_FPCW]);
1040 initial_fpcw = be_transform_node(fpcw);
1042 return initial_fpcw;
1046 * Construct a standard binary operation, set AM and immediate if required.
1048 * @param op1 The first operand
1049 * @param op2 The second operand
1050 * @param func The node constructor function
1051 * @return The constructed ia32 node.
1053 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1054 construct_binop_float_func *func)
1056 ir_mode *mode = get_irn_mode(node);
1058 ir_node *block, *new_block, *new_node;
1059 ia32_address_mode_t am;
1060 ia32_address_t *addr = &am.addr;
1061 ia32_x87_attr_t *attr;
1062 /* All operations are considered commutative, because there are reverse
1064 match_flags_t flags = match_commutative;
1066 /* happens for div nodes... */
1068 mode = get_divop_resmod(node);
1070 /* cannot use address mode with long double on x87 */
1071 if (get_mode_size_bits(mode) <= 64)
1074 block = get_nodes_block(node);
1075 match_arguments(&am, block, op1, op2, NULL, flags);
1077 dbgi = get_irn_dbg_info(node);
1078 new_block = be_transform_node(block);
/* x87 binops additionally consume the FPU control word */
1079 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1080 am.new_op1, am.new_op2, get_fpcw());
1081 set_am_attributes(new_node, &am);
/* record operand permutation so a reversed instruction can be emitted later */
1083 attr = get_ia32_x87_attr(new_node);
1084 attr->attr.data.ins_permuted = am.ins_permuted;
1086 SET_IA32_ORIG_NODE(new_node, node);
1088 new_node = fix_mem_proj(new_node, &am);
1094 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1096 * @param op1 The first operand
1097 * @param op2 The second operand
1098 * @param func The node constructor function
1099 * @return The constructed ia32 node.
1101 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1102 construct_shift_func *func,
1103 match_flags_t flags)
1106 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1108 assert(! mode_is_float(get_irn_mode(node)));
1109 assert(flags & match_immediate);
1110 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* value operand: skip downconvs when mode neutral, otherwise widen to 32bit */
1112 if (flags & match_mode_neutral) {
1113 op1 = ia32_skip_downconv(op1);
1114 new_op1 = be_transform_node(op1);
1115 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1116 new_op1 = create_upconv(op1, node);
1118 new_op1 = be_transform_node(op1);
1121 /* the shift amount can be any mode that is bigger than 5 bits, since all
1122 * other bits are ignored anyway */
1123 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1124 ir_node *const op = get_Conv_op(op2);
1125 if (mode_is_float(get_irn_mode(op)))
1128 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1130 new_op2 = create_immediate_or_transform(op2, 0);
1132 dbgi = get_irn_dbg_info(node);
1133 block = get_nodes_block(node);
1134 new_block = be_transform_node(block);
1135 new_node = func(dbgi, new_block, new_op1, new_op2);
1136 SET_IA32_ORIG_NODE(new_node, node);
1138 /* lowered shift instruction may have a dependency operand, handle it here */
1139 if (get_irn_arity(node) == 3) {
1140 /* we have a dependency */
1141 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1142 add_irn_dep(new_node, new_dep);
1150 * Construct a standard unary operation, set AM and immediate if required.
1152 * @param op The operand
1153 * @param func The node constructor function
1154 * @return The constructed ia32 node.
1156 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1157 match_flags_t flags)
1160 ir_node *block, *new_block, *new_op, *new_node;
1162 assert(flags == 0 || flags == match_mode_neutral);
1163 if (flags & match_mode_neutral) {
1164 op = ia32_skip_downconv(op);
1167 new_op = be_transform_node(op);
1168 dbgi = get_irn_dbg_info(node);
1169 block = get_nodes_block(node);
1170 new_block = be_transform_node(block);
1171 new_node = func(dbgi, new_block, new_op);
1173 SET_IA32_ORIG_NODE(new_node, node);
/** Builds an ia32 Lea node from a filled address struct (base/index transformed, with noreg defaults for elided NULL cases). */
1178 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1179 ia32_address_t *addr)
1181 ir_node *base, *index, *res;
1187 base = be_transform_node(base);
1190 index = addr->index;
1191 if (index == NULL) {
1194 index = be_transform_node(index);
1197 res = new_bd_ia32_Lea(dbgi, block, base, index);
1198 set_address(res, addr);
1204 * Returns non-zero if a given address mode has a symbolic or
1205 * numerical offset != 0, or references a frame entity.
1207 static int am_has_immediates(const ia32_address_t *addr)
1209 return addr->offset != 0 || addr->symconst_ent != NULL
1210 || addr->frame_entity || addr->use_frame;
1214 * Creates an ia32 Add.
1216 * @return the created ia32 Add node
1218 static ir_node *gen_Add(ir_node *node)
1220 ir_mode *mode = get_irn_mode(node);
1221 ir_node *op1 = get_Add_left(node);
1222 ir_node *op2 = get_Add_right(node);
1224 ir_node *block, *new_block, *new_node, *add_immediate_op;
1225 ia32_address_t addr;
1226 ia32_address_mode_t am;
/* float adds go straight to the SSE or x87 binop helpers */
1228 if (mode_is_float(mode)) {
1229 if (ia32_cg_config.use_sse2)
1230 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1231 match_commutative | match_am);
1233 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1236 ia32_mark_non_am(node);
1238 op2 = ia32_skip_downconv(op2);
1239 op1 = ia32_skip_downconv(op1);
1243 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1244 * 1. Add with immediate -> Lea
1245 * 2. Add with possible source address mode -> Add
1246 * 3. Otherwise -> Lea
1248 memset(&addr, 0, sizeof(addr));
1249 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1250 add_immediate_op = NULL;
1252 dbgi = get_irn_dbg_info(node);
1253 block = get_nodes_block(node);
1254 new_block = be_transform_node(block);
/* case 0: everything folded into symconst/offset -> plain Const */
1257 if (addr.base == NULL && addr.index == NULL) {
1258 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1259 addr.symconst_sign, 0, addr.offset);
1260 be_dep_on_frame(new_node);
1261 SET_IA32_ORIG_NODE(new_node, node);
1264 /* add with immediate? */
1265 if (addr.index == NULL) {
1266 add_immediate_op = addr.base;
1267 } else if (addr.base == NULL && addr.scale == 0) {
1268 add_immediate_op = addr.index;
1271 if (add_immediate_op != NULL) {
1272 if (!am_has_immediates(&addr)) {
1273 #ifdef DEBUG_libfirm
1274 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* Add x,0: the operand itself is the result */
1277 return be_transform_node(add_immediate_op);
1280 new_node = create_lea_from_address(dbgi, new_block, &addr);
1281 SET_IA32_ORIG_NODE(new_node, node);
1285 /* test if we can use source address mode */
1286 match_arguments(&am, block, op1, op2, NULL, match_commutative
1287 | match_mode_neutral | match_am | match_immediate | match_try_am);
1289 /* construct an Add with source address mode */
1290 if (am.op_type == ia32_AddrModeS) {
1291 ia32_address_t *am_addr = &am.addr;
1292 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1293 am_addr->index, am_addr->mem, am.new_op1,
1295 set_am_attributes(new_node, &am);
1296 SET_IA32_ORIG_NODE(new_node, node);
1298 new_node = fix_mem_proj(new_node, &am);
1303 /* otherwise construct a lea */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1310 * Creates an ia32 Mul.
1312 * @return the created ia32 Mul node
1314 static ir_node *gen_Mul(ir_node *node)
1316 ir_node *op1 = get_Mul_left(node);
1317 ir_node *op2 = get_Mul_right(node);
1318 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE xMul or x87 vfmul */
1320 if (mode_is_float(mode)) {
1321 if (ia32_cg_config.use_sse2)
1322 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1323 match_commutative | match_am);
1325 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer: IMul; allows AM, immediates and both combined */
1327 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1328 match_commutative | match_am | match_mode_neutral |
1329 match_immediate | match_am_and_immediates);
1333 * Creates an ia32 Mulh.
1334 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1335 * this result while Mul returns the lower 32 bit.
1337 * @return the created ia32 Mulh node
1339 static ir_node *gen_Mulh(ir_node *node)
1341 dbg_info *dbgi = get_irn_dbg_info(node);
1342 ir_node *op1 = get_Mulh_left(node);
1343 ir_node *op2 = get_Mulh_right(node);
1344 ir_mode *mode = get_irn_mode(node);
1346 ir_node *proj_res_high;
/* only 32x32->64 widening multiply is available on ia32 */
1348 if (get_mode_size_bits(mode) != 32) {
1349 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; project out the high half */
1352 if (mode_is_signed(mode)) {
1353 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1354 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1359 return proj_res_high;
1363 * Creates an ia32 And.
1365 * @return The created ia32 And node
1367 static ir_node *gen_And(ir_node *node)
1369 ir_node *op1 = get_And_left(node);
1370 ir_node *op2 = get_And_right(node);
1371 assert(! mode_is_float(get_irn_mode(node)));
1373 /* is it a zero extension? */
1374 if (is_Const(op2)) {
1375 tarval *tv = get_Const_tarval(op2);
1376 long v = get_tarval_long(tv);
/* x & 0xFF / x & 0xFFFF is a zero-extension -> use a Conv (movzx) instead */
1378 if (v == 0xFF || v == 0xFFFF) {
1379 dbg_info *dbgi = get_irn_dbg_info(node);
1380 ir_node *block = get_nodes_block(node);
1387 assert(v == 0xFFFF);
1390 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1395 return gen_binop(node, op1, op2, new_bd_ia32_And,
1396 match_commutative | match_mode_neutral | match_am | match_immediate);
1402 * Creates an ia32 Or.
1404 * @return The created ia32 Or node
1406 static ir_node *gen_Or(ir_node *node)
1408 ir_node *op1 = get_Or_left(node);
1409 ir_node *op2 = get_Or_right(node);
/* bitwise Or is integer-only at this point */
1411 assert (! mode_is_float(get_irn_mode(node)));
1412 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1413 | match_mode_neutral | match_am | match_immediate);
1419 * Creates an ia32 Eor.
1421 * @return The created ia32 Eor node
1423 static ir_node *gen_Eor(ir_node *node)
1425 ir_node *op1 = get_Eor_left(node);
1426 ir_node *op2 = get_Eor_right(node);
/* firm Eor maps to the ia32 Xor instruction */
1428 assert(! mode_is_float(get_irn_mode(node)));
1429 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1430 | match_mode_neutral | match_am | match_immediate);
1435 * Creates an ia32 Sub.
1437 * @return The created ia32 Sub node
1439 static ir_node *gen_Sub(ir_node *node)
1441 ir_node *op1 = get_Sub_left(node);
1442 ir_node *op2 = get_Sub_right(node);
1443 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE xSub or x87 vfsub; Sub is not commutative */
1445 if (mode_is_float(mode)) {
1446 if (ia32_cg_config.use_sse2)
1447 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1449 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalized to Add(x, -C) earlier */
1452 if (is_Const(op2)) {
1453 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1457 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1458 | match_am | match_immediate);
/**
 * Compute the memory input for a node that consumed a load via address mode.
 * Combines the node's original memory predecessor with the AM memory while
 * avoiding cycles through the consumed load (NOTE(review): several lines of
 * the Sync-rebuild path are elided here).
 */
1461 static ir_node *transform_AM_mem(ir_node *const block,
1462 ir_node *const src_val,
1463 ir_node *const src_mem,
1464 ir_node *const am_mem)
1466 if (is_NoMem(am_mem)) {
1467 return be_transform_node(src_mem);
1468 } else if (is_Proj(src_val) &&
1470 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1471 /* avoid memory loop */
1473 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync, dropping the predecessor that comes from the consumed load */
1474 ir_node *const ptr_pred = get_Proj_pred(src_val);
1475 int const arity = get_Sync_n_preds(src_mem);
1480 NEW_ARR_A(ir_node*, ins, arity + 1);
1482 /* NOTE: This sometimes produces dead-code because the old sync in
1483 * src_mem might not be used anymore, we should detect this case
1484 * and kill the sync... */
1485 for (i = arity - 1; i >= 0; --i) {
1486 ir_node *const pred = get_Sync_pred(src_mem, i);
1488 /* avoid memory loop */
1489 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1492 ins[n++] = be_transform_node(pred);
1497 return new_r_Sync(block, n, ins);
/* default: Sync the transformed source memory with the AM memory */
1501 ins[0] = be_transform_node(src_mem);
1503 return new_r_Sync(block, 2, ins);
1508 * Create a 32bit to 64bit signed extension.
1510 * @param dbgi debug info
1511 * @param block the block where node nodes should be placed
1512 * @param val the value to extend
1513 * @param orig the original node
1515 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1516 ir_node *val, const ir_node *orig)
/* short form: cltd/cdq sign-extends eax into edx (ProduceVal fakes the eax input) */
1521 if (ia32_cg_config.use_short_sex_eax) {
1522 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1523 be_dep_on_frame(pval);
1524 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* general form: arithmetic shift right by 31 replicates the sign bit */
1526 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1527 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1529 SET_IA32_ORIG_NODE(res, orig);
1534 * Generates an ia32 DivMod with additional infrastructure for the
1535 * register allocator if needed.
1537 static ir_node *create_Div(ir_node *node)
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1541 ir_node *new_block = be_transform_node(block);
1548 ir_node *sign_extension;
1549 ia32_address_mode_t am;
1550 ia32_address_t *addr = &am.addr;
1552 /* the upper bits have random contents for smaller modes */
/* one transformer serves Div, Mod and DivMod; pick the accessors by opcode */
1553 switch (get_irn_opcode(node)) {
1555 op1 = get_Div_left(node);
1556 op2 = get_Div_right(node);
1557 mem = get_Div_mem(node);
1558 mode = get_Div_resmode(node);
1561 op1 = get_Mod_left(node);
1562 op2 = get_Mod_right(node);
1563 mem = get_Mod_mem(node);
1564 mode = get_Mod_resmode(node);
1567 op1 = get_DivMod_left(node);
1568 op2 = get_DivMod_right(node);
1569 mem = get_DivMod_mem(node);
1570 mode = get_DivMod_resmode(node);
1573 panic("invalid divmod node %+F", node);
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs edx = sign-extension of eax; unsigned needs edx = 0 */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
1619 return create_Div(node);
1623 * Generates an ia32 DivMod.
1625 static ir_node *gen_DivMod(ir_node *node)
1627 return create_Div(node);
1633 * Creates an ia32 floating Div.
1635 * @return The created ia32 xDiv node
1637 static ir_node *gen_Quot(ir_node *node)
1639 ir_node *op1 = get_Quot_left(node);
1640 ir_node *op2 = get_Quot_right(node);
/* SSE divide or x87 vfdiv; Quot is float-only by construction */
1642 if (ia32_cg_config.use_sse2) {
1643 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1645 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1651 * Creates an ia32 Shl.
1653 * @return The created ia32 Shl node
1655 static ir_node *gen_Shl(ir_node *node)
1657 ir_node *left = get_Shl_left(node);
1658 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper bits of the input don't reach the result */
1660 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1661 match_mode_neutral | match_immediate);
1665 * Creates an ia32 Shr.
1667 * @return The created ia32 Shr node
1669 static ir_node *gen_Shr(ir_node *node)
1671 ir_node *left = get_Shr_left(node);
1672 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode-neutral, the upper bits shift in */
1674 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1680 * Creates an ia32 Sar.
1682 * @return The created ia32 Shrs node
1684 static ir_node *gen_Shrs(ir_node *node)
1686 ir_node *left = get_Shrs_left(node);
1687 ir_node *right = get_Shrs_right(node);
/* Shrs by a constant (elided check presumably tests for 31) can become a
 * plain 32->64 sign-extension helper */
1689 if (is_Const(right)) {
1690 tarval *tv = get_Const_tarval(right);
1691 long val = get_tarval_long(tv);
1693 /* this is a sign extension */
1694 dbg_info *dbgi = get_irn_dbg_info(node);
1695 ir_node *block = be_transform_node(get_nodes_block(node));
1696 ir_node *new_op = be_transform_node(left);
1698 return create_sex_32_64(dbgi, block, new_op, node);
1702 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 is a sign-extension from 16/8 bit
 * -> emit a Conv (movsx) instead of two shifts */
1703 if (is_Const(right) && is_Shl(left)) {
1704 ir_node *shl_left = get_Shl_left(left);
1705 ir_node *shl_right = get_Shl_right(left);
1706 if (is_Const(shl_right)) {
1707 tarval *tv1 = get_Const_tarval(right);
1708 tarval *tv2 = get_Const_tarval(shl_right);
1709 if (tv1 == tv2 && tarval_is_long(tv1)) {
1710 long val = get_tarval_long(tv1);
1711 if (val == 16 || val == 24) {
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_node *block = get_nodes_block(node);
1723 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right */
1732 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1738 * Creates an ia32 Rol.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotL node
1744 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1752 * Creates an ia32 Ror.
1753 * NOTE: There is no RotR with immediate because this would always be a RotL
1754 * "imm-mode_size_bits" which can be pre-calculated.
1756 * @param op1 The first operator
1757 * @param op2 The second operator
1758 * @return The created ia32 RotR node
1760 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1762 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1768 * Creates an ia32 RotR or RotL (depending on the found pattern).
1770 * @return The created ia32 RotL or RotR node
1772 static ir_node *gen_Rotl(ir_node *node)
1774 ir_node *rotate = NULL;
1775 ir_node *op1 = get_Rotl_left(node);
1776 ir_node *op2 = get_Rotl_right(node);
1778 /* Firm has only RotL, so we are looking for a right (op2)
1779 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1780 that means we can create a RotR instead of an Add and a RotL */
/* NOTE(review): the guard binding `add` (presumably is_Add(op2)) is elided here */
1784 ir_node *left = get_Add_left(add);
1785 ir_node *right = get_Add_right(add);
1786 if (is_Const(right)) {
1787 tarval *tv = get_Const_tarval(right);
1788 ir_mode *mode = get_irn_mode(node);
1789 long bits = get_mode_size_bits(mode);
/* RotL(x, bits - e) == RotR(x, e): match Minus(e) + bits */
1791 if (is_Minus(left) &&
1792 tarval_is_long(tv) &&
1793 get_tarval_long(tv) == bits &&
1796 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1797 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern not matched: fall back to a plain RotL */
1802 if (rotate == NULL) {
1803 rotate = gen_Rol(node, op1, op2);
1812 * Transforms a Minus node.
1814 * @return The created ia32 Minus node
1816 static ir_node *gen_Minus(ir_node *node)
1818 ir_node *op = get_Minus_op(node);
1819 ir_node *block = be_transform_node(get_nodes_block(node));
1820 dbg_info *dbgi = get_irn_dbg_info(node);
1821 ir_mode *mode = get_irn_mode(node);
/* float negation: flip the sign bit (SSE xor with sign-mask constant)
 * or use x87 fchs */
1826 if (mode_is_float(mode)) {
1827 ir_node *new_op = be_transform_node(op);
1828 if (ia32_cg_config.use_sse2) {
1829 /* TODO: non-optimal... if we have many xXors, then we should
1830 * rather create a load for the const and use that instead of
1831 * several AM nodes... */
1832 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1834 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1835 noreg_GP, nomem, new_op, noreg_xmm);
/* load the sign-bit mask (SFP_SIGN/DFP_SIGN) via address mode */
1837 size = get_mode_size_bits(mode);
1838 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1840 set_ia32_am_sc(new_node, ent);
1841 set_ia32_op_type(new_node, ia32_AddrModeS);
1842 set_ia32_ls_mode(new_node, mode);
1844 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation */
1847 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1850 SET_IA32_ORIG_NODE(new_node, node);
1856 * Transforms a Not node.
1858 * @return The created ia32 Not node
1860 static ir_node *gen_Not(ir_node *node)
1862 ir_node *op = get_Not_op(node);
1864 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1866 assert (! mode_is_float(get_irn_mode(node)));
1867 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1873 * Transforms an Abs node.
1875 * @return The created ia32 Abs node
1877 static ir_node *gen_Abs(ir_node *node)
1879 ir_node *block = get_nodes_block(node);
1880 ir_node *new_block = be_transform_node(block);
1881 ir_node *op = get_Abs_op(node);
1882 dbgi = get_irn_dbg_info(node);
1883 ir_mode *mode = get_irn_mode(node);
/* float abs: clear the sign bit (SSE and with abs-mask) or x87 fabs */
1889 if (mode_is_float(mode)) {
1890 new_op = be_transform_node(op);
1892 if (ia32_cg_config.use_sse2) {
1893 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1894 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1895 noreg_GP, nomem, new_op, noreg_fp);
/* mask constant SFP_ABS/DFP_ABS loaded via address mode */
1897 size = get_mode_size_bits(mode);
1898 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1900 set_ia32_am_sc(new_node, ent);
1902 SET_IA32_ORIG_NODE(new_node, node);
1904 set_ia32_op_type(new_node, ia32_AddrModeS);
1905 set_ia32_ls_mode(new_node, mode);
1907 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1908 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branchless idiom:
 * s = x >> 31 (all sign bits); result = (x ^ s) - s */
1911 ir_node *xor, *sign_extension;
1913 if (get_mode_size_bits(mode) == 32) {
1914 new_op = be_transform_node(op);
1916 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1919 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1921 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1922 nomem, new_op, sign_extension);
1923 SET_IA32_ORIG_NODE(xor, node);
1925 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1926 nomem, xor, sign_extension);
1927 SET_IA32_ORIG_NODE(new_node, node);
1934 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1936 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1938 dbg_info *dbgi = get_irn_dbg_info(cmp);
1939 ir_node *block = get_nodes_block(cmp);
1940 ir_node *new_block = be_transform_node(block);
1941 ir_node *op1 = be_transform_node(x);
1942 ir_node *op2 = be_transform_node(n);
/* bt sets the carry flag to bit n of op1 */
1944 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1948 * Transform a node returning a "flag" result.
1950 * @param node the node to transform
1951 * @param pnc_out the compare mode to use
1953 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1960 /* we have a Cmp as input */
1961 if (is_Proj(node)) {
1962 ir_node *pred = get_Proj_pred(node);
1964 pn_Cmp pnc = get_Proj_proj(node);
/* try to turn Cmp(x & (1 << n), 0) (or == the shifted bit) into a bt,
 * which encodes the tested bit in the carry flag */
1965 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1966 ir_node *l = get_Cmp_left(pred);
1967 ir_node *r = get_Cmp_right(pred);
1969 ir_node *la = get_And_left(l);
1970 ir_node *ra = get_And_right(l);
/* shifted-one on the left side of the And */
1972 ir_node *c = get_Shl_left(la);
1973 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1974 /* (1 << n) & ra) */
1975 ir_node *n = get_Shl_right(la);
1976 flags = gen_bt(pred, ra, n);
1977 /* we must generate a Jc/Jnc jump */
1978 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1981 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* shifted-one on the right side of the And */
1986 ir_node *c = get_Shl_left(ra);
1987 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1988 /* la & (1 << n)) */
1989 ir_node *n = get_Shl_right(ra);
1990 flags = gen_bt(pred, la, n);
1991 /* we must generate a Jc/Jnc jump */
1992 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1995 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
2001 /* add ia32 compare flags */
2003 ir_node *l = get_Cmp_left(pred);
2004 ir_mode *mode = get_irn_mode(l);
2005 if (mode_is_float(mode))
2006 pnc |= ia32_pn_Cmp_float;
2007 else if (! mode_is_signed(mode))
2008 pnc |= ia32_pn_Cmp_unsigned;
2011 flags = be_transform_node(pred);
2016 /* a mode_b value, we have to compare it against 0 */
2017 dbgi = get_irn_dbg_info(node);
2018 new_block = be_transform_node(get_nodes_block(node));
2019 new_op = be_transform_node(node);
/* Test(x, x) sets ZF iff x == 0, so the condition becomes x != 0 */
2020 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2021 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2022 *pnc_out = pn_Cmp_Lg;
2027 * Transforms a Load.
2029 * @return the created ia32 Load node
2031 static ir_node *gen_Load(ir_node *node)
2033 ir_node *old_block = get_nodes_block(node);
2034 ir_node *block = be_transform_node(old_block);
2035 ir_node *ptr = get_Load_ptr(node);
2036 ir_node *mem = get_Load_mem(node);
2037 ir_node *new_mem = be_transform_node(mem);
2040 dbg_info *dbgi = get_irn_dbg_info(node);
2041 ir_mode *mode = get_Load_mode(node);
2043 ia32_address_t addr;
2045 /* construct load address */
2046 memset(&addr, 0, sizeof(addr));
2047 ia32_create_address_mode(&addr, ptr, 0);
2054 base = be_transform_node(base);
2057 if (index == NULL) {
2060 index = be_transform_node(index);
/* float loads: SSE xLoad or x87 vfld */
2063 if (mode_is_float(mode)) {
2064 if (ia32_cg_config.use_sse2) {
2065 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2068 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2072 assert(mode != mode_b);
2074 /* create a conv node with address mode for smaller modes */
2075 if (get_mode_size_bits(mode) < 32) {
2076 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2077 new_mem, noreg_GP, mode);
2079 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2083 set_irn_pinned(new_node, get_irn_pinned(node));
2084 set_ia32_op_type(new_node, ia32_AddrModeS);
2085 set_ia32_ls_mode(new_node, mode);
2086 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
2088 if (get_irn_pinned(node) == op_pin_state_floats) {
2089 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2090 && pn_ia32_vfld_res == pn_ia32_Load_res
2091 && pn_ia32_Load_res == pn_ia32_res);
2092 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2095 SET_IA32_ORIG_NODE(new_node, node);
2097 be_dep_on_frame(new_node);
/**
 * Check whether a Store(op(Load)) combination may use destination
 * address mode, i.e. fold the load-modify-store into one instruction.
 */
2101 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2102 ir_node *ptr, ir_node *other)
2109 /* we only use address mode if we're the only user of the load */
2110 if (get_irn_n_edges(node) > 1)
2113 load = get_Proj_pred(node);
/* load must be in the same block as the store */
2116 if (get_nodes_block(load) != block)
2119 /* store should have the same pointer as the load */
2120 if (get_Load_ptr(load) != ptr)
2123 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2124 if (other != NULL &&
2125 get_nodes_block(other) == block &&
2126 heights_reachable_in_block(heights, other, load)) {
2130 if (prevents_AM(block, load, mem))
2132 /* Store should be attached to the load via mem */
2133 assert(heights_reachable_in_block(heights, mem, load));
/**
 * Build a destination-address-mode binary operation
 * (load-op-store folded into one memory instruction).
 * Returns NULL when no destination AM is possible (elided early-return path).
 */
2138 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2139 ir_node *mem, ir_node *ptr, ir_mode *mode,
2140 construct_binop_dest_func *func,
2141 construct_binop_dest_func *func8bit,
2142 match_flags_t flags)
2144 ir_node *src_block = get_nodes_block(node);
2152 ia32_address_mode_t am;
2153 ia32_address_t *addr = &am.addr;
2154 memset(&am, 0, sizeof(am));
2156 assert(flags & match_immediate); /* there is no destam node without... */
2157 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the memory operand; the other becomes
 * an immediate or a register */
2159 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2160 build_address(&am, op1, ia32_create_am_double_use);
2161 new_op = create_immediate_or_transform(op2, 0);
2162 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2163 build_address(&am, op2, ia32_create_am_double_use);
2164 new_op = create_immediate_or_transform(op1, 0);
/* fill missing address parts with noreg */
2169 if (addr->base == NULL)
2170 addr->base = noreg_GP;
2171 if (addr->index == NULL)
2172 addr->index = noreg_GP;
2173 if (addr->mem == NULL)
2176 dbgi = get_irn_dbg_info(node);
2177 block = be_transform_node(src_block);
2178 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit ops need the dedicated 8-bit constructor (register constraints) */
2180 if (get_mode_size_bits(mode) == 8) {
2181 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2183 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2185 set_address(new_node, addr);
2186 set_ia32_op_type(new_node, ia32_AddrModeD);
2187 set_ia32_ls_mode(new_node, mode);
2188 SET_IA32_ORIG_NODE(new_node, node);
/* route the consumed load's users to the new combined node */
2190 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2191 mem_proj = be_transform_node(am.mem_proj);
2192 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Build a destination-address-mode unary operation
 * (load-op-store folded into one memory instruction), or return NULL
 * if destination AM is not possible for this op.
 */
2197 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2198 ir_node *ptr, ir_mode *mode,
2199 construct_unop_dest_func *func)
2201 ir_node *src_block = get_nodes_block(node);
2207 ia32_address_mode_t am;
2208 ia32_address_t *addr = &am.addr;
2210 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2213 memset(&am, 0, sizeof(am));
2214 build_address(&am, op, ia32_create_am_double_use);
2216 dbgi = get_irn_dbg_info(node);
2217 block = be_transform_node(src_block);
2218 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2219 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2220 set_address(new_node, addr);
2221 set_ia32_op_type(new_node, ia32_AddrModeD);
2222 set_ia32_ls_mode(new_node, mode);
2223 SET_IA32_ORIG_NODE(new_node, node);
/* route the consumed load's users to the new combined node */
2225 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2226 mem_proj = be_transform_node(am.mem_proj);
2227 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/** Negate a pn_Cmp, honouring the ia32 float-compare flag (floats have
 *  unordered semantics, so negation differs from the integer case). */
2232 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2234 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2235 return get_negated_pnc(pnc, mode);
/**
 * Try to transform Store(Mux(sel, 1, 0)) into a SetccMem
 * (setcc directly into memory). Returns NULL if the pattern does not fit.
 */
2238 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2240 ir_mode *mode = get_irn_mode(node);
2241 ir_node *mux_true = get_Mux_true(node);
2242 ir_node *mux_false = get_Mux_false(node);
2251 ia32_address_t addr;
/* setcc only writes a single byte */
2253 if (get_mode_size_bits(mode) != 8)
/* accept Mux(c, 1, 0) and the inverted Mux(c, 0, 1) */
2256 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2258 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2264 cond = get_Mux_sel(node);
2265 flags = get_flags_node(cond, &pnc);
2266 /* we can't handle the float special cases with SetM */
2267 if (pnc & ia32_pn_Cmp_float)
/* NOTE(review): negation presumably only on the inverted-Mux path —
 * the guard line is elided here */
2270 pnc = ia32_get_negated_pnc(pnc);
2272 build_address_ptr(&addr, ptr, mem);
2274 dbgi = get_irn_dbg_info(node);
2275 block = get_nodes_block(node);
2276 new_block = be_transform_node(block);
2277 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2278 addr.index, addr.mem, flags, pnc);
2279 set_address(new_node, &addr);
2280 set_ia32_op_type(new_node, ia32_AddrModeD);
2281 set_ia32_ls_mode(new_node, mode);
2282 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform a Store(op(Load(ptr)), ptr) into a single
 * destination-address-mode instruction (AddMem, SubMem, ...).
 * Returns the created node or NULL when no pattern matches.
 */
2287 static ir_node *try_create_dest_am(ir_node *node)
2289 ir_node *val = get_Store_value(node);
2290 ir_node *mem = get_Store_mem(node);
2291 ir_node *ptr = get_Store_ptr(node);
2292 ir_mode *mode = get_irn_mode(val);
2293 unsigned bits = get_mode_size_bits(mode);
2298 /* handle only GP modes for now... */
2299 if (!ia32_mode_needs_gp_reg(mode))
2303 /* store must be the only user of the val node */
2304 if (get_irn_n_edges(val) > 1)
2306 /* skip pointless convs */
2308 ir_node *conv_op = get_Conv_op(val);
2309 ir_mode *pred_mode = get_irn_mode(conv_op);
2310 if (!ia32_mode_needs_gp_reg(pred_mode))
2312 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2320 /* value must be in the same block */
2321 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation feeding the store */
2324 switch (get_irn_opcode(val)) {
2326 op1 = get_Add_left(val);
2327 op2 = get_Add_right(val);
/* x+1 / x-1 become inc/dec when the target prefers them */
2328 if (ia32_cg_config.use_incdec) {
2329 if (is_Const_1(op2)) {
2330 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2332 } else if (is_Const_Minus_1(op2)) {
2333 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2339 match_commutative | match_immediate);
2342 op1 = get_Sub_left(val);
2343 op2 = get_Sub_right(val);
2344 if (is_Const(op2)) {
2345 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2352 op1 = get_And_left(val);
2353 op2 = get_And_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2356 match_commutative | match_immediate);
2359 op1 = get_Or_left(val);
2360 op2 = get_Or_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2363 match_commutative | match_immediate);
2366 op1 = get_Eor_left(val);
2367 op2 = get_Eor_right(val);
2368 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2369 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2370 match_commutative | match_immediate);
/* shifts have no separate 8-bit constructor: same func passed twice */
2373 op1 = get_Shl_left(val);
2374 op2 = get_Shl_right(val);
2375 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2376 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2380 op1 = get_Shr_left(val);
2381 op2 = get_Shr_right(val);
2382 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2383 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2387 op1 = get_Shrs_left(val);
2388 op2 = get_Shrs_right(val);
2389 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2390 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2394 op1 = get_Rotl_left(val);
2395 op2 = get_Rotl_right(val);
2396 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2397 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2400 /* TODO: match ROR patterns... */
2402 new_node = try_create_SetMem(val, ptr, mem);
2406 op1 = get_Minus_op(val);
2407 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2410 /* should be lowered already */
2411 assert(mode != mode_b);
2412 op1 = get_Not_op(val);
2413 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate the store's pinned state onto the combined node */
2419 if (new_node != NULL) {
2420 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2421 get_irn_pinned(node) == op_pin_state_pinned) {
2422 set_irn_pinned(new_node, op_pin_state_pinned);
/** Returns true for signed 16/32 bit integer modes — the sizes a
 *  float->int store conversion (fist) can produce directly. */
2429 static bool possible_int_mode_for_fp(ir_mode *mode)
2433 if (!mode_is_signed(mode))
2435 size = get_mode_size_bits(mode);
2436 if (size != 16 && size != 32)
/** Returns non-zero if node is a Conv from a float mode into a signed
 *  16/32 bit integer mode (candidate for a fist store). */
2441 static int is_float_to_int_conv(const ir_node *node)
2443 ir_mode *mode = get_irn_mode(node);
2447 if (!possible_int_mode_for_fp(mode))
2452 conv_op = get_Conv_op(node);
2453 conv_mode = get_irn_mode(conv_op);
2455 if (!mode_is_float(conv_mode))
2462 * Transform a Store(floatConst) into a sequence of
2465 * @return the created ia32 Store node
2467 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2469 ir_mode *mode = get_irn_mode(cns);
2470 unsigned size = get_mode_size_bytes(mode);
2471 tarval *tv = get_Const_tarval(cns);
2472 ir_node *block = get_nodes_block(node);
2473 ir_node *new_block = be_transform_node(block);
2474 ir_node *ptr = get_Store_ptr(node);
2475 ir_node *mem = get_Store_mem(node);
2476 dbg_info *dbgi = get_irn_dbg_info(node);
2480 ia32_address_t addr;
/* the constant is emitted as one or more 32-bit integer stores */
2482 assert(size % 4 == 0);
2485 build_address_ptr(&addr, ptr, mem);
/* assemble the next 4 bytes of the tarval, little-endian */
2489 get_tarval_sub_bits(tv, ofs) |
2490 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2491 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2492 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2493 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2495 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2496 addr.index, addr.mem, imm);
2498 set_irn_pinned(new_node, get_irn_pinned(node));
2499 set_ia32_op_type(new_node, ia32_AddrModeD);
2500 set_ia32_ls_mode(new_node, mode_Iu);
2501 set_address(new_node, &addr);
2502 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; they are joined by a Sync below */
2505 ins[i++] = new_node;
2510 } while (size != 0);
2513 return new_rd_Sync(dbgi, new_block, i, ins);
2520 * Generate a vfist or vfisttp instruction.
2522 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2523 ir_node *mem, ir_node *val, ir_node **fist)
2527 if (ia32_cg_config.use_fisttp) {
2528 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2529 if other users exists */
2530 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2531 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the res proj alive so the popped value gets duplicated when needed */
2532 be_new_Keep(block, 1, &value);
2534 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist needs the fpu control word set to truncation rounding */
2537 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2540 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2546 * Transforms a general (no special case) Store.
2548 * @return the created ia32 Store node
2550 static ir_node *gen_general_Store(ir_node *node)
2552 ir_node *val = get_Store_value(node);
2553 ir_mode *mode = get_irn_mode(val);
2554 ir_node *block = get_nodes_block(node);
2555 ir_node *new_block = be_transform_node(block);
2556 ir_node *ptr = get_Store_ptr(node);
2557 ir_node *mem = get_Store_mem(node);
2558 dbg_info *dbgi = get_irn_dbg_info(node);
2559 ir_node *new_val, *new_node, *store;
2560 ia32_address_t addr;
2562 /* check for destination address mode */
2563 new_node = try_create_dest_am(node);
2564 if (new_node != NULL)
2567 /* construct store address */
2568 memset(&addr, 0, sizeof(addr));
2569 ia32_create_address_mode(&addr, ptr, 0);
2571 if (addr.base == NULL) {
2572 addr.base = noreg_GP;
2574 addr.base = be_transform_node(addr.base);
2577 if (addr.index == NULL) {
2578 addr.index = noreg_GP;
2580 addr.index = be_transform_node(addr.index);
2582 addr.mem = be_transform_node(mem);
/* float store: SSE xStore or x87 vfst */
2584 if (mode_is_float(mode)) {
2585 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2587 while (is_Conv(val) && mode == get_irn_mode(val)) {
2588 ir_node *op = get_Conv_op(val);
2589 if (!mode_is_float(get_irn_mode(op)))
2593 new_val = be_transform_node(val);
2594 if (ia32_cg_config.use_sse2) {
2595 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2596 addr.index, addr.mem, new_val);
2598 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2599 addr.index, addr.mem, new_val, mode);
/* x87 float->int store: fold the Conv into a fist */
2602 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2603 val = get_Conv_op(val);
2605 /* TODO: is this optimisation still necessary at all (middleend)? */
2606 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2607 while (is_Conv(val)) {
2608 ir_node *op = get_Conv_op(val);
2609 if (!mode_is_float(get_irn_mode(op)))
2611 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2615 new_val = be_transform_node(val);
2616 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; 8-bit variant has its own constructor */
2618 new_val = create_immediate_or_transform(val, 0);
2619 assert(mode != mode_b);
2621 if (get_mode_size_bits(mode) == 8) {
2622 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2623 addr.index, addr.mem, new_val);
2625 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2626 addr.index, addr.mem, new_val);
2631 set_irn_pinned(store, get_irn_pinned(node));
2632 set_ia32_op_type(store, ia32_AddrModeD);
2633 set_ia32_ls_mode(store, mode);
2635 set_address(store, &addr);
2636 SET_IA32_ORIG_NODE(store, node);
2642 * Transforms a Store.
2644 * @return the created ia32 Store node
/*
 * Transform a Store: a float-constant store is lowered to a sequence of
 * integer stores (gen_float_const_Store); everything else goes through
 * gen_general_Store.
 *
 * @return the created ia32 Store node
 */
2646 static ir_node *gen_Store(ir_node *node)
2648 ir_node *val = get_Store_value(node);
2649 ir_mode *mode = get_irn_mode(val);
2651 if (mode_is_float(mode) && is_Const(val)) {
2652 /* We can transform every floating const store
2653 into a sequence of integer stores.
2654 If the constant is already in a register,
2655 it would be better to use it, but we don't
2656 have this information here. */
2657 return gen_float_const_Store(node, val);
2659 return gen_general_Store(node);
2663 * Transforms a Switch.
2665 * @return the created ia32 SwitchJmp node
/*
 * Transform a switch-Cond into an ia32 SwitchJmp (jump table).
 * Determines the min/max case values over all Proj users, rejects tables
 * spanning more than 128000 entries, and biases the selector with a Lea
 * when the smallest case is not zero.
 *
 * @return the created ia32 SwitchJmp node
 */
2667 static ir_node *create_Switch(ir_node *node)
2669 dbg_info *dbgi = get_irn_dbg_info(node);
2670 ir_node *block = be_transform_node(get_nodes_block(node));
2671 ir_node *sel = get_Cond_selector(node);
2672 ir_node *new_sel = be_transform_node(sel);
2673 long switch_min = LONG_MAX;
2674 long switch_max = LONG_MIN;
2675 long default_pn = get_Cond_default_proj(node);
2677 const ir_edge_t *edge;
2679 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2681 /* determine the smallest switch case value */
2682 foreach_out_edge(node, edge) {
2683 ir_node *proj = get_edge_src_irn(edge);
2684 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the table range */
2685 if (pn == default_pn)
2688 if (pn < switch_min)
2690 if (pn > switch_max)
/* unsigned difference also catches min > max overflow cases */
2694 if ((unsigned long) (switch_max - switch_min) > 128000) {
2695 panic("Size of switch %+F bigger than 128000", node);
2698 if (switch_min != 0) {
2699 /* if smallest switch case is not 0 we need an additional sub */
2700 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2701 add_ia32_am_offs_int(new_sel, -switch_min);
2702 set_ia32_op_type(new_sel, ia32_AddrModeS);
2704 SET_IA32_ORIG_NODE(new_sel, node);
2707 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2708 SET_IA32_ORIG_NODE(new_node, node);
2714 * Transform a Cond node.
/*
 * Transform a Cond node: mode_b selectors become a conditional jump (Jcc)
 * fed by the flags of the compare; any other selector mode is a switch.
 */
2716 static ir_node *gen_Cond(ir_node *node)
2718 ir_node *block = get_nodes_block(node);
2719 ir_node *new_block = be_transform_node(block);
2720 dbg_info *dbgi = get_irn_dbg_info(node);
2721 ir_node *sel = get_Cond_selector(node);
2722 ir_mode *sel_mode = get_irn_mode(sel);
2723 ir_node *flags = NULL;
2727 if (sel_mode != mode_b) {
2728 return create_Switch(node);
2731 /* we get flags from a Cmp */
2732 flags = get_flags_node(sel, &pnc);
2734 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2735 SET_IA32_ORIG_NODE(new_node, node);
2741 * Transform a be_Copy.
/*
 * Transform a be_Copy: duplicate the node and normalize any gp-register
 * mode to mode_Iu (the backend uses a single integer mode for gp regs).
 */
2743 static ir_node *gen_be_Copy(ir_node *node)
2745 ir_node *new_node = be_duplicate_node(node);
2746 ir_mode *mode = get_irn_mode(new_node);
2748 if (ia32_mode_needs_gp_reg(mode)) {
2749 set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare producing eflags:
 *  - vFucomi when the CPU writes eflags directly,
 *  - vFtstFnstsw for compares against 0 when ftst is enabled,
 *  - vFucomFnstsw + Sahf otherwise (status word moved into eflags).
 */
2755 static ir_node *create_Fucom(ir_node *node)
2757 dbg_info *dbgi = get_irn_dbg_info(node);
2758 ir_node *block = get_nodes_block(node);
2759 ir_node *new_block = be_transform_node(block);
2760 ir_node *left = get_Cmp_left(node);
2761 ir_node *new_left = be_transform_node(left);
2762 ir_node *right = get_Cmp_right(node);
2766 if (ia32_cg_config.use_fucomi) {
2767 new_right = be_transform_node(right);
2768 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2770 set_ia32_commutative(new_node);
2771 SET_IA32_ORIG_NODE(new_node, node);
/* ftst compares TOS against zero, saving the load of the 0 constant */
2773 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2774 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2776 new_right = be_transform_node(right);
2777 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2780 set_ia32_commutative(new_node);
2782 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies ah (FPU status word) into the eflags register */
2784 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2785 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE Ucomi float compare.  Uses address-mode matching so one
 * operand may come directly from memory; fixes up the memory Proj
 * afterwards if a load was folded in.
 */
2791 static ir_node *create_Ucomi(ir_node *node)
2793 dbg_info *dbgi = get_irn_dbg_info(node);
2794 ir_node *src_block = get_nodes_block(node);
2795 ir_node *new_block = be_transform_node(src_block);
2796 ir_node *left = get_Cmp_left(node);
2797 ir_node *right = get_Cmp_right(node);
2799 ia32_address_mode_t am;
2800 ia32_address_t *addr = &am.addr;
2802 match_arguments(&am, src_block, left, right, NULL,
2803 match_commutative | match_am);
2805 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2806 addr->mem, am.new_op1, am.new_op2,
2808 set_am_attributes(new_node, &am);
2810 SET_IA32_ORIG_NODE(new_node, node);
2812 new_node = fix_mem_proj(new_node, &am);
2818 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2819 * to fold an and into a test node
/*
 * Check whether all Proj users of a Cmp are Eq or Lg — only then can an
 * And feeding the Cmp be folded into a Test instruction (Test only
 * produces meaningful equality flags).
 */
2821 static bool can_fold_test_and(ir_node *node)
2823 const ir_edge_t *edge;
2825 /** we can only have eq and lg projs */
2826 foreach_out_edge(node, edge) {
2827 ir_node *proj = get_edge_src_irn(edge);
2828 pn_Cmp pnc = get_Proj_proj(proj);
2829 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2837 * returns true if it is assured, that the upper bits of a node are "clean"
2838 * which means for a 16 or 8 bit value, that the upper bits in the register
2839 * are 0 for unsigned and a copy of the last significant bit for signed
/*
 * Return true if the bits above 'mode' in a register holding
 * 'transformed_node' are known clean: zero for unsigned modes, a copy of
 * the sign bit for signed ones.  Used to widen 8/16-bit compares to the
 * smaller-encoding 32-bit form.
 * Recurses through Projs and conservatively inspects a few ia32 opcodes.
 */
2842 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2844 assert(ia32_mode_needs_gp_reg(mode));
/* 32 bit modes fill the whole register — trivially clean */
2845 if (get_mode_size_bits(mode) >= 32)
2848 if (is_Proj(transformed_node))
2849 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2851 switch (get_ia32_irn_opcode(transformed_node)) {
2852 case iro_ia32_Conv_I2I:
2853 case iro_ia32_Conv_I2I8Bit: {
/* a conv extends correctly iff signedness matches and it does not
 * come from a wider mode */
2854 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2855 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2857 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2864 if (mode_is_signed(mode)) {
2865 return false; /* TODO handle signed modes */
/* Shr by a large-enough immediate guarantees zeroed upper bits */
2867 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2868 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2869 const ia32_immediate_attr_t *attr
2870 = get_ia32_immediate_attr_const(right);
2871 if (attr->symconst == 0 &&
2872 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2876 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2880 /* TODO too conservative if shift amount is constant */
2881 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand suffices (bits can only go to 0) */
2884 if (!mode_is_signed(mode)) {
2886 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2887 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2889 /* TODO if one is known to be zero extended, then || is sufficient */
2894 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2895 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2897 case iro_ia32_Const:
2898 case iro_ia32_Immediate: {
2899 const ia32_immediate_attr_t *attr =
2900 get_ia32_immediate_attr_const(transformed_node);
2901 if (mode_is_signed(mode)) {
/* all bits above the sign bit must equal the sign bit */
2902 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2903 return shifted == 0 || shifted == -1;
2905 unsigned long shifted = (unsigned long)attr->offset;
2906 shifted >>= get_mode_size_bits(mode);
2907 return shifted == 0;
2917 * Generate code for a Cmp.
/*
 * Generate code for a Cmp.  Float compares dispatch to Ucomi (SSE) or
 * Fucom (x87).  Integer compares prefer Test for the (x & y) ==/!= 0
 * pattern, otherwise emit Cmp; both widen to 32-bit operand mode when
 * the upper bits are known clean (smaller encoding).
 */
2919 static ir_node *gen_Cmp(ir_node *node)
2921 dbg_info *dbgi = get_irn_dbg_info(node);
2922 ir_node *block = get_nodes_block(node);
2923 ir_node *new_block = be_transform_node(block);
2924 ir_node *left = get_Cmp_left(node);
2925 ir_node *right = get_Cmp_right(node);
2926 ir_mode *cmp_mode = get_irn_mode(left);
2928 ia32_address_mode_t am;
2929 ia32_address_t *addr = &am.addr;
2932 if (mode_is_float(cmp_mode)) {
2933 if (ia32_cg_config.use_sse2) {
2934 return create_Ucomi(node);
2936 return create_Fucom(node);
2940 assert(ia32_mode_needs_gp_reg(cmp_mode));
2942 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2943 cmp_unsigned = !mode_is_signed(cmp_mode);
/* the And must have no other user, and all Cmp projs must be Eq/Lg */
2944 if (is_Const_0(right) &&
2946 get_irn_n_edges(left) == 1 &&
2947 can_fold_test_and(node)) {
2948 /* Test(and_left, and_right) */
2949 ir_node *and_left = get_And_left(left);
2950 ir_node *and_right = get_And_right(left);
2952 /* matze: code here used mode instead of cmd_mode, I think it is always
2953 * the same as cmp_mode, but I leave this here to see if this is really
2956 assert(get_irn_mode(and_left) == cmp_mode);
2958 match_arguments(&am, block, and_left, and_right, NULL,
2960 match_am | match_8bit_am | match_16bit_am |
2961 match_am_and_immediates | match_immediate);
2963 /* use 32bit compare mode if possible since the opcode is smaller */
2964 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2965 upper_bits_clean(am.new_op2, cmp_mode)) {
2966 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2969 if (get_mode_size_bits(cmp_mode) == 8) {
2970 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2971 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2974 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2975 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2978 /* Cmp(left, right) */
2979 match_arguments(&am, block, left, right, NULL,
2980 match_commutative | match_am | match_8bit_am |
2981 match_16bit_am | match_am_and_immediates |
2983 /* use 32bit compare mode if possible since the opcode is smaller */
2984 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2985 upper_bits_clean(am.new_op2, cmp_mode)) {
2986 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2989 if (get_mode_size_bits(cmp_mode) == 8) {
2990 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2991 addr->index, addr->mem, am.new_op1,
2992 am.new_op2, am.ins_permuted,
2995 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2996 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2999 set_am_attributes(new_node, &am);
3000 set_ia32_ls_mode(new_node, cmp_mode);
3002 SET_IA32_ORIG_NODE(new_node, node);
3004 new_node = fix_mem_proj(new_node, &am);
/*
 * Create a CMovcc for a Mux whose values are in gp registers.
 * Address-mode matching may fold one operand from memory; when the
 * matcher permuted the operands the condition code is negated to
 * compensate.
 */
3009 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3012 dbg_info *dbgi = get_irn_dbg_info(node);
3013 ir_node *block = get_nodes_block(node);
3014 ir_node *new_block = be_transform_node(block);
3015 ir_node *val_true = get_Mux_true(node);
3016 ir_node *val_false = get_Mux_false(node);
3018 ia32_address_mode_t am;
3019 ia32_address_t *addr;
3021 assert(ia32_cg_config.use_cmov);
3022 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3026 match_arguments(&am, block, val_false, val_true, flags,
3027 match_commutative | match_am | match_16bit_am | match_mode_neutral);
3029 if (am.ins_permuted)
3030 pnc = ia32_get_negated_pnc(pnc);
3032 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3033 addr->mem, am.new_op1, am.new_op2, new_flags,
3035 set_am_attributes(new_node, &am);
3037 SET_IA32_ORIG_NODE(new_node, node);
3039 new_node = fix_mem_proj(new_node, &am);
3045 * Creates a ia32 Setcc instruction.
/*
 * Create an ia32 Setcc producing 0/1 from 'flags' under condition 'pnc',
 * widening the 8-bit result with a zero-extending Conv when the original
 * node's mode is wider than 8 bits.
 */
3047 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3048 ir_node *flags, pn_Cmp pnc,
3051 ir_mode *mode = get_irn_mode(orig_node);
3054 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3055 SET_IA32_ORIG_NODE(new_node, orig_node);
3057 /* we might need to conv the result up */
3058 if (get_mode_size_bits(mode) > 8) {
3059 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3060 nomem, new_node, mode_Bu);
3061 SET_IA32_ORIG_NODE(new_node, orig_node);
3068 * Create instruction for an unsigned Difference or Zero.
/*
 * Create an unsigned "difference or zero" (doz) for Mux(a >=u b, a - b, 0):
 * sub produces result + carry; Sbb0 materializes the borrow as a mask,
 * Not inverts it, and And zeroes the result when a < b — all branch-free.
 */
3070 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3072 ir_mode *mode = get_irn_mode(psi);
3082 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3083 match_mode_neutral | match_am | match_immediate | match_two_users);
3085 block = get_nodes_block(new_node);
/* gen_binop may or may not have wrapped the Sub in a Proj already */
3087 if (is_Proj(new_node)) {
3088 sub = get_Proj_pred(new_node);
3089 assert(is_ia32_Sub(sub));
/* we need both the result and the flags output of the Sub */
3092 set_irn_mode(sub, mode_T);
3093 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3095 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3097 dbgi = get_irn_dbg_info(psi);
3098 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3099 not = new_bd_ia32_Not(dbgi, block, sbb);
3101 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3102 set_ia32_commutative(new_node);
3107 * Create an const array of two float consts.
3109 * @param c0 the first constant
3110 * @param c1 the second constant
3111 * @param new_mode IN/OUT for the mode of the constants, if NULL
3112 * smallest possible mode will be used
/*
 * Create a private, constant global entity holding an array of the two
 * float constants c0 and c1.  When *new_mode is NULL the smallest mode
 * that represents both values losslessly is chosen and written back.
 * NOTE(review): the mode-selection lines here are partially elided in
 * this excerpt; the convert-to-smaller-mode flow should be confirmed
 * against the full file.
 */
3114 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3117 ir_mode *mode = *new_mode;
3119 ir_initializer_t *initializer;
3120 tarval *tv0 = get_Const_tarval(c0);
3121 tarval *tv1 = get_Const_tarval(c1);
3124 /* detect the best mode for the constants */
3125 mode = get_tarval_mode(tv0);
3127 if (mode != mode_F) {
3128 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3129 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3131 tv0 = tarval_convert_to(tv0, mode);
3132 tv1 = tarval_convert_to(tv1, mode);
3133 } else if (mode != mode_D) {
3134 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3135 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3137 tv0 = tarval_convert_to(tv0, mode);
3138 tv1 = tarval_convert_to(tv1, mode);
/* build the array type and a uniquely named, linker-visible entity */
3145 tp = ia32_create_float_type(mode, 4);
3146 tp = ia32_create_float_array(tp);
3148 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3150 set_entity_ld_ident(ent, get_entity_ident(ent));
3151 set_entity_visibility(ent, ir_visibility_private);
3152 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3154 initializer = create_initializer_compound(2);
3156 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3157 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3159 set_entity_initializer(ent, initializer);
3166 * Possible transformations for creating a Setcc.
/*
 * Instruction kinds a Setcc result may be post-processed with (Add, Lea,
 * Shl, Neg, Not, And, ...) and the transform descriptor filled in by
 * find_const_transform().  Enumerator list is elided in this excerpt.
 */
3168 enum setcc_transform_insn {
3181 typedef struct setcc_transform {
/* set when the compare's operands must be swapped for the sequence */
3183 unsigned permutate_cmp_ins;
3186 enum setcc_transform_insn transform;
3190 } setcc_transform_t;
3193 * Setcc can only handle 0 and 1 result.
3194 * Find a transformation that creates 0 and 1 from
/*
 * Setcc can only produce 0 and 1.  Compute a short instruction sequence
 * (recorded in res->steps, executed last-to-first by gen_Mux) that turns
 * the 0/1 Setcc result into the constants f/t of a Mux:
 * first normalize so f == 0 (possibly negating the condition and
 * subtracting f), then map t onto Set / Neg / Lea / Shl / And patterns.
 * NOTE(review): many branch/else lines are elided in this excerpt.
 */
3197 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3198 setcc_transform_t *res)
3203 res->permutate_cmp_ins = 0;
/* normalize: make t the non-zero (and larger) constant */
3205 if (tarval_is_null(t)) {
3209 pnc = ia32_get_negated_pnc(pnc);
3210 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3211 // now, t is the bigger one
3215 pnc = ia32_get_negated_pnc(pnc);
3219 if (! tarval_is_null(f)) {
/* shift the pair down by f: produce (t-f)/0 then add f afterwards */
3220 tarval *t_sub = tarval_sub(t, f, NULL);
3223 res->steps[step].transform = SETCC_TR_ADD;
3225 if (t == tarval_bad)
3226 panic("constant subtract failed");
3227 if (! tarval_is_long(f))
3228 panic("tarval is not long");
3230 res->steps[step].val = get_tarval_long(f);
3232 f = tarval_sub(f, f, NULL);
3233 assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices */
3236 if (tarval_is_one(t)) {
3237 res->steps[step].transform = SETCC_TR_SET;
3238 res->num_steps = ++step;
/* t == -1: Setcc then negate */
3242 if (tarval_is_minus_one(t)) {
3243 res->steps[step].transform = SETCC_TR_NEG;
3245 res->steps[step].transform = SETCC_TR_SET;
3246 res->num_steps = ++step;
3249 if (tarval_is_long(t)) {
3250 long v = get_tarval_long(t);
3252 res->steps[step].val = 0;
/* small multiples of the 0/1 result map onto Lea/Shl scalings:
 * scale 3 => *8/*9, scale 2 => *4/*5, scale 1 => *2/*3 */
3255 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3257 res->steps[step].transform = SETCC_TR_LEAxx;
3258 res->steps[step].scale = 3; /* (a << 3) + a */
3261 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3263 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3264 res->steps[step].scale = 3; /* (a << 3) */
3267 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3269 res->steps[step].transform = SETCC_TR_LEAxx;
3270 res->steps[step].scale = 2; /* (a << 2) + a */
3273 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3275 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3276 res->steps[step].scale = 2; /* (a << 2) */
3279 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3281 res->steps[step].transform = SETCC_TR_LEAxx;
3282 res->steps[step].scale = 1; /* (a << 1) + a */
3285 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3287 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3288 res->steps[step].scale = 1; /* (a << 1) */
3291 res->num_steps = step;
/* general constant: mask/negate, or shift a single-bit value into place */
3294 if (! tarval_is_single_bit(t)) {
3295 res->steps[step].transform = SETCC_TR_AND;
3296 res->steps[step].val = v;
3298 res->steps[step].transform = SETCC_TR_NEG;
3300 int v = get_tarval_lowest_bit(t);
3303 res->steps[step].transform = SETCC_TR_SHL;
3304 res->steps[step].scale = v;
3308 res->steps[step].transform = SETCC_TR_SET;
3309 res->num_steps = ++step;
3312 panic("tarval is not long");
3316 * Transforms a Mux node into some code sequence.
3318 * @return The transformed node.
/*
 * Transform a Mux node into an ia32 code sequence.
 * Float Mux: SSE min/max patterns, or a load from a two-element constant
 * array indexed by the Setcc result.  Integer Mux: doz pattern, a
 * const/const Setcc transform sequence (see find_const_transform), or a
 * plain CMov.
 * NOTE(review): numerous lines (else branches, breaks, returns) are
 * elided in this excerpt.
 *
 * @return The transformed node.
 */
3320 static ir_node *gen_Mux(ir_node *node)
3322 dbg_info *dbgi = get_irn_dbg_info(node);
3323 ir_node *block = get_nodes_block(node);
3324 ir_node *new_block = be_transform_node(block);
3325 ir_node *mux_true = get_Mux_true(node);
3326 ir_node *mux_false = get_Mux_false(node);
3327 ir_node *cond = get_Mux_sel(node);
3328 ir_mode *mode = get_irn_mode(node);
3333 assert(get_irn_mode(cond) == mode_b);
3335 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3336 if (mode_is_float(mode)) {
3337 ir_node *cmp = get_Proj_pred(cond);
3338 ir_node *cmp_left = get_Cmp_left(cmp);
3339 ir_node *cmp_right = get_Cmp_right(cmp);
3340 pn_Cmp pnc = get_Proj_proj(cond);
3342 if (ia32_cg_config.use_sse2) {
3343 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3344 if (cmp_left == mux_true && cmp_right == mux_false) {
3345 /* Mux(a <= b, a, b) => MIN */
3346 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3347 match_commutative | match_am | match_two_users);
3348 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3349 /* Mux(a <= b, b, a) => MAX */
3350 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3351 match_commutative | match_am | match_two_users);
3353 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3354 if (cmp_left == mux_true && cmp_right == mux_false) {
3355 /* Mux(a >= b, a, b) => MAX */
3356 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3357 match_commutative | match_am | match_two_users);
3358 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3359 /* Mux(a >= b, b, a) => MIN */
3360 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3361 match_commutative | match_am | match_two_users);
/* float Mux of two constants: load from a 2-entry constant array,
 * indexed (scaled) by the 0/1 Setcc result */
3366 if (is_Const(mux_true) && is_Const(mux_false)) {
3367 ia32_address_mode_t am;
3372 flags = get_flags_node(cond, &pnc);
3373 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3375 if (ia32_cg_config.use_sse2) {
3376 /* cannot load from different mode on SSE */
3379 /* x87 can load any mode */
3383 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 selector by the element size; 16-byte entries need
 * an explicit shift since AM scale tops out at 8 */
3385 switch (get_mode_size_bytes(new_mode)) {
3395 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3396 set_ia32_am_scale(new_node, 2);
3401 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3402 set_ia32_am_scale(new_node, 1);
3405 /* arg, shift 16 NOT supported */
3407 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3410 panic("Unsupported constant size");
3413 am.ls_mode = new_mode;
3414 am.addr.base = get_symconst_base();
3415 am.addr.index = new_node;
3416 am.addr.mem = nomem;
3418 am.addr.scale = scale;
3419 am.addr.use_frame = 0;
3420 am.addr.frame_entity = NULL;
3421 am.addr.symconst_sign = 0;
3422 am.mem_proj = am.addr.mem;
3423 am.op_type = ia32_AddrModeS;
3426 am.pinned = op_pin_state_floats;
3428 am.ins_permuted = 0;
3430 if (ia32_cg_config.use_sse2)
3431 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3433 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3434 set_am_attributes(load, &am);
3436 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3438 panic("cannot transform floating point Mux");
/* ---- integer Mux ---- */
3441 assert(ia32_mode_needs_gp_reg(mode));
3443 if (is_Proj(cond)) {
3444 ir_node *cmp = get_Proj_pred(cond);
3446 ir_node *cmp_left = get_Cmp_left(cmp);
3447 ir_node *cmp_right = get_Cmp_right(cmp);
3448 pn_Cmp pnc = get_Proj_proj(cond);
/* normalize so mux_false is the zero constant */
3450 if (is_Const(mux_true) && is_Const_null(mux_true)) {
3451 ir_node *tmp = mux_false;
3452 mux_false = mux_true;
3454 pnc = get_negated_pnc(pnc, mode);
/* Mux(a >= b, a - b, 0) and the mirrored form => branch-free doz */
3456 if (is_Const_0(mux_false) && is_Sub(mux_true)) {
3457 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3458 && get_Sub_left(mux_true) == cmp_left
3459 && get_Sub_right(mux_true) == cmp_right) {
3460 return create_doz(node, cmp_left, cmp_right);
3462 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3463 && get_Sub_left(mux_true) == cmp_right
3464 && get_Sub_right(mux_true) == cmp_left) {
3465 return create_doz(node, cmp_right, cmp_left);
3471 flags = get_flags_node(cond, &pnc);
3473 if (is_Const(mux_true) && is_Const(mux_false)) {
3474 /* both are const, good */
3475 tarval *tv_true = get_Const_tarval(mux_true);
3476 tarval *tv_false = get_Const_tarval(mux_false);
3477 setcc_transform_t res;
3480 find_const_transform(pnc, tv_true, tv_false, &res);
3482 if (res.permutate_cmp_ins) {
3483 ia32_attr_t *attr = get_ia32_attr(flags);
3484 attr->data.ins_permuted ^= 1;
/* emit the recorded steps in reverse: the Setcc (last step) is
 * built first, then each transform wraps the previous node */
3486 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3489 switch (res.steps[step].transform) {
3491 imm = ia32_immediate_from_long(res.steps[step].val);
3492 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3494 case SETCC_TR_ADDxx:
3495 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3498 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3499 set_ia32_am_scale(new_node, res.steps[step].scale);
3500 set_ia32_am_offs_int(new_node, res.steps[step].val);
3502 case SETCC_TR_LEAxx:
3503 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3504 set_ia32_am_scale(new_node, res.steps[step].scale);
3505 set_ia32_am_offs_int(new_node, res.steps[step].val);
3508 imm = ia32_immediate_from_long(res.steps[step].scale);
3509 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3512 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3515 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3518 imm = ia32_immediate_from_long(res.steps[step].val);
3519 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3522 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3525 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3528 panic("unknown setcc transform");
/* not both const: fall back to a conditional move */
3532 new_node = create_CMov(node, cond, flags, pnc);
3540 * Create a conversion from x87 state register to general purpose.
/*
 * Create a conversion from x87 float to a gp register: fist the value to
 * the stack frame, then load it back as an integer.  Unsigned 32-bit
 * targets are handled by storing a 64-bit signed integer and loading
 * only the lower 32 bits.
 */
3542 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3544 ir_node *block = be_transform_node(get_nodes_block(node));
3545 ir_node *op = get_Conv_op(node);
3546 ir_node *new_op = be_transform_node(op);
3547 ir_graph *irg = current_ir_graph;
3548 dbg_info *dbgi = get_irn_dbg_info(node);
3549 ir_mode *mode = get_irn_mode(node);
3550 ir_node *fist, *load, *mem;
3552 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3553 set_irn_pinned(fist, op_pin_state_floats);
3554 set_ia32_use_frame(fist);
3555 set_ia32_op_type(fist, ia32_AddrModeD);
3557 assert(get_mode_size_bits(mode) <= 32);
3558 /* exception we can only store signed 32 bit integers, so for unsigned
3559 we store a 64bit (signed) integer and load the lower bits */
3560 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3561 set_ia32_ls_mode(fist, mode_Ls);
3563 set_ia32_ls_mode(fist, mode_Is);
3565 SET_IA32_ORIG_NODE(fist, node);
3568 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3570 set_irn_pinned(load, op_pin_state_floats);
3571 set_ia32_use_frame(load);
3572 set_ia32_op_type(load, ia32_AddrModeS);
3573 set_ia32_ls_mode(load, mode_Is);
/* tell the stack-frame allocator how big the spill slot must be */
3574 if (get_ia32_ls_mode(fist) == mode_Ls) {
3575 ia32_attr_t *attr = get_ia32_attr(load);
3576 attr->data.need_64bit_stackent = 1;
3578 ia32_attr_t *attr = get_ia32_attr(load);
3579 attr->data.need_32bit_stackent = 1;
3581 SET_IA32_ORIG_NODE(load, node);
3583 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3587 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Create an x87 strict Conv by a store/load round trip through the stack
 * frame: vfst in tgt_mode truncates the 80-bit register value to the
 * target precision, vfld brings it back.
 */
3589 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3591 ir_node *block = get_nodes_block(node);
3592 ir_graph *irg = get_Block_irg(block);
3593 dbg_info *dbgi = get_irn_dbg_info(node);
3594 ir_node *frame = get_irg_frame(irg);
3595 ir_node *store, *load;
3598 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3599 set_ia32_use_frame(store);
3600 set_ia32_op_type(store, ia32_AddrModeD);
3601 SET_IA32_ORIG_NODE(store, node);
3603 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3604 set_ia32_use_frame(load);
3605 set_ia32_op_type(load, ia32_AddrModeS);
3606 SET_IA32_ORIG_NODE(load, node);
3608 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Build an integer-to-integer Conv node, selecting the 8-bit variant
 * constructor when the source/store mode is 8 bits wide.
 */
3612 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3613 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3615 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3617 func = get_mode_size_bits(mode) == 8 ?
3618 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3619 return func(dbgi, block, base, index, mem, val, mode);
3623 * Create a conversion from general purpose to x87 register
/*
 * Create a conversion from gp register to x87 float via fild.
 * When the operand is a suitable signed 16/32-bit integer in memory,
 * fild reads it directly (source AM); otherwise the value is spilled to
 * the stack frame first.  Unsigned 32-bit sources are widened to a
 * 64-bit spill with a zeroed upper half so fild's signed interpretation
 * is still correct.
 * NOTE(review): several lines (returns, else branches, in[] setup) are
 * elided in this excerpt.
 */
3625 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3627 ir_node *src_block = get_nodes_block(node);
3628 ir_node *block = be_transform_node(src_block);
3629 ir_graph *irg = get_Block_irg(block);
3630 dbg_info *dbgi = get_irn_dbg_info(node);
3631 ir_node *op = get_Conv_op(node);
3632 ir_node *new_op = NULL;
3634 ir_mode *store_mode;
3639 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3640 if (possible_int_mode_for_fp(src_mode)) {
3641 ia32_address_mode_t am;
3643 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3644 if (am.op_type == ia32_AddrModeS) {
3645 ia32_address_t *addr = &am.addr;
3647 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3648 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3650 set_am_attributes(fild, &am);
3651 SET_IA32_ORIG_NODE(fild, node);
3653 fix_mem_proj(fild, &am);
/* no source AM possible: go through a stack-frame spill slot */
3658 if (new_op == NULL) {
3659 new_op = be_transform_node(op);
3662 mode = get_irn_mode(op);
3664 /* first convert to 32 bit signed if necessary */
3665 if (get_mode_size_bits(src_mode) < 32) {
3666 if (!upper_bits_clean(new_op, src_mode)) {
3667 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3668 SET_IA32_ORIG_NODE(new_op, node);
3673 assert(get_mode_size_bits(mode) == 32);
3676 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3678 set_ia32_use_frame(store);
3679 set_ia32_op_type(store, ia32_AddrModeD);
3680 set_ia32_ls_mode(store, mode_Iu);
3682 /* exception for 32bit unsigned, do a 64bit spill+load */
3683 if (!mode_is_signed(mode)) {
/* zero the upper 4 bytes of the 8-byte slot so the 64-bit value
 * is a correct zero-extension of the unsigned 32-bit operand */
3686 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3688 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3689 noreg_GP, nomem, zero_const);
3691 set_ia32_use_frame(zero_store);
3692 set_ia32_op_type(zero_store, ia32_AddrModeD);
3693 add_ia32_am_offs_int(zero_store, 4);
3694 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores complete: Sync the two memories */
3699 store = new_rd_Sync(dbgi, block, 2, in);
3700 store_mode = mode_Ls;
3702 store_mode = mode_Is;
3706 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3708 set_ia32_use_frame(fild);
3709 set_ia32_op_type(fild, ia32_AddrModeS);
3710 set_ia32_ls_mode(fild, store_mode);
3712 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3718 * Create a conversion from one integer mode into another one
/*
 * Create an integer-to-integer conversion (sign/zero extension or
 * truncation) between src_mode and tgt_mode.  Uses address-mode matching
 * so the operand may be read from memory; the conv is skipped entirely
 * when the operand's upper bits are already clean.
 * NOTE(review): some lines (returns, warning condition) are elided in
 * this excerpt.
 */
3720 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3721 dbg_info *dbgi, ir_node *block, ir_node *op,
3724 ir_node *new_block = be_transform_node(block);
3726 ir_mode *smaller_mode;
3727 ia32_address_mode_t am;
3728 ia32_address_t *addr = &am.addr;
/* the extension/truncation is governed by the narrower of the two modes */
3731 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3732 smaller_mode = src_mode;
3734 smaller_mode = tgt_mode;
3737 #ifdef DEBUG_libfirm
3739 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3744 match_arguments(&am, block, NULL, op, NULL,
3745 match_am | match_8bit_am | match_16bit_am);
3747 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3748 /* unnecessary conv. in theory it shouldn't have been AM */
3749 assert(is_ia32_NoReg_GP(addr->base));
3750 assert(is_ia32_NoReg_GP(addr->index));
3751 assert(is_NoMem(addr->mem));
3752 assert(am.addr.offset == 0);
3753 assert(am.addr.symconst_ent == NULL);
3757 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3758 addr->mem, am.new_op2, smaller_mode);
3759 set_am_attributes(new_node, &am);
3760 /* match_arguments assume that out-mode = in-mode, this isn't true here
3762 set_ia32_ls_mode(new_node, smaller_mode);
3763 SET_IA32_ORIG_NODE(new_node, node);
3764 new_node = fix_mem_proj(new_node, &am);
/**
 * Transforms a Conv node.
 *
 * Dispatches on (src_mode, tgt_mode): float<->float, float->int, int->float
 * and int->int conversions each get their own lowering; no-op conversions
 * are eliminated where legal.
 *
 * @return The created ia32 Conv node
 */
static ir_node *gen_Conv(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_node  *op        = get_Conv_op(node);
	ir_node  *new_op    = NULL;
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_mode  *src_mode  = get_irn_mode(op);
	ir_mode  *tgt_mode  = get_irn_mode(node);
	int       src_bits  = get_mode_size_bits(src_mode);
	int       tgt_bits  = get_mode_size_bits(tgt_mode);
	ir_node  *res       = NULL;

	/* 64bit integer lowering must already have happened */
	assert(!mode_is_int(src_mode) || src_bits <= 32);
	assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);

	/* modeB -> X should already be lowered by the lower_mode_b pass */
	if (src_mode == mode_b) {
		panic("ConvB not lowered %+F", node);

	if (src_mode == tgt_mode) {
		if (get_Conv_strict(node)) {
			if (ia32_cg_config.use_sse2) {
				/* when we are in SSE mode, we can kill all strict no-op conversion */
				return be_transform_node(op);
		/* this should be optimized already, but who knows... */
		DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
		DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
		return be_transform_node(op);

	if (mode_is_float(src_mode)) {
		new_op = be_transform_node(op);
		/* we convert from float ... */
		if (mode_is_float(tgt_mode)) {
			/* ... to float */
			if (ia32_cg_config.use_sse2) {
				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
				res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, tgt_mode);
				if (get_Conv_strict(node)) {
					/* if fp_no_float_fold is not set then we assume that we
					 * don't have any float operations in a non
					 * mode_float_arithmetic mode and can skip strict upconvs */
					if (src_bits < tgt_bits
							&& !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
						DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
					/* x87: an explicit rounding store/load is needed for strict semantics */
					res = gen_x87_strict_conv(tgt_mode, new_op);
					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
				/* x87 registers are 80bit wide anyway, non-strict conv is a no-op */
				DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
			/* ... to int */
			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
			if (ia32_cg_config.use_sse2) {
				res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, src_mode);
				return gen_x87_fp_to_gp(node);
		/* we convert from int ... */
		if (mode_is_float(tgt_mode)) {
			/* ... to float */
			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
			if (ia32_cg_config.use_sse2) {
				new_op = be_transform_node(op);
				res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, tgt_mode);
				/* x87 path: decide whether the float mantissa can hold all int bits */
				unsigned int_mantissa   = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
				unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
				res = gen_x87_gp_to_fp(node, src_mode);

				/* we need a strict-Conv, if the int mode has more bits than the
				if (float_mantissa < int_mantissa) {
					res = gen_x87_strict_conv(tgt_mode, res);
					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
		} else if (tgt_mode == mode_b) {
			/* mode_b lowering already took care that we only have 0/1 values */
			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
			    src_mode, tgt_mode));
			return be_transform_node(op);
			/* int -> int conversion of the same width is a no-op */
			if (src_bits == tgt_bits) {
				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
				    src_mode, tgt_mode));
				return be_transform_node(op);

			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to turn @p node into an ia32 Immediate; if that is not possible
 * (not a suitable constant / constraint mismatch) transform it normally.
 *
 * @param node                       the firm node to transform
 * @param immediate_constraint_type  inline-asm style constraint character
 *                                   (0 = no extra constraint)
 */
static ir_node *create_immediate_or_transform(ir_node *node,
                                              char immediate_constraint_type)
	ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
	if (new_node == NULL) {
		new_node = be_transform_node(node);
/**
 * Transforms a FrameAddr into an ia32 Add.
 *
 * Emits a Lea over the transformed frame pointer; the frame entity is
 * attached so the real offset can be filled in after stack-frame layout.
 */
static ir_node *gen_be_FrameAddr(ir_node *node)
	ir_node  *block  = be_transform_node(get_nodes_block(node));
	ir_node  *op     = be_get_FrameAddr_frame(node);
	ir_node  *new_op = be_transform_node(op);
	dbg_info *dbgi   = get_irn_dbg_info(node);

	new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
	/* remember the entity; offset is resolved once the frame is laid out */
	set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
	set_ia32_use_frame(new_node);

	SET_IA32_ORIG_NODE(new_node, node);
/**
 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 *
 * With SSE2 and a float return value, the calling convention still returns
 * floats on the x87 stack: store the XMM value to the frame, reload it as
 * vfld, then rebuild the Barrier so the reloaded value/memory replace the
 * original return inputs. In all other cases the Return is just duplicated.
 */
static ir_node *gen_be_Return(ir_node *node)
	ir_graph  *irg     = current_ir_graph;
	ir_node   *ret_val = get_irn_n(node, be_pos_Return_val);
	ir_node   *ret_mem = get_irn_n(node, be_pos_Return_mem);
	ir_entity *ent     = get_irg_entity(irg);
	ir_type   *tp      = get_entity_type(ent);
	ir_node   *frame, *sse_store, *fld, *mproj, *barrier;
	ir_node   *new_barrier, *new_ret_val, *new_ret_mem;
	int        pn_ret_val, pn_ret_mem, arity, i;

	assert(ret_val != NULL);
	/* fast path: nothing to fix up unless SSE2 returns a value */
	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
		return be_duplicate_node(node);

	res_type = get_method_res_type(tp, 0);

	if (! is_Primitive_type(res_type)) {
		return be_duplicate_node(node);

	mode = get_type_mode(res_type);
	if (! mode_is_float(mode)) {
		return be_duplicate_node(node);

	assert(get_method_n_ress(tp) == 1);

	pn_ret_val = get_Proj_proj(ret_val);
	pn_ret_mem = get_Proj_proj(ret_mem);

	/* get the Barrier */
	barrier = get_Proj_pred(ret_val);

	/* get result input of the Barrier */
	ret_val     = get_irn_n(barrier, pn_ret_val);
	new_ret_val = be_transform_node(ret_val);

	/* get memory input of the Barrier */
	ret_mem     = get_irn_n(barrier, pn_ret_mem);
	new_ret_mem = be_transform_node(ret_mem);

	frame = get_irg_frame(irg);

	dbgi  = get_irn_dbg_info(barrier);
	block = be_transform_node(get_nodes_block(barrier));

	/* store xmm0 onto stack */
	sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
	                                     new_ret_mem, new_ret_val);
	set_ia32_ls_mode(sse_store, mode);
	set_ia32_op_type(sse_store, ia32_AddrModeD);
	set_ia32_use_frame(sse_store);

	/* load into x87 register */
	fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
	set_ia32_op_type(fld, ia32_AddrModeS);
	set_ia32_use_frame(fld);

	mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
	fld   = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);

	/* create a new barrier */
	arity = get_irn_arity(barrier);
	in    = ALLOCAN(ir_node*, arity);
	for (i = 0; i < arity; ++i) {
		/* swap in the reloaded value / memory for the original inputs */
		if (i == pn_ret_val) {
		} else if (i == pn_ret_mem) {
			ir_node *in = get_irn_n(barrier, i);
			new_in = be_transform_node(in);

	new_barrier = new_ir_node(dbgi, irg, block,
	                          get_irn_op(barrier), get_irn_mode(barrier),
	copy_node_attr(irg, barrier, new_barrier);
	be_duplicate_deps(barrier, new_barrier);
	/* register the replacement so the Return below picks up the new Barrier */
	be_set_transformed_node(barrier, new_barrier);

	/* transform normally */
	return be_duplicate_node(node);
/**
 * Transform a be_AddSP into an ia32_SubSP.
 *
 * Allocating stack space (AddSP) decreases esp on ia32 since the stack
 * grows downwards, hence the SubSP instruction.
 */
static ir_node *gen_be_AddSP(ir_node *node)
	ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
	ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);

	return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
	                 match_am | match_immediate);
/**
 * Transform a be_SubSP into an ia32_AddSP
 *
 * Freeing stack space (SubSP) increases esp, hence the AddSP instruction
 * (mirror image of gen_be_AddSP above).
 */
static ir_node *gen_be_SubSP(ir_node *node)
	ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
	ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);

	return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
	                 match_am | match_immediate);
/**
 * Change some phi modes
 *
 * Rebuilds the Phi with the register requirement matching its mode
 * (gp / xmm / vfp). The Phi initially keeps its old (untransformed)
 * arguments because of possible loops; they are fixed in a later pass.
 */
static ir_node *gen_Phi(ir_node *node)
	const arch_register_req_t *req;
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg   = current_ir_graph;
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_mode  *mode  = get_irn_mode(node);

	if (ia32_mode_needs_gp_reg(mode)) {
		/* we shouldn't have any 64bit stuff around anymore */
		assert(get_mode_size_bits(mode) <= 32);
		/* all integer operations are on 32bit registers now */
		req  = ia32_reg_classes[CLASS_ia32_gp].class_req;
	} else if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			req  = ia32_reg_classes[CLASS_ia32_xmm].class_req;
			req  = ia32_reg_classes[CLASS_ia32_vfp].class_req;
		/* memory / other modes need no register */
		req = arch_no_register_req;

	/* phi nodes allow loops, so we use the old arguments for now
	 * and fix this later */
	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
	                  get_irn_in(node) + 1);
	copy_node_attr(irg, node, phi);
	be_duplicate_deps(node, phi);

	arch_set_out_register_req(phi, 0, req);

	/* make sure the predecessors get transformed too */
	be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32 Jmp in the transformed block.
 */
static ir_node *gen_Jmp(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);

	new_node = new_bd_ia32_Jmp(dbgi, new_block);
	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp): the target address may be folded
 * into an address mode or an immediate via match_arguments.
 */
static ir_node *gen_IJmp(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *op        = get_IJmp_target(node);
	ia32_address_mode_t  am;
	ia32_address_t      *addr = &am.addr;

	/* jump targets are pointers */
	assert(get_irn_mode(op) == mode_P);

	match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);

	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
	                            addr->mem, am.new_op2);
	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a Bound node.
 *
 * Only the lower==0 case (typical for Java array bounds checks) is
 * supported: index < upper is checked with an unsigned Sub + Jcc, which
 * also catches negative indices via the unsigned comparison.
 */
static ir_node *gen_Bound(ir_node *node)
	ir_node  *lower = get_Bound_lower(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);

	if (is_Const_0(lower)) {
		/* typical case for Java */
		ir_node *sub, *res, *flags, *block;

		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
			new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);

		block = get_nodes_block(res);
		if (! is_Proj(res)) {
			/* gen_binop returned the Sub itself; turn it into mode_T + Proj */
			set_irn_mode(sub, mode_T);
			res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
			sub = get_Proj_pred(res);

		flags    = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
		/* unsigned "lower than" also rejects negative indices */
		new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
		SET_IA32_ORIG_NODE(new_node, node);
		panic("generic Bound not supported in ia32 Backend");
/**
 * Transform a lowered l_ShlDep (shift-left with extra dependency) into
 * an ia32 Shl.
 */
static ir_node *gen_ia32_l_ShlDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_ShlDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);

	return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
	                       match_immediate | match_mode_neutral);
/**
 * Transform a lowered l_ShrDep (logical shift-right with extra dependency)
 * into an ia32 Shr.
 */
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_ShrDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
	return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transform a lowered l_SarDep (arithmetic shift-right with extra
 * dependency) into an ia32 Sar.
 */
static ir_node *gen_ia32_l_SarDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_SarDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
	return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered l_Add (part of 64bit add lowering) into an ia32 Add.
 *
 * The result is forced to mode_T so the flags output (carry) can be
 * consumed by a following Adc.
 */
static ir_node *gen_ia32_l_Add(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Add_left);
	ir_node *right   = get_irn_n(node, n_ia32_l_Add_right);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
			match_commutative | match_am | match_immediate |
			match_mode_neutral);

	if (is_Proj(lowered)) {
		lowered = get_Proj_pred(lowered);
		assert(is_ia32_Add(lowered));
		/* expose the flags output for the carry consumer */
		set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered l_Adc (add with carry, upper half of a 64bit add)
 * into an ia32 Adc consuming the flags of the preceding Add.
 */
static ir_node *gen_ia32_l_Adc(ir_node *node)
	return gen_binop_flags(node, new_bd_ia32_Adc,
			match_commutative | match_am | match_immediate |
			match_mode_neutral);
/**
 * Transforms a l_MulS into a "real" MulS node.
 *
 * @return the created ia32 Mul node
 */
static ir_node *gen_ia32_l_Mul(ir_node *node)
	ir_node *left  = get_binop_left(node);
	ir_node *right = get_binop_right(node);

	return gen_binop(node, left, right, new_bd_ia32_Mul,
	                 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms a l_IMulS into a "real" IMul1OPS node.
 *
 * @return the created ia32 IMul1OP node
 */
static ir_node *gen_ia32_l_IMul(ir_node *node)
	ir_node *left  = get_binop_left(node);
	ir_node *right = get_binop_right(node);

	return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
	                 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered l_Sub (part of 64bit sub lowering) into an ia32 Sub.
 *
 * The result is forced to mode_T so the flags output (borrow) can be
 * consumed by a following Sbb.
 */
static ir_node *gen_ia32_l_Sub(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Sub_minuend);
	ir_node *right   = get_irn_n(node, n_ia32_l_Sub_subtrahend);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
			match_am | match_immediate | match_mode_neutral);

	if (is_Proj(lowered)) {
		lowered = get_Proj_pred(lowered);
		assert(is_ia32_Sub(lowered));
		/* expose the flags output for the borrow consumer */
		set_irn_mode(lowered, mode_T);
/**
 * Transform a lowered l_Sbb (subtract with borrow, upper half of a 64bit
 * sub) into an ia32 Sbb consuming the flags of the preceding Sub.
 */
static ir_node *gen_ia32_l_Sbb(ir_node *node)
	return gen_binop_flags(node, new_bd_ia32_Sbb,
			match_am | match_immediate | match_mode_neutral);
/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 *
 * Only op3 can be an immediate.
 */
static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
                                         ir_node *low, ir_node *count)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *new_high  = be_transform_node(high);
	ir_node  *new_low   = be_transform_node(low);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count)              &&
	       get_irn_n_edges(count) == 1 &&
	       mode_is_int(get_irn_mode(count))) {
		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
		/* skip the Conv; hardware masks the count to 5 bits anyway */
		count = get_Conv_op(count);
	new_count = create_immediate_or_transform(count, 0);

	if (is_ia32_l_ShlD(node)) {
		new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
		new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,

	SET_IA32_ORIG_NODE(new_node, node);
/** Transform a lowered l_ShlD (64bit shift-left helper) into an ia32 ShlD. */
static ir_node *gen_ia32_l_ShlD(ir_node *node)
	ir_node *high  = get_irn_n(node, n_ia32_l_ShlD_val_high);
	ir_node *low   = get_irn_n(node, n_ia32_l_ShlD_val_low);
	ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
	return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered l_ShrD (64bit shift-right helper) into an ia32 ShrD. */
static ir_node *gen_ia32_l_ShrD(ir_node *node)
	ir_node *high  = get_irn_n(node, n_ia32_l_ShrD_val_high);
	ir_node *low   = get_irn_n(node, n_ia32_l_ShrD_val_low);
	ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
	return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered l_LLtoFloat (64bit int -> float conversion).
 *
 * x87 only: stores the two 32bit halves to a 64bit stack slot, loads them
 * with fild as a signed 64bit value, and for unsigned sources corrects the
 * result by conditionally adding 2^64 (the ULL bias constant) when the
 * sign bit of the high word was set.
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
	ir_node  *src_block    = get_nodes_block(node);
	ir_node  *block        = be_transform_node(src_block);
	ir_graph *irg          = current_ir_graph;
	dbg_info *dbgi         = get_irn_dbg_info(node);
	ir_node  *frame        = get_irg_frame(irg);
	ir_node  *val_low      = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node  *val_high     = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node  *new_val_low  = be_transform_node(val_low);
	ir_node  *new_val_high = be_transform_node(val_high);
	ir_node  *sync, *fild, *res;
	ir_node  *store_low, *store_high;

	if (ia32_cg_config.use_sse2) {
		panic("ia32_l_LLtoFloat not implemented for SSE2");

	/* do a store to build a 64bit slot: low word at offset 0, high at 4 */
	store_low  = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	SET_IA32_ORIG_NODE(store_low,  node);
	SET_IA32_ORIG_NODE(store_high, node);

	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	add_ia32_am_offs_int(store_high, 4);

	sync  = new_rd_Sync(dbgi, block, 2, in);

	/* do a fild: loads the 64bit slot as (signed) long long */
	fild  = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);

	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);

	SET_IA32_ORIG_NODE(fild, node);

	res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);

	if (! mode_is_signed(get_irn_mode(val_high))) {
		/* unsigned source: fild interpreted the value as signed, so add
		 * 2^64 back when the high word's sign bit was set. The bias is
		 * fetched from a constant table indexed by (high >> 31). */
		ia32_address_mode_t  am;

		ir_node *count = ia32_create_Immediate(NULL, 0, 31);

		am.addr.base          = get_symconst_base();
		am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
		am.addr.mem           = nomem;
		am.addr.symconst_ent  = ia32_gen_fp_known_const(ia32_ULLBIAS);
		am.addr.use_frame     = 0;
		am.addr.frame_entity  = NULL;
		am.addr.symconst_sign = 0;
		am.ls_mode            = mode_F;
		am.mem_proj           = nomem;
		am.op_type            = ia32_AddrModeS;
		am.new_op2            = ia32_new_NoReg_vfp(env_cg);
		am.pinned             = op_pin_state_floats;
		am.ins_permuted       = 0;

		fadd  = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
		                          am.new_op1, am.new_op2, get_fpcw());
		set_am_attributes(fadd, &am);

		set_irn_mode(fadd, mode_T);
		res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered l_FloattoLL (float -> 64bit int conversion).
 *
 * Uses fist(p) to store the x87 value as a 64bit integer into a frame
 * slot; the two 32bit halves are picked up later by gen_Proj_l_FloattoLL.
 */
static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
	ir_node  *src_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(src_block);
	ir_graph *irg       = get_Block_irg(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *frame     = get_irg_frame(irg);
	ir_node  *val       = get_irn_n(node, n_ia32_l_FloattoLL_val);
	ir_node  *new_val   = be_transform_node(val);
	ir_node  *fist, *mem;

	mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
	SET_IA32_ORIG_NODE(fist, node);
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);
	/* store the full 64bit result */
	set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transform a Proj of an l_FloattoLL: load one 32bit half of the 64bit
 * integer that gen_ia32_l_FloattoLL stored into the frame slot
 * (offset 4 for the high half, 0 for the low half).
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_graph *irg      = get_Block_irg(block);
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	ir_node  *frame    = get_irg_frame(irg);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      pn       = get_Proj_proj(node);

	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
	SET_IA32_ORIG_NODE(load, node);
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;
	if (pn == pn_ia32_l_FloattoLL_res_high) {
		/* high half lives at offset 4 of the 64bit slot */
		add_ia32_am_offs_int(load, 4);
		assert(pn == pn_ia32_l_FloattoLL_res_low);

	proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of an AddSP.
 *
 * be_AddSP was transformed into ia32_SubSP (stack grows down), so the
 * proj numbers are remapped onto the SubSP outputs; the stack proj is
 * pinned to esp.
 */
static ir_node *gen_Proj_be_AddSP(ir_node *node)
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_AddSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
		                           pn_ia32_SubSP_stack);
		/* the new stack pointer is fixed to esp */
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_AddSP_res) {
		return new_rd_Proj(dbgi, new_pred, mode_Iu,
		                   pn_ia32_SubSP_addr);
	} else if (proj == pn_be_AddSP_M) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);

	panic("No idea how to transform proj->AddSP");
/**
 * Transform the Projs of a SubSP.
 *
 * be_SubSP was transformed into ia32_AddSP, so remap onto the AddSP
 * outputs; the stack proj is pinned to esp.
 */
static ir_node *gen_Proj_be_SubSP(ir_node *node)
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_SubSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
		                           pn_ia32_AddSP_stack);
		/* the new stack pointer is fixed to esp */
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_SubSP_M) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);

	panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs from a Load.
 *
 * Handles the cases where the Load was turned into an ia32 Load / xLoad /
 * vfld / Conv (source address mode folding), and the case where the Load
 * vanished entirely into another node's address mode (only its ProjM
 * survives).
 */
static ir_node *gen_Proj_Load(ir_node *node)
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_node  *pred  = get_Proj_pred(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	long      proj  = get_Proj_proj(node);

	/* loads might be part of source address mode matches, so we don't
	 * transform the ProjMs yet (with the exception of loads whose result is
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
		/* this is needed, because sometimes we have loops that are only
		   reachable through the ProjM */
		be_enqueue_preds(node);
		/* do it in 2 steps, to silence firm verifier */
		res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
		set_Proj_proj(res, pn_ia32_mem);

	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
	} else if (is_ia32_Conv_I2I(new_pred) ||
	           is_ia32_Conv_I2I8Bit(new_pred)) {
		/* the Load was folded into a Conv's address mode */
		set_irn_mode(new_pred, mode_T);
		if (proj == pn_Load_res) {
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
		} else if (proj == pn_Load_M) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
	} else if (is_ia32_xLoad(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	} else if (is_ia32_vfld(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
		case pn_Load_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
		/* can happen for ProJMs when source address mode happened for the
		/* however it should not be the result proj, as that would mean the
		   load had multiple users and should not have been used for
		if (proj != pn_Load_M) {
			panic("internal error: transformed node not a Load");
		return new_rd_Proj(dbgi, new_pred, mode_M, 1);

	panic("No idea how to transform proj");
/**
 * Transform and renumber the Projs from a DivMod like instruction.
 *
 * Div, Mod and DivMod all map onto the same ia32 Div/IDiv node, which
 * provides both quotient (div_res) and remainder (mod_res) outputs.
 */
static ir_node *gen_Proj_DivMod(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));

	/* the proj numbering differs per source opcode */
	switch (get_irn_opcode(pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
		case pn_Div_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_Div_X_except:
			/* division can trap (e.g. division by zero) */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
		case pn_Mod_X_except:
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
		case pn_DivMod_res_div:
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
		case pn_DivMod_res_mod:
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
		case pn_DivMod_X_regular:
			return new_rd_Jmp(dbgi, block);
		case pn_DivMod_X_except:
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);

	panic("No idea how to transform proj->DivMod");
/**
 * Transform and renumber the Projs from a CopyB.
 *
 * The CopyB was lowered either to CopyB_i (known small size) or CopyB
 * (rep movs); only the memory proj is remapped here.
 */
static ir_node *gen_Proj_CopyB(ir_node *node)
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (is_ia32_CopyB_i(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
	} else if (is_ia32_CopyB(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);

	panic("No idea how to transform proj->CopyB");
/**
 * Transform and renumber the Projs from a Quot.
 *
 * A float division was lowered either to SSE xDiv or x87 vfdiv; remap the
 * memory and result projs accordingly.
 */
static ir_node *gen_Proj_Quot(ir_node *node)
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (is_ia32_xDiv(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
	} else if (is_ia32_vfdiv(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
	if (is_ia32_xDiv(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
	} else if (is_ia32_vfdiv(new_pred)) {
		return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
	case pn_Quot_X_regular:
	case pn_Quot_X_except:

	panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call.
 *
 * The call target may be folded into an address mode or immediate.
 * Register parameters (limited to eax/ecx/edx by the calling convention)
 * are wired explicitly; the fpcw input comes last. Calls returning a
 * float trigger the x87 simulator, and SSE2 calls are collected for
 * post-processing.
 */
static ir_node *gen_be_Call(ir_node *node)
	dbg_info *const dbgi      = get_irn_dbg_info(node);
	ir_node  *const src_block = get_nodes_block(node);
	ir_node  *const block     = be_transform_node(src_block);
	ir_node  *const src_mem   = get_irn_n(node, be_pos_Call_mem);
	ir_node  *const src_sp    = get_irn_n(node, be_pos_Call_sp);
	ir_node  *const sp        = be_transform_node(src_sp);
	ir_node  *const src_ptr   = get_irn_n(node, be_pos_Call_ptr);
	ia32_address_mode_t  am;
	ia32_address_t *const addr = &am.addr;
	ir_node  *       eax       = noreg_GP;
	ir_node  *       ecx       = noreg_GP;
	ir_node  *       edx       = noreg_GP;
	unsigned  const  pop       = be_Call_get_pop(node);
	ir_type  *const  call_tp   = be_Call_get_type(node);
	int              old_no_pic_adjust;

	/* Run the x87 simulator if the call returns a float value */
	if (get_method_n_ress(call_tp) > 0) {
		ir_type *const res_type = get_method_res_type(call_tp, 0);
		ir_mode *const res_mode = get_type_mode(res_type);

		if (res_mode != NULL && mode_is_float(res_mode)) {
			env_cg->do_x87_sim = 1;

	/* We do not want be_Call direct calls */
	assert(be_Call_get_entity(node) == NULL);

	/* special case for PIC trampoline calls */
	old_no_pic_adjust = no_pic_adjust;
	no_pic_adjust     = be_get_irg_options(env_cg->irg)->pic;

	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
	                match_am | match_immediate);

	no_pic_adjust = old_no_pic_adjust;

	/* the last input is the floating point control word */
	i    = get_irn_arity(node) - 1;
	fpcw = be_transform_node(get_irn_n(node, i--));
	for (; i >= be_pos_Call_first_arg; --i) {
		arch_register_req_t const *const req = arch_get_register_req(node, i);
		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));

		/* register parameters must be constrained to a single gp register */
		assert(req->type == arch_register_req_type_limited);
		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);

		switch (*req->limited) {
			case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
			case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
			case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
			default: panic("Invalid GP register for register parameter");

	mem  = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
	set_am_attributes(call, &am);
	call = fix_mem_proj(call, &am);

	if (get_irn_pinned(node) == op_pin_state_pinned)
		set_irn_pinned(call, op_pin_state_pinned);

	SET_IA32_ORIG_NODE(call, node);

	if (ia32_cg_config.use_sse2) {
		/* remember this call for post-processing */
		ARR_APP1(ir_node *, call_list, call);
		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/**
 * Transform Builtin trap
 *
 * Emits the ud2 instruction, which raises an invalid-opcode exception.
 */
static ir_node *gen_trap(ir_node *node)
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node *block  = be_transform_node(get_nodes_block(node));
	ir_node *mem    = be_transform_node(get_Builtin_mem(node));

	return new_bd_ia32_UD2(dbgi, block, mem);
/**
 * Transform Builtin debugbreak
 *
 * Emits a breakpoint instruction (int3).
 */
static ir_node *gen_debugbreak(ir_node *node)
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node *block  = be_transform_node(get_nodes_block(node));
	ir_node *mem    = be_transform_node(get_Builtin_mem(node));

	return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform Builtin return_address
 *
 * Param 0 is the (constant) number of frames to walk up; for value > 0
 * a ClimbFrame pseudo-op walks the frame chain first. The return address
 * is then loaded from the selected frame via a frame entity.
 */
static ir_node *gen_return_address(ir_node *node)
	ir_node   *param    = get_Builtin_param(node, 0);
	ir_node   *frame    = get_Builtin_param(node, 1);
	dbg_info  *dbgi     = get_irn_dbg_info(node);
	tarval    *tv       = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);

	ir_node *block  = be_transform_node(get_nodes_block(node));
	ir_node *ptr    = be_transform_node(frame);

	/* walk up the frame chain 'value' times */
	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the return address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ia32_get_return_address_entity());

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all load result proj numbers must coincide for rematerialisation */
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin frame_address
 *
 * Analogous to gen_return_address, but loads the saved frame pointer of
 * the selected frame instead of the return address.
 */
static ir_node *gen_frame_address(ir_node *node)
	ir_node   *param    = get_Builtin_param(node, 0);
	ir_node   *frame    = get_Builtin_param(node, 1);
	dbg_info  *dbgi     = get_irn_dbg_info(node);
	tarval    *tv       = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);
	ir_node *block  = be_transform_node(get_nodes_block(node));
	ir_node *ptr    = be_transform_node(frame);

	/* walk up the frame chain 'value' times */
	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the frame address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	ent = ia32_get_frame_address_entity();
		set_ia32_am_offs_int(load, 0);
		set_ia32_use_frame(load);
		set_ia32_frame_ent(load, ent);
		/* will fail anyway, but gcc does this: */
		set_ia32_am_offs_int(load, 0);

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all load result proj numbers must coincide for rematerialisation */
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin prefetch.
 *
 * Param 0 is the address, param 1 the rw flag, param 2 the locality hint.
 * Picks PrefetchW (3DNow!, write intent), an SSE PrefetchNTA/0/1/2
 * depending on locality, or the plain 3DNow! Prefetch. Without any
 * prefetch support the builtin degrades to routing the memory edge.
 */
static ir_node *gen_prefetch(ir_node *node)
	ir_node *ptr, *block, *mem, *base, *index;
	ir_node *param, *new_node;
	ia32_address_t addr;

	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
		/* no prefetch at all, route memory */
		return be_transform_node(get_Builtin_mem(node));

	param = get_Builtin_param(node, 1);
	tv    = get_Const_tarval(param);
	rw    = get_tarval_long(tv);

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ptr = get_Builtin_param(node, 0);
	ia32_create_address_mode(&addr, ptr, 0);
		base = be_transform_node(base);
	if (index == NULL) {
		index = be_transform_node(index);

	dbgi  = get_irn_dbg_info(node);
	block = be_transform_node(get_nodes_block(node));
	mem   = be_transform_node(get_Builtin_mem(node));

	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
		/* we have 3DNow!, this was already checked above */
		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
	} else if (ia32_cg_config.use_sse_prefetch) {
		/* note: rw == 1 is IGNORED in that case */
		param    = get_Builtin_param(node, 2);
		tv       = get_Const_tarval(param);
		locality = get_tarval_long(tv);

		/* SSE style prefetch */
			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
		assert(ia32_cg_config.use_3dnow_prefetch);
		/* 3DNow! style prefetch */
		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode_Bu);
	set_address(new_node, &addr);

	SET_IA32_ORIG_NODE(new_node, node);

	be_dep_on_frame(new_node);
	return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5011 * Transform bsf like node
/*
 * Transform a bsf-like builtin: match the single operand into address
 * mode (so a memory operand can be folded in), construct the node with
 * the given constructor and fix up a possible memory Proj.
 */
5013 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5015 ir_node *param = get_Builtin_param(node, 0);
5016 dbg_info *dbgi = get_irn_dbg_info(node);
5018 ir_node *block = get_nodes_block(node);
5019 ir_node *new_block = be_transform_node(block);
5021 ia32_address_mode_t am;
5022 ia32_address_t *addr = &am.addr;
5025 match_arguments(&am, block, NULL, param, NULL, match_am);
5027 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5028 set_am_attributes(cnt, &am);
/* operand size of the instruction follows the operand's mode */
5029 set_ia32_ls_mode(cnt, get_irn_mode(param));
5031 SET_IA32_ORIG_NODE(cnt, node);
5032 return fix_mem_proj(cnt, &am);
5036 * Transform builtin ffs.
/*
 * Transform builtin ffs: ffs(x) = bsf(x) + 1, with result 0 for x == 0.
 * Computed branch-free as (bsf(x) | -(x == 0)) + 1, using the zero flag
 * produced by Bsf: for x == 0 the Or yields all-ones, and +1 gives 0.
 */
5038 static ir_node *gen_ffs(ir_node *node)
5040 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5041 ir_node *real = skip_Proj(bsf);
5042 dbg_info *dbgi = get_irn_dbg_info(real);
5043 ir_node *block = get_nodes_block(real);
5044 ir_node *flag, *set, *conv, *neg, *or;
/* ensure the Bsf is in mode_T so its flags output can be projected */
5047 if (get_irn_mode(real) != mode_T) {
5048 set_irn_mode(real, mode_T);
5049 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5052 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) as an 8-bit value */
5055 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5056 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the Setcc result to a full register */
5059 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5060 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones exactly when x was zero */
5063 neg = new_bd_ia32_Neg(dbgi, block, conv);
5066 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5067 set_ia32_commutative(or);
/* final +1 turns the bit index into the 1-based ffs result */
5070 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5074 * Transform builtin clz.
/*
 * Transform builtin clz: clz(x) = bsr(x) XOR 31. Bsr yields the index of
 * the highest set bit; XOR with 31 converts that index into the number
 * of leading zeros for a 32-bit operand.
 */
5076 static ir_node *gen_clz(ir_node *node)
5078 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5079 ir_node *real = skip_Proj(bsr);
5080 dbg_info *dbgi = get_irn_dbg_info(real);
5081 ir_node *block = get_nodes_block(real);
5082 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5084 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5088 * Transform builtin ctz.
/*
 * Transform builtin ctz: count-trailing-zeros maps directly onto Bsf.
 */
5090 static ir_node *gen_ctz(ir_node *node)
5092 return gen_unop_AM(node, new_bd_ia32_Bsf);
5096 * Transform builtin parity.
/*
 * Transform builtin parity: compare the operand against 0 (executed only
 * for its flags), materialize the parity flag with Setcc and zero-extend
 * the 8-bit result to a full register.
 */
5098 static ir_node *gen_parity(ir_node *node)
5100 ir_node *param = get_Builtin_param(node, 0);
5101 dbg_info *dbgi = get_irn_dbg_info(node);
5103 ir_node *block = get_nodes_block(node);
5105 ir_node *new_block = be_transform_node(block);
5106 ir_node *imm, *cmp, *new_node;
5108 ia32_address_mode_t am;
5109 ia32_address_t *addr = &am.addr;
/* cmp param, 0 — the comparison result itself is unused, we only
 * need the parity flag it sets */
5113 match_arguments(&am, block, NULL, param, NULL, match_am);
5114 imm = ia32_create_Immediate(NULL, 0, 0);
5115 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5116 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5117 set_am_attributes(cmp, &am);
5118 set_ia32_ls_mode(cmp, mode_Iu);
5120 SET_IA32_ORIG_NODE(cmp, node);
5122 cmp = fix_mem_proj(cmp, &am);
/* setp: parity flag as an 8-bit value */
5125 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5126 SET_IA32_ORIG_NODE(new_node, node);
/* widen to 32 bit */
5129 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5130 nomem, new_node, mode_Bu);
5131 SET_IA32_ORIG_NODE(new_node, node);
5136 * Transform builtin popcount
5138 static ir_node *gen_popcount(ir_node *node)
5140 ir_node *param = get_Builtin_param(node, 0);
5141 dbg_info *dbgi = get_irn_dbg_info(node);
5143 ir_node *block = get_nodes_block(node);
5144 ir_node *new_block = be_transform_node(block);
5147 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5149 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5150 if (ia32_cg_config.use_popcnt) {
5151 ia32_address_mode_t am;
5152 ia32_address_t *addr = &am.addr;
5155 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5157 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5158 set_am_attributes(cnt, &am);
5159 set_ia32_ls_mode(cnt, get_irn_mode(param));
5161 SET_IA32_ORIG_NODE(cnt, node);
5162 return fix_mem_proj(cnt, &am);
5165 new_param = be_transform_node(param);
5167 /* do the standard popcount algo */
5169 /* m1 = x & 0x55555555 */
5170 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5171 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5174 simm = ia32_create_Immediate(NULL, 0, 1);
5175 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5177 /* m2 = s1 & 0x55555555 */
5178 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5181 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5183 /* m4 = m3 & 0x33333333 */
5184 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5185 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5188 simm = ia32_create_Immediate(NULL, 0, 2);
5189 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5191 /* m5 = s2 & 0x33333333 */
5192 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5195 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5197 /* m7 = m6 & 0x0F0F0F0F */
5198 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5199 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5202 simm = ia32_create_Immediate(NULL, 0, 4);
5203 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5205 /* m8 = s3 & 0x0F0F0F0F */
5206 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5209 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5211 /* m10 = m9 & 0x00FF00FF */
5212 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5213 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5216 simm = ia32_create_Immediate(NULL, 0, 8);
5217 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5219 /* m11 = s4 & 0x00FF00FF */
5220 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5222 /* m12 = m10 + m11 */
5223 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5225 /* m13 = m12 & 0x0000FFFF */
5226 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5227 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5229 /* s5 = m12 >> 16 */
5230 simm = ia32_create_Immediate(NULL, 0, 16);
5231 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5233 /* res = m13 + s5 */
5234 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5238 * Transform builtin byte swap.
/*
 * Transform builtin byte swap. For 32-bit operands the bswap instruction
 * is used when the target is at least an i486; otherwise the swap is
 * composed from shifts, masks and Lea additions. 16-bit operands use
 * Bswap16 unconditionally; any other size is rejected with a panic.
 */
5240 static ir_node *gen_bswap(ir_node *node)
5242 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5243 dbg_info *dbgi = get_irn_dbg_info(node);
5245 ir_node *block = get_nodes_block(node);
5246 ir_node *new_block = be_transform_node(block);
5247 ir_mode *mode = get_irn_mode(param);
5248 unsigned size = get_mode_size_bits(mode);
5249 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5253 if (ia32_cg_config.use_i486) {
5254 /* swap available */
5255 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* manual swap: (x << 24) + ((x << 8) & 0xFF00)
 *            + ((x >> 8) & 0xFF0000) + (x >> 24) */
5257 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5258 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5260 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5261 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5263 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5265 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5266 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5268 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5269 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5272 /* swap16 always available */
5273 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5276 panic("Invalid bswap size (%d)", size);
5281 * Transform builtin outport.
/*
 * Transform builtin outport: write a value to an I/O port.
 * The port number may be folded into an immediate; the store width is
 * taken from the mode of the value operand.
 */
5285 static ir_node *gen_outport(ir_node *node)
5286 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5287 ir_node *oldv = get_Builtin_param(node, 1);
5288 ir_mode *mode = get_irn_mode(oldv);
5289 ir_node *value = be_transform_node(oldv);
5290 ir_node *block = be_transform_node(get_nodes_block(node));
5291 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5292 dbg_info *dbgi = get_irn_dbg_info(node);
5293 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5294 set_ia32_ls_mode(res, mode);
5299 * Transform builtin inport.
/*
 * Transform builtin inport: read a value from an I/O port.
 * The load width is determined by the builtin's declared result type;
 * the port number may be folded into an immediate.
 */
5301 static ir_node *gen_inport(ir_node *node)
5303 ir_type *tp = get_Builtin_type(node);
5304 ir_type *rstp = get_method_res_type(tp, 0);
5305 ir_mode *mode = get_type_mode(rstp);
5306 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5307 ir_node *block = be_transform_node(get_nodes_block(node));
5308 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5309 dbg_info *dbgi = get_irn_dbg_info(node);
5311 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5312 set_ia32_ls_mode(res, mode);
5314 /* check for missing Result Proj */
5319 * Transform a builtin inner trampoline
/*
 * Transform a builtin inner trampoline: emit stores that write a small
 * x86 code stub at *ptr which loads the static-chain value into ecx and
 * then jumps to the callee:
 *     B9 <env>          mov ecx, <env>
 *     E9 <callee-rel>   jmp rel32 <callee>
 * The jmp displacement is relative to the end of the stub; the -10 bias
 * accounts for its 10 bytes (1+4 for the mov, 1+4 for the jmp).
 */
5321 static ir_node *gen_inner_trampoline(ir_node *node)
5323 ir_node *ptr = get_Builtin_param(node, 0);
5324 ir_node *callee = get_Builtin_param(node, 1);
5325 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5326 ir_node *mem = get_Builtin_mem(node);
5327 ir_node *block = get_nodes_block(node);
5328 ir_node *new_block = be_transform_node(block);
5332 ir_node *trampoline;
5334 dbg_info *dbgi = get_irn_dbg_info(node);
5335 ia32_address_t addr;
5337 /* construct store address */
5338 memset(&addr, 0, sizeof(addr));
5339 ia32_create_address_mode(&addr, ptr, 0);
5341 if (addr.base == NULL) {
5342 addr.base = noreg_GP;
5344 addr.base = be_transform_node(addr.base);
5347 if (addr.index == NULL) {
5348 addr.index = noreg_GP;
5350 addr.index = be_transform_node(addr.index);
5352 addr.mem = be_transform_node(mem);
5354 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5355 val = ia32_create_Immediate(NULL, 0, 0xB9);
5356 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5357 addr.index, addr.mem, val);
5358 set_irn_pinned(store, get_irn_pinned(node));
5359 set_ia32_op_type(store, ia32_AddrModeD);
5360 set_ia32_ls_mode(store, mode_Bu);
5361 set_address(store, &addr);
/* store the 32-bit environment pointer (the mov's immediate) */
5365 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5366 addr.index, addr.mem, env);
5367 set_irn_pinned(store, get_irn_pinned(node));
5368 set_ia32_op_type(store, ia32_AddrModeD);
5369 set_ia32_ls_mode(store, mode_Iu);
5370 set_address(store, &addr);
5374 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5375 val = ia32_create_Immediate(NULL, 0, 0xE9);
5376 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5377 addr.index, addr.mem, val);
5378 set_irn_pinned(store, get_irn_pinned(node));
5379 set_ia32_op_type(store, ia32_AddrModeD);
5380 set_ia32_ls_mode(store, mode_Bu);
5381 set_address(store, &addr);
5385 trampoline = be_transform_node(ptr);
5387 /* the callee is typically an immediate */
5388 if (is_SymConst(callee)) {
/* callee - 10: fold the stub-size bias into the constant */
5389 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5391 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* displacement = (callee - 10) - trampoline address */
5393 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5395 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5396 addr.index, addr.mem, rel);
5397 set_irn_pinned(store, get_irn_pinned(node));
5398 set_ia32_op_type(store, ia32_AddrModeD);
5399 set_ia32_ls_mode(store, mode_Iu);
5400 set_address(store, &addr);
/* result tuple: memory chain and the trampoline address */
5405 return new_r_Tuple(new_block, 2, in);
5409 * Transform Builtin node.
/*
 * Transform a Builtin node by dispatching on its kind to the specific
 * gen_* transformer; panics on builtins the ia32 backend does not
 * implement.
 */
5411 static ir_node *gen_Builtin(ir_node *node)
5413 ir_builtin_kind kind = get_Builtin_kind(node);
5417 return gen_trap(node);
5418 case ir_bk_debugbreak:
5419 return gen_debugbreak(node);
5420 case ir_bk_return_address:
5421 return gen_return_address(node);
5422 case ir_bk_frame_address:
5423 return gen_frame_address(node);
5424 case ir_bk_prefetch:
5425 return gen_prefetch(node);
5427 return gen_ffs(node);
5429 return gen_clz(node);
5431 return gen_ctz(node);
5433 return gen_parity(node);
5434 case ir_bk_popcount:
5435 return gen_popcount(node);
5437 return gen_bswap(node);
5439 return gen_outport(node);
5441 return gen_inport(node);
5442 case ir_bk_inner_trampoline:
5443 return gen_inner_trampoline(node);
5445 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5449 * Transform Proj(Builtin) node.
/*
 * Transform a Proj of a Builtin node. Most builtins produce the result
 * or memory node directly; Inport gets explicit Projs for its result
 * and memory outputs, and inner_trampoline results come out of the
 * Tuple built by gen_inner_trampoline.
 */
5451 static ir_node *gen_Proj_Builtin(ir_node *proj)
5453 ir_node *node = get_Proj_pred(proj);
5454 ir_node *new_node = be_transform_node(node);
5455 ir_builtin_kind kind = get_Builtin_kind(node);
5458 case ir_bk_return_address:
5459 case ir_bk_frame_address:
5464 case ir_bk_popcount:
/* these transformers return the result value itself */
5466 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5469 case ir_bk_debugbreak:
5470 case ir_bk_prefetch:
/* these only produce a memory result */
5472 assert(get_Proj_proj(proj) == pn_Builtin_M);
5475 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5476 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5478 assert(get_Proj_proj(proj) == pn_Builtin_M);
5479 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5481 case ir_bk_inner_trampoline:
/* Tuple layout from gen_inner_trampoline: [0] = mem, [1] = result */
5482 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5483 return get_Tuple_pred(new_node, 1);
5485 assert(get_Proj_proj(proj) == pn_Builtin_M);
5486 return get_Tuple_pred(new_node, 0);
5489 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/*
 * Transform a be_IncSP by duplicating it and marking it as clobbering
 * the flags register.
 */
5492 static ir_node *gen_be_IncSP(ir_node *node)
5494 ir_node *res = be_duplicate_node(node);
5495 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5501 * Transform the Projs from a be_Call.
/*
 * Transform the Projs from a be_Call: map be_Call proj numbers onto the
 * corresponding ia32_Call outputs. For data results with a limited
 * register requirement the matching output of the new call is searched
 * by comparing register requirements; the stack and fpcw projs get
 * their registers assigned explicitly.
 */
5503 static ir_node *gen_Proj_be_Call(ir_node *node)
5505 ir_node *call = get_Proj_pred(node);
5506 ir_node *new_call = be_transform_node(call);
5507 dbg_info *dbgi = get_irn_dbg_info(node);
5508 long proj = get_Proj_proj(node);
5509 ir_mode *mode = get_irn_mode(node);
5512 if (proj == pn_be_Call_M_regular) {
5513 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5515 /* transform call modes */
5516 if (mode_is_data(mode)) {
5517 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5521 /* Map from be_Call to ia32_Call proj number */
5522 if (proj == pn_be_Call_sp) {
5523 proj = pn_ia32_Call_stack;
5524 } else if (proj == pn_be_Call_M_regular) {
5525 proj = pn_ia32_Call_M;
5527 arch_register_req_t const *const req = arch_get_register_req_out(node);
5528 int const n_outs = arch_irn_get_n_outs(new_call);
5531 assert(proj >= pn_be_Call_first_res);
5532 assert(req->type & arch_register_req_type_limited);
/* find the output of the new call with the same limited register
 * requirement as the original result proj */
5534 for (i = 0; i < n_outs; ++i) {
5535 arch_register_req_t const *const new_req
5536 = arch_get_out_register_req(new_call, i);
5538 if (!(new_req->type & arch_register_req_type_limited) ||
5539 new_req->cls != req->cls ||
5540 *new_req->limited != *req->limited)
5549 res = new_rd_Proj(dbgi, new_call, mode, proj);
5551 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5553 case pn_ia32_Call_stack:
5554 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5557 case pn_ia32_Call_fpcw:
5558 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5566 * Transform the Projs from a Cmp.
/*
 * Transform the Projs from a Cmp. Reaching this means a mode_b value
 * survived until code selection, which the preceding lowering phase
 * should have eliminated — so this is always an error.
 */
5568 static ir_node *gen_Proj_Cmp(ir_node *node)
5570 /* this probably means not all mode_b nodes were lowered... */
5571 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5576 * Transform the Projs from a Bound.
/*
 * Transform the Projs from a Bound: the bounds check becomes a Jcc, so
 * the regular/exception X projs map to the Jcc true/false outputs; the
 * memory and result projs pass the operands through untouched.
 */
5578 static ir_node *gen_Proj_Bound(ir_node *node)
5581 ir_node *pred = get_Proj_pred(node);
5583 switch (get_Proj_proj(node)) {
5585 return be_transform_node(get_Bound_mem(pred));
5586 case pn_Bound_X_regular:
5587 new_node = be_transform_node(pred);
5588 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5589 case pn_Bound_X_except:
5590 new_node = be_transform_node(pred);
5591 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
/* the checked index is the Bound's result */
5593 return be_transform_node(get_Bound_index(pred));
5595 panic("unsupported Proj from Bound");
/*
 * Transform a Proj of an ASM node: the memory output is always the last
 * output of the transformed ASM node; integer/reference and float
 * results keep their position.
 */
5599 static ir_node *gen_Proj_ASM(ir_node *node)
5601 ir_mode *mode = get_irn_mode(node);
5602 ir_node *pred = get_Proj_pred(node);
5603 ir_node *new_pred = be_transform_node(pred);
5604 long pos = get_Proj_proj(node);
5606 if (mode == mode_M) {
5607 pos = arch_irn_get_n_outs(new_pred)-1;
5608 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5610 } else if (mode_is_float(mode)) {
5613 panic("unexpected proj mode at ASM");
5616 return new_r_Proj(new_pred, mode, pos);
5620 * Transform and potentially renumber Proj nodes.
/*
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specialized Proj transformers; the
 * Start-ProjX becomes a Jmp, and remaining Projs are duplicated (with
 * integer values forced into mode_Iu).
 */
5622 static ir_node *gen_Proj(ir_node *node)
5624 ir_node *pred = get_Proj_pred(node);
5627 switch (get_irn_opcode(pred)) {
5629 proj = get_Proj_proj(node);
5630 if (proj == pn_Store_M) {
/* the transformed Store is itself the memory value */
5631 return be_transform_node(pred);
5633 panic("No idea how to transform proj->Store");
5636 return gen_Proj_Load(node);
5638 return gen_Proj_ASM(node);
5640 return gen_Proj_Builtin(node);
5644 return gen_Proj_DivMod(node);
5646 return gen_Proj_CopyB(node);
5648 return gen_Proj_Quot(node);
5650 return gen_Proj_be_SubSP(node);
5652 return gen_Proj_be_AddSP(node);
5654 return gen_Proj_be_Call(node);
5656 return gen_Proj_Cmp(node);
5658 return gen_Proj_Bound(node);
5660 proj = get_Proj_proj(node);
5662 case pn_Start_X_initial_exec: {
5663 ir_node *block = get_nodes_block(pred);
5664 ir_node *new_block = be_transform_node(block);
5665 dbg_info *dbgi = get_irn_dbg_info(node);
5666 /* we exchange the ProjX with a jump */
5667 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5672 case pn_Start_P_tls:
5673 return gen_Proj_tls(node);
5678 if (is_ia32_l_FloattoLL(pred)) {
5679 return gen_Proj_l_FloattoLL(node);
5681 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5685 ir_mode *mode = get_irn_mode(node);
5686 if (ia32_mode_needs_gp_reg(mode)) {
/* GP values always live in mode_Iu after the transformation */
5687 ir_node *new_pred = be_transform_node(pred);
5688 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5689 get_Proj_proj(node));
5690 new_proj->node_nr = node->node_nr;
5695 return be_duplicate_node(node);
5699 * Enters all transform functions into the generic pointer
/*
 * Enter all transform functions into the generic op function pointers:
 * clears any previous setup, then registers one transformer per opcode
 * (nodes without special handling are simply duplicated).
 */
5701 static void register_transformers(void)
5703 /* first clear the generic function pointer for all ops */
5704 be_start_transform_setup();
5706 be_set_transform_function(op_Abs, gen_Abs);
5707 be_set_transform_function(op_Add, gen_Add);
5708 be_set_transform_function(op_And, gen_And);
5709 be_set_transform_function(op_ASM, gen_ASM);
5710 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5711 be_set_transform_function(op_be_Call, gen_be_Call);
5712 be_set_transform_function(op_be_Copy, gen_be_Copy);
5713 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5714 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5715 be_set_transform_function(op_be_Return, gen_be_Return);
5716 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5717 be_set_transform_function(op_Bound, gen_Bound);
5718 be_set_transform_function(op_Builtin, gen_Builtin);
5719 be_set_transform_function(op_Cmp, gen_Cmp);
5720 be_set_transform_function(op_Cond, gen_Cond);
5721 be_set_transform_function(op_Const, gen_Const);
5722 be_set_transform_function(op_Conv, gen_Conv);
5723 be_set_transform_function(op_CopyB, gen_CopyB);
5724 be_set_transform_function(op_Div, gen_Div);
5725 be_set_transform_function(op_DivMod, gen_DivMod);
5726 be_set_transform_function(op_Eor, gen_Eor);
5727 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5728 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5729 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5730 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5731 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5732 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5733 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5734 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5735 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5736 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5737 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5738 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5739 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5740 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5741 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5742 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5743 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5744 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5745 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5746 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5747 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5748 be_set_transform_function(op_IJmp, gen_IJmp);
5749 be_set_transform_function(op_Jmp, gen_Jmp);
5750 be_set_transform_function(op_Load, gen_Load);
5751 be_set_transform_function(op_Minus, gen_Minus);
5752 be_set_transform_function(op_Mod, gen_Mod);
5753 be_set_transform_function(op_Mul, gen_Mul);
5754 be_set_transform_function(op_Mulh, gen_Mulh);
5755 be_set_transform_function(op_Mux, gen_Mux);
5756 be_set_transform_function(op_Not, gen_Not);
5757 be_set_transform_function(op_Or, gen_Or);
5758 be_set_transform_function(op_Phi, gen_Phi);
5759 be_set_transform_function(op_Proj, gen_Proj);
5760 be_set_transform_function(op_Quot, gen_Quot);
5761 be_set_transform_function(op_Rotl, gen_Rotl);
5762 be_set_transform_function(op_Shl, gen_Shl);
5763 be_set_transform_function(op_Shr, gen_Shr);
5764 be_set_transform_function(op_Shrs, gen_Shrs);
5765 be_set_transform_function(op_Store, gen_Store);
5766 be_set_transform_function(op_Sub, gen_Sub);
5767 be_set_transform_function(op_SymConst, gen_SymConst);
5768 be_set_transform_function(op_Unknown, gen_Unknown);
5772 * Pre-transform all unknown and noreg nodes.
/*
 * Pre-transform all unknown and noreg nodes so they exist in the new
 * graph before the main transformation runs, and cache the nomem and
 * noreg_GP nodes used throughout this file.
 */
5774 static void ia32_pretransform_node(void)
5776 ia32_code_gen_t *cg = env_cg;
5778 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5779 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5780 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5782 nomem = get_irg_no_mem(current_ir_graph);
5783 noreg_GP = ia32_new_NoReg_gp(cg);
5789 * Post-process all calls if we are in SSE mode.
5790 * The ABI requires that the results are in st0, copy them
5791 * to a xmm register.
/*
 * Post-process calls recorded during the transformation when compiling
 * in SSE mode: the calling convention returns floats in st(0), so each
 * float result must be moved into an xmm register. An xStore user can
 * be patched into a vfst directly; any other user gets the value via a
 * vfst/xLoad round trip through the frame.
 */
5793 static void postprocess_fp_call_results(void)
5797 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5798 ir_node *call = call_list[i];
5799 ir_type *mtp = call_types[i];
5802 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5803 ir_type *res_tp = get_method_res_type(mtp, j);
5804 ir_node *res, *new_res;
5805 const ir_edge_t *edge, *next;
5808 if (! is_atomic_type(res_tp)) {
5809 /* no floating point return */
5812 mode = get_type_mode(res_tp);
5813 if (! mode_is_float(mode)) {
5814 /* no floating point return */
5818 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5821 /* now patch the users */
5822 foreach_out_edge_safe(res, edge, next) {
5823 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no data value, skip them */
5826 if (be_is_Keep(succ))
5829 if (is_ia32_xStore(succ)) {
5830 /* an xStore can be patched into an vfst */
5831 dbg_info *db = get_irn_dbg_info(succ);
5832 ir_node *block = get_nodes_block(succ);
5833 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5834 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5835 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5836 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5837 ir_mode *mode = get_ia32_ls_mode(succ);
5839 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5840 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5841 if (is_ia32_use_frame(succ))
5842 set_ia32_use_frame(st);
5843 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5844 set_irn_pinned(st, get_irn_pinned(succ));
5845 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily create the spill/reload sequence once per result */
5849 if (new_res == NULL) {
5850 dbg_info *db = get_irn_dbg_info(call);
5851 ir_node *block = get_nodes_block(call);
5852 ir_node *frame = get_irg_frame(current_ir_graph);
5853 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5854 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5855 ir_node *vfst, *xld, *new_mem;
5857 /* store st(0) on stack */
5858 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5859 set_ia32_op_type(vfst, ia32_AddrModeD);
5860 set_ia32_use_frame(vfst);
5862 /* load into SSE register */
5863 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5864 set_ia32_op_type(xld, ia32_AddrModeS);
5865 set_ia32_use_frame(xld);
5867 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5868 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute memory users to the new memory chain */
5870 if (old_mem != NULL) {
5871 edges_reroute(old_mem, new_mem, current_ir_graph);
5875 set_irn_n(succ, get_edge_src_pos(edge), new_res);
/*
 * Run the firm-to-ia32 transformation on the given code generator's
 * graph: set up the transformers, compute heights and the set of nodes
 * excluded from address-mode folding, disable CSE for the duration of
 * the transformation (node attributes are set after creation), and in
 * SSE2 mode fix up float call results afterwards.
 */
5882 /* do the transformation */
5883 void ia32_transform_graph(ia32_code_gen_t *cg)
5887 register_transformers();
5889 initial_fpcw = NULL;
5892 be_timer_push(T_HEIGHTS);
5893 heights = heights_new(cg->irg);
5894 be_timer_pop(T_HEIGHTS);
5895 ia32_calculate_non_address_mode_nodes(cg->irg);
5897 /* the transform phase is not safe for CSE (yet) because several nodes get
5898 * attributes set after their creation */
5899 cse_last = get_opt_cse();
/* call_list/call_types record calls for postprocess_fp_call_results */
5902 call_list = NEW_ARR_F(ir_node *, 0);
5903 call_types = NEW_ARR_F(ir_type *, 0);
5904 be_transform_graph(cg->irg, ia32_pretransform_node);
5906 if (ia32_cg_config.use_sse2)
5907 postprocess_fp_call_results();
5908 DEL_ARR_F(call_types);
5909 DEL_ARR_F(call_list);
5911 set_opt_cse(cse_last);
5913 ia32_free_non_address_mode_nodes();
5914 heights_free(heights);
5918 void ia32_init_transform(void)
5920 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");