2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
/* Bit masks (as strings, parsed into tarvals later) for manipulating the
 * sign bit / absolute value of single and double precision floats. */
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
/* 2^64 — presumably the bias added when converting unsigned 64bit integers
 * to floating point (see ia32_ULLBIAS below) — TODO confirm at use site */
81 #define ULL_BIAS "18446744073709551616"
/* Linker-visible names of the entities generated for the constants above. */
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached, transformed fpcw (floating point control word) node; see get_fpcw(). */
94 static ir_node *initial_fpcw = NULL;
/* Function-pointer types matching the signatures of the generated
 * new_bd_ia32_* node constructors; used so generic helpers (gen_binop,
 * gen_unop, ...) can build any concrete ia32 node. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Binary op that additionally consumes an eflags input (e.g. Adc/Sbb). */
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Shift/rotate: no address-mode inputs, just the two operands. */
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop: takes an extra operand (the fpcw, see gen_binop_x87_float). */
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero if a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero if a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero if a node represents the -1 constant (all bits set). */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
/* x87 can materialize 0.0 (fldz) and 1.0 (fld1) without a memory load;
 * only those two values count as "simple" here. */
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
/* For SSE, 0.0 is always cheap (xorps/pxor). With CONSTRUCT_SSE_CONST,
 * additional patterns (e.g. doubles whose low 32 bits are zero) are also
 * considered constructible without a memory load. */
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its byte representation. */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
/* In position independent code, symbol addresses must be computed relative
 * to the PIC base register; otherwise no base register is needed. */
201 static ir_node *get_symconst_base(void)
203 if (be_get_irg_options(env_cg->irg)->pic) {
204 return arch_code_generator_get_pic_base(env_cg);
211 * Transforms a Const.
/* Transform a firm Const node into ia32 code. Three major cases:
 *  - float + SSE2: synthesize cheap constants (zero via xZero; with
 *    CONSTRUCT_SSE_CONST also 1.0 and 32bit-representable doubles),
 *    otherwise load from a float-constant entity.
 *  - float + x87: fldz/fld1 for 0.0/1.0, otherwise vfld from an entity.
 *  - integer: convert the tarval to mode_Iu and emit an ia32_Const. */
213 static ir_node *gen_Const(ir_node *node)
215 ir_node *old_block = get_nodes_block(node);
216 ir_node *block = be_transform_node(old_block);
217 dbg_info *dbgi = get_irn_dbg_info(node);
218 ir_mode *mode = get_irn_mode(node);
220 assert(is_Const(node));
222 if (mode_is_float(mode)) {
228 if (ia32_cg_config.use_sse2) {
229 tarval *tv = get_Const_tarval(node);
230 if (tarval_is_null(tv)) {
/* 0.0: materialized register-only, no memory access needed. */
231 load = new_bd_ia32_xZero(dbgi, block);
232 set_ia32_ls_mode(load, mode);
234 #ifdef CONSTRUCT_SSE_CONST
235 } else if (tarval_is_one(tv)) {
/* Build 1.0 from all-ones: shift left then right leaves exactly the
 * bit pattern of 1.0 (shift amounts differ for float vs double). */
236 int cnst = mode == mode_F ? 26 : 55;
237 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
238 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
239 ir_node *pslld, *psrld;
241 load = new_bd_ia32_xAllOnes(dbgi, block);
242 set_ia32_ls_mode(load, mode);
243 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
244 set_ia32_ls_mode(pslld, mode);
245 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
246 set_ia32_ls_mode(psrld, mode);
248 #endif /* CONSTRUCT_SSE_CONST */
249 } else if (mode == mode_F) {
250 /* we can place any 32bit constant by using a movd gp, sse */
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
255 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
256 load = new_bd_ia32_xMovd(dbgi, block, cnst);
257 set_ia32_ls_mode(load, mode);
260 #ifdef CONSTRUCT_SSE_CONST
261 if (mode == mode_D) {
262 unsigned val = get_tarval_sub_bits(tv, 0) |
263 (get_tarval_sub_bits(tv, 1) << 8) |
264 (get_tarval_sub_bits(tv, 2) << 16) |
265 (get_tarval_sub_bits(tv, 3) << 24);
267 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
268 ir_node *cnst, *psllq;
270 /* fine, lower 32bit are zero, produce 32bit value */
271 val = get_tarval_sub_bits(tv, 4) |
272 (get_tarval_sub_bits(tv, 5) << 8) |
273 (get_tarval_sub_bits(tv, 6) << 16) |
274 (get_tarval_sub_bits(tv, 7) << 24);
275 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
276 load = new_bd_ia32_xMovd(dbgi, block, cnst);
277 set_ia32_ls_mode(load, mode);
/* Shift the upper half into position to reconstruct the double. */
278 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
279 set_ia32_ls_mode(psllq, mode);
284 #endif /* CONSTRUCT_SSE_CONST */
/* Fallback: place the constant in a read-only entity and load it. */
285 floatent = create_float_const_entity(node);
287 base = get_symconst_base();
288 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
290 set_ia32_op_type(load, ia32_AddrModeS);
291 set_ia32_am_sc(load, floatent);
/* Loads from constant entities can be re-executed freely by the
 * spiller instead of spilling the value. */
292 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
293 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: special instructions exist for 0.0 and 1.0. */
296 if (is_Const_null(node)) {
297 load = new_bd_ia32_vfldz(dbgi, block);
299 set_ia32_ls_mode(load, mode);
300 } else if (is_Const_one(node)) {
301 load = new_bd_ia32_vfld1(dbgi, block);
303 set_ia32_ls_mode(load, mode);
308 floatent = create_float_const_entity(node);
309 /* create_float_const_ent is smart and sometimes creates
311 ls_mode = get_type_mode(get_entity_type(floatent));
312 base = get_symconst_base();
313 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
315 set_ia32_op_type(load, ia32_AddrModeS);
316 set_ia32_am_sc(load, floatent);
317 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
318 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
321 #ifdef CONSTRUCT_SSE_CONST
323 #endif /* CONSTRUCT_SSE_CONST */
324 SET_IA32_ORIG_NODE(load, node);
326 be_dep_on_frame(load);
328 } else { /* non-float mode */
330 tarval *tv = get_Const_tarval(node);
/* Normalize all integer constants to 32bit unsigned for the backend. */
333 tv = tarval_convert_to(tv, mode_Iu);
335 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
337 panic("couldn't convert constant tarval (%+F)", node);
339 val = get_tarval_long(tv);
341 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
342 SET_IA32_ORIG_NODE(cnst, node);
344 be_dep_on_frame(cnst);
350 * Transforms a SymConst.
/* Transform a firm SymConst (symbolic address constant). Float-mode
 * SymConsts become loads from the entity's address; integer-mode ones
 * become an ia32_Const carrying the entity. Only symconst_addr_ent is
 * supported. */
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
/* NOTE(review): the load is created with mode_E in both cases — TODO
 * confirm this is intended for the SSE path as well. */
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
377 SET_IA32_ORIG_NODE(cnst, node);
379 be_dep_on_frame(cnst);
384 * Create a float type for the given mode and cache it.
386 * @param mode the mode for the float type (might be integer mode for SSE2 types)
387 * @param align alignment
/* One static cache array per supported mode, indexed by alignment
 * (alignment is assumed < 16 — the arrays have 16 slots). */
389 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
395 if (mode == mode_Iu) {
396 static ir_type *int_Iu[16] = {NULL, };
398 if (int_Iu[align] == NULL) {
399 int_Iu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Iu[align];
404 } else if (mode == mode_Lu) {
405 static ir_type *int_Lu[16] = {NULL, };
407 if (int_Lu[align] == NULL) {
408 int_Lu[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return int_Lu[align];
413 } else if (mode == mode_F) {
414 static ir_type *float_F[16] = {NULL, };
416 if (float_F[align] == NULL) {
417 float_F[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_F[align];
422 } else if (mode == mode_D) {
423 static ir_type *float_D[16] = {NULL, };
425 if (float_D[align] == NULL) {
426 float_D[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_D[align];
/* Fallback: extended precision (mode_E) types. */
432 static ir_type *float_E[16] = {NULL, };
434 if (float_E[align] == NULL) {
435 float_E[align] = tp = new_type_primitive(mode);
436 /* set the specified alignment */
437 set_type_alignment_bytes(tp, align);
439 return float_E[align];
444 * Create a float[2] array type for the given atomic type.
446 * @param tp the atomic type
/* Cached per element mode and alignment, same scheme as
 * ia32_create_float_type(). Used e.g. for the ULL_BIAS entity which
 * stores two values. */
448 static ir_type *ia32_create_float_array(ir_type *tp)
450 ir_mode *mode = get_type_mode(tp);
451 unsigned align = get_type_alignment_bytes(tp);
456 if (mode == mode_F) {
457 static ir_type *float_F[16] = {NULL, };
459 if (float_F[align] != NULL)
460 return float_F[align];
461 arr = float_F[align] = new_type_array(1, tp);
462 } else if (mode == mode_D) {
463 static ir_type *float_D[16] = {NULL, };
465 if (float_D[align] != NULL)
466 return float_D[align];
467 arr = float_D[align] = new_type_array(1, tp);
469 static ir_type *float_E[16] = {NULL, };
471 if (float_E[align] != NULL)
472 return float_E[align];
473 arr = float_E[align] = new_type_array(1, tp);
/* Two elements of tp, fixed layout so the backend can emit it directly. */
475 set_type_alignment_bytes(arr, align);
476 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
477 set_type_state(arr, layout_fixed);
481 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) the global read-only entities holding the
 * well-known FP bit patterns (sign masks, abs masks, ULL bias). The
 * names table is indexed by ia32_known_const_t. */
482 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
484 static const struct {
485 const char *ent_name;
486 const char *cnst_str;
489 } names [ia32_known_const_max] = {
490 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
491 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
492 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
493 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
494 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
496 static ir_entity *ent_cache[ia32_known_const_max];
498 const char *ent_name, *cnst_str;
504 ent_name = names[kct].ent_name;
505 if (! ent_cache[kct]) {
506 cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32bit unsigned, 1 = 64bit unsigned, else float. */
508 switch (names[kct].mode) {
509 case 0: mode = mode_Iu; break;
510 case 1: mode = mode_Lu; break;
511 default: mode = mode_F; break;
513 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
514 tp = ia32_create_float_type(mode, names[kct].align);
/* The ULL bias is stored as a two-element array: { 0, 2^64 }. */
516 if (kct == ia32_ULLBIAS)
517 tp = ia32_create_float_array(tp);
518 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
520 set_entity_ld_ident(ent, get_entity_ident(ent));
521 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
522 set_entity_visibility(ent, ir_visibility_private);
524 if (kct == ia32_ULLBIAS) {
525 ir_initializer_t *initializer = create_initializer_compound(2);
527 set_initializer_compound_value(initializer, 0,
528 create_initializer_tarval(get_mode_null(mode)));
529 set_initializer_compound_value(initializer, 1,
530 create_initializer_tarval(tv));
532 set_entity_initializer(ent, initializer);
534 set_entity_initializer(ent, create_initializer_tarval(tv));
537 /* cache the entry */
538 ent_cache[kct] = ent;
541 return ent_cache[kct];
545 * return true if the node is a Proj(Load) and could be used in source address
546 * mode for another node. Will return only true if the @p other node is not
547 * dependent on the memory of the Load (for binary operations use the other
548 * input here, for unary operations use NULL).
550 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
551 ir_node *other, ir_node *other2, match_flags_t flags)
556 /* float constants are always available */
557 if (is_Const(node)) {
558 ir_mode *mode = get_irn_mode(node);
559 if (mode_is_float(mode)) {
/* Only "simple" constants qualify — they can be folded into the
 * instruction without extra cost. */
560 if (ia32_cg_config.use_sse2) {
561 if (is_simple_sse_Const(node))
564 if (is_simple_x87_Const(node))
567 if (get_irn_n_edges(node) > 1)
/* Non-constant case: node must be the result Proj of a Load in the
 * same block. */
575 load = get_Proj_pred(node);
576 pn = get_Proj_proj(node);
577 if (!is_Load(load) || pn != pn_Load_res)
579 if (get_nodes_block(load) != block)
581 /* we only use address mode if we're the only user of the load */
582 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
584 /* in some edge cases with address mode we might reach the load normally
585 * and through some AM sequence, if it is already materialized then we
586 * can't create an AM node from it */
587 if (be_is_transformed(node))
590 /* don't do AM if other node inputs depend on the load (via mem-proj) */
591 if (other != NULL && prevents_AM(block, load, other))
594 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of operand matching (see match_arguments): the chosen address
 * mode plus bookkeeping flags about how the operands were arranged. */
600 typedef struct ia32_address_mode_t ia32_address_mode_t;
601 struct ia32_address_mode_t {
606 ia32_op_type_t op_type;
/* set when the operation's operands may be swapped */
610 unsigned commutative : 1;
/* set when op1/op2 were swapped during matching */
611 unsigned ins_permuted : 1;
/* Fill @p addr with an address mode derived from pointer @p ptr and
 * memory input @p mem, transforming base/index into the new graph
 * (noreg_GP when absent). */
614 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
616 /* construct load address */
617 memset(addr, 0, sizeof(addr[0]));
618 ia32_create_address_mode(addr, ptr, 0);
620 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
621 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
622 addr->mem = be_transform_node(mem);
/* Fill the address-mode struct of @p am for operand @p node. Two cases:
 * a float Const is turned into a load from its constant entity; a
 * Proj(Load) contributes the load's address, mode and pin state. */
625 static void build_address(ia32_address_mode_t *am, ir_node *node,
626 ia32_create_am_flags_t flags)
628 ia32_address_t *addr = &am->addr;
634 /* floating point immediates */
635 if (is_Const(node)) {
636 ir_entity *entity = create_float_const_entity(node);
637 addr->base = get_symconst_base();
638 addr->index = noreg_GP;
640 addr->symconst_ent = entity;
642 am->ls_mode = get_type_mode(get_entity_type(entity));
643 am->pinned = op_pin_state_floats;
/* Proj(Load) case: reuse the load's address operands. */
647 load = get_Proj_pred(node);
648 ptr = get_Load_ptr(load);
649 mem = get_Load_mem(load);
650 new_mem = be_transform_node(mem);
651 am->pinned = get_irn_pinned(load);
652 am->ls_mode = get_Load_mode(load);
/* remember the memory Proj so fix_mem_proj() can rewire it later */
653 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
656 /* construct load address */
657 ia32_create_address_mode(addr, ptr, flags);
659 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
660 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all components of @p addr (scale, symconst, offset, frame entity)
 * onto the ia32 attributes of @p node. */
664 static void set_address(ir_node *node, const ia32_address_t *addr)
666 set_ia32_am_scale(node, addr->scale);
667 set_ia32_am_sc(node, addr->symconst_ent);
668 set_ia32_am_offs_int(node, addr->offset);
669 if (addr->symconst_sign)
670 set_ia32_am_sc_sign(node);
672 set_ia32_use_frame(node);
673 set_ia32_frame_ent(node, addr->frame_entity);
677 * Apply attributes of a given address mode to a node.
679 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
681 set_address(node, &am->addr);
683 set_ia32_op_type(node, am->op_type);
684 set_ia32_ls_mode(node, am->ls_mode);
685 if (am->pinned == op_pin_state_pinned) {
686 /* beware: some nodes are already pinned and did not allow to change the state */
687 if (get_irn_pinned(node) != op_pin_state_pinned)
688 set_irn_pinned(node, op_pin_state_pinned);
691 set_ia32_commutative(node);
695 * Check, if a given node is a Down-Conv, ie. a integer Conv
696 * from a mode with a mode with more bits to a mode with lesser bits.
697 * Moreover, we return only true if the node has not more than 1 user.
699 * @param node the node
700 * @return non-zero if node is a Down-Conv
702 static int is_downconv(const ir_node *node)
710 /* we only want to skip the conv when we're the only user
711 * (because this test is used in the context of address-mode selection
712 * and we don't want to use address mode for multiple users) */
713 if (get_irn_n_edges(node) > 1)
716 src_mode = get_irn_mode(get_Conv_op(node));
717 dest_mode = get_irn_mode(node);
/* both modes must be GP-register integers, destination not wider */
719 ia32_mode_needs_gp_reg(src_mode) &&
720 ia32_mode_needs_gp_reg(dest_mode) &&
721 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
724 /** Skip all Down-Conv's on a given node and return the resulting node. */
725 ir_node *ia32_skip_downconv(ir_node *node)
727 while (is_downconv(node))
728 node = get_Conv_op(node);
/* Like is_downconv(), but for Convs between integer modes of EQUAL bit
 * width (i.e. pure signedness changes) with at most one user. */
733 static bool is_sameconv(ir_node *node)
741 /* we only want to skip the conv when we're the only user
742 * (because this test is used in the context of address-mode selection
743 * and we don't want to use address mode for multiple users) */
744 if (get_irn_n_edges(node) > 1)
747 src_mode = get_irn_mode(get_Conv_op(node));
748 dest_mode = get_irn_mode(node);
750 ia32_mode_needs_gp_reg(src_mode) &&
751 ia32_mode_needs_gp_reg(dest_mode) &&
752 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
755 /** Skip all signedness convs */
756 static ir_node *ia32_skip_sameconv(ir_node *node)
758 while (is_sameconv(node))
759 node = get_Conv_op(node);
/* Widen @p node to a 32bit mode (sign- or zero-extending depending on
 * the signedness of its mode) via create_I2I_Conv. @p orig_node is kept
 * for debug attribution. */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if (mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit source AM only when the op explicitly supports it */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
/* signedness-only convs never change the bits, always skippable */
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
847 build_address(am, op2, 0);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if (mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
/* commutative: also try folding op1 as the memory operand */
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
859 build_address(am, op1, 0);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if (new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
/* operands were swapped; record so e.g. x87 can pick reverse ops */
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
877 am->op_type = ia32_Normal;
/* match_try_am means the caller only wanted AM; give up here otherwise */
879 if (flags & match_try_am) {
885 mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation requires 32bit inputs */
886 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
887 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
889 new_op2 = create_upconv(op2, NULL);
890 am->ls_mode = mode_Iu;
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
895 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill in defaults for unused address components */
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no load was folded into the node */
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
/* redirect future lookups of the folded load to the new AM node */
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
/* if a load was folded in, rewire its memory Proj to the new node */
973 new_node = fix_mem_proj(new_node, &am);
979 * Generic names for the inputs of an ia32 binary op.
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* Verify the generic input indices line up with the generated Adc and
 * Sbb node layouts, so gen_binop_flags can address them uniformly. */
986 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
/* fetch operands via the generic binop input indices (see asserts above) */
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not reorder across its producer */
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily created and cached) transformed node representing the
 * x87 floating point control word register. */
1032 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(env_cg->irg),
1039 &ia32_fp_cw_regs[REG_FPCW]);
1040 initial_fpcw = be_transform_node(fpcw);
1042 return initial_fpcw;
1046 * Construct a standard binary operation, set AM and immediate if required.
1048 * @param op1 The first operand
1049 * @param op2 The second operand
1050 * @param func The node constructor function
1051 * @return The constructed ia32 node.
1053 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1054 construct_binop_float_func *func)
1056 ir_mode *mode = get_irn_mode(node);
1058 ir_node *block, *new_block, *new_node;
1059 ia32_address_mode_t am;
1060 ia32_address_t *addr = &am.addr;
1061 ia32_x87_attr_t *attr;
1062 /* All operations are considered commutative, because there are reverse
1064 match_flags_t flags = match_commutative;
1066 /* happens for div nodes... */
1068 mode = get_divop_resmod(node);
1070 /* cannot use address mode with long double on x87 */
1071 if (get_mode_size_bits(mode) <= 64)
1074 block = get_nodes_block(node);
1075 match_arguments(&am, block, op1, op2, NULL, flags);
1077 dbgi = get_irn_dbg_info(node);
1078 new_block = be_transform_node(block);
/* x87 ops take the fpcw as additional input (rounding mode control) */
1079 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1080 am.new_op1, am.new_op2, get_fpcw());
1081 set_am_attributes(new_node, &am);
/* propagate operand-swap info so emission can choose the reverse form */
1083 attr = get_ia32_x87_attr(new_node);
1084 attr->attr.data.ins_permuted = am.ins_permuted;
1086 SET_IA32_ORIG_NODE(new_node, node);
1088 new_node = fix_mem_proj(new_node, &am);
1094 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1096 * @param op1 The first operand
1097 * @param op2 The second operand
1098 * @param func The node constructor function
1099 * @return The constructed ia32 node.
1101 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1102 construct_shift_func *func,
1103 match_flags_t flags)
1106 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1108 assert(! mode_is_float(get_irn_mode(node)));
1109 assert(flags & match_immediate);
1110 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: drop downconvs for mode-neutral ops, otherwise
 * widen sub-32bit values first */
1112 if (flags & match_mode_neutral) {
1113 op1 = ia32_skip_downconv(op1);
1114 new_op1 = be_transform_node(op1);
1115 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1116 new_op1 = create_upconv(op1, node);
1118 new_op1 = be_transform_node(op1);
1121 /* the shift amount can be any mode that is bigger than 5 bits, since all
1122 * other bits are ignored anyway */
1123 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1124 ir_node *const op = get_Conv_op(op2);
1125 if (mode_is_float(get_irn_mode(op)))
1128 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1130 new_op2 = create_immediate_or_transform(op2, 0);
1132 dbgi = get_irn_dbg_info(node);
1133 block = get_nodes_block(node);
1134 new_block = be_transform_node(block);
1135 new_node = func(dbgi, new_block, new_op1, new_op2);
1136 SET_IA32_ORIG_NODE(new_node, node);
1138 /* lowered shift instruction may have a dependency operand, handle it here */
1139 if (get_irn_arity(node) == 3) {
1140 /* we have a dependency */
1141 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1142 add_irn_dep(new_node, new_dep);
1150 * Construct a standard unary operation, set AM and immediate if required.
1152 * @param op The operand
1153 * @param func The node constructor function
1154 * @return The constructed ia32 node.
1156 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1157 match_flags_t flags)
1160 ir_node *block, *new_block, *new_op, *new_node;
1162 assert(flags == 0 || flags == match_mode_neutral)
1163 if (flags & match_mode_neutral) {
1164 op = ia32_skip_downconv(op);
1167 new_op = be_transform_node(op);
1168 dbgi = get_irn_dbg_info(node);
1169 block = get_nodes_block(node);
1170 new_block = be_transform_node(block);
1171 new_node = func(dbgi, new_block, new_op);
1173 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the address described by @p addr,
 * transforming base/index into the new graph first. */
1178 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1179 ia32_address_t *addr)
1181 ir_node *base, *index, *res;
1187 base = be_transform_node(base);
1190 index = addr->index;
1191 if (index == NULL) {
1194 index = be_transform_node(index);
1197 res = new_bd_ia32_Lea(dbgi, block, base, index);
1198 set_address(res, addr);
1204 * Returns non-zero if a given address mode has a symbolic or
1205 * numerical offset != 0.
1207 static int am_has_immediates(const ia32_address_t *addr)
1209 return addr->offset != 0 || addr->symconst_ent != NULL
1210 || addr->frame_entity || addr->use_frame;
1214 * Creates an ia32 Add.
1216 * @return the created ia32 Add node
1218 static ir_node *gen_Add(ir_node *node)
1220 ir_mode *mode = get_irn_mode(node);
1221 ir_node *op1 = get_Add_left(node);
1222 ir_node *op2 = get_Add_right(node);
1224 ir_node *block, *new_block, *new_node, *add_immediate_op;
1225 ia32_address_t addr;
1226 ia32_address_mode_t am;
/* float adds go straight to the SSE/x87 binop helpers */
1228 if (mode_is_float(mode)) {
1229 if (ia32_cg_config.use_sse2)
1230 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1231 match_commutative | match_am);
1233 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1236 ia32_mark_non_am(node);
/* Add is mode neutral, so the upper bits of narrower operands don't matter */
1238 op2 = ia32_skip_downconv(op2);
1239 op1 = ia32_skip_downconv(op1);
1243 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1244 * 1. Add with immediate -> Lea
1245 * 2. Add with possible source address mode -> Add
1246 * 3. Otherwise -> Lea
1248 memset(&addr, 0, sizeof(addr));
1249 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1250 add_immediate_op = NULL;
1252 dbgi = get_irn_dbg_info(node);
1253 block = get_nodes_block(node);
1254 new_block = be_transform_node(block);
/* case 0: the whole Add folded into a pure constant/symconst */
1257 if (addr.base == NULL && addr.index == NULL) {
1258 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1259 addr.symconst_sign, 0, addr.offset);
1260 be_dep_on_frame(new_node);
1261 SET_IA32_ORIG_NODE(new_node, node);
1264 /* add with immediate? */
1265 if (addr.index == NULL) {
1266 add_immediate_op = addr.base;
1267 } else if (addr.base == NULL && addr.scale == 0) {
1268 add_immediate_op = addr.index;
1271 if (add_immediate_op != NULL) {
/* Add x, 0 degenerates to x itself — report it, it hints at a missed
 * earlier optimisation */
1272 if (!am_has_immediates(&addr)) {
1273 #ifdef DEBUG_libfirm
1274 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1277 return be_transform_node(add_immediate_op);
/* case 1: one register + immediate -> Lea */
1280 new_node = create_lea_from_address(dbgi, new_block, &addr);
1281 SET_IA32_ORIG_NODE(new_node, node);
1285 /* test if we can use source address mode */
1286 match_arguments(&am, block, op1, op2, NULL, match_commutative
1287 | match_mode_neutral | match_am | match_immediate | match_try_am);
1289 /* construct an Add with source address mode */
1290 if (am.op_type == ia32_AddrModeS) {
1291 ia32_address_t *am_addr = &am.addr;
1292 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1293 am_addr->index, am_addr->mem, am.new_op1,
1295 set_am_attributes(new_node, &am);
1296 SET_IA32_ORIG_NODE(new_node, node);
1298 new_node = fix_mem_proj(new_node, &am);
1303 /* otherwise construct a lea */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1310 * Creates an ia32 Mul.
1312 * @return the created ia32 Mul node
1314 static ir_node *gen_Mul(ir_node *node)
1316 ir_node *op1 = get_Mul_left(node);
1317 ir_node *op2 = get_Mul_right(node);
1318 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE if available, else x87 */
1320 if (mode_is_float(mode)) {
1321 if (ia32_cg_config.use_sse2)
1322 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1323 match_commutative | match_am);
1325 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports source AM and immediates */
1327 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1328 match_commutative | match_am | match_mode_neutral |
1329 match_immediate | match_am_and_immediates);
1333 * Creates an ia32 Mulh.
1334 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1335 * this result while Mul returns the lower 32 bit.
1337 * @return the created ia32 Mulh node
1339 static ir_node *gen_Mulh(ir_node *node)
1341 dbg_info *dbgi = get_irn_dbg_info(node);
1342 ir_node *op1 = get_Mulh_left(node);
1343 ir_node *op2 = get_Mulh_right(node);
1344 ir_mode *mode = get_irn_mode(node);
1346 ir_node *proj_res_high;
/* the hardware widening multiply is only available for 32 bit operands */
1348 if (get_mode_size_bits(mode) != 32) {
1349 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* pick signed (imul) or unsigned (mul) and project out the high half */
1352 if (mode_is_signed(mode)) {
1353 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1354 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1359 return proj_res_high;
1363 * Creates an ia32 And.
1365 * @return The created ia32 And node
1367 static ir_node *gen_And(ir_node *node)
1369 ir_node *op1 = get_And_left(node);
1370 ir_node *op2 = get_And_right(node);
1371 assert(! mode_is_float(get_irn_mode(node)));
1373 /* is it a zero extension? */
1374 if (is_Const(op2)) {
1375 tarval *tv = get_Const_tarval(op2);
1376 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero extension from 8/16 bit: emit a
 * Conv (movzx) instead of a real And */
1378 if (v == 0xFF || v == 0xFFFF) {
1379 dbg_info *dbgi = get_irn_dbg_info(node);
1380 ir_node *block = get_nodes_block(node);
1387 assert(v == 0xFFFF);
1390 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1395 return gen_binop(node, op1, op2, new_bd_ia32_And,
1396 match_commutative | match_mode_neutral | match_am | match_immediate);
1402 * Creates an ia32 Or.
1404 * @return The created ia32 Or node
1406 static ir_node *gen_Or(ir_node *node)
1408 ir_node *op1 = get_Or_left(node);
1409 ir_node *op2 = get_Or_right(node);
/* bitwise Or is integer-only at this point */
1411 assert (! mode_is_float(get_irn_mode(node)));
1412 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1413 | match_mode_neutral | match_am | match_immediate);
1419 * Creates an ia32 Eor.
1421 * @return The created ia32 Eor node
1423 static ir_node *gen_Eor(ir_node *node)
1425 ir_node *op1 = get_Eor_left(node);
1426 ir_node *op2 = get_Eor_right(node);
/* bitwise Xor is integer-only at this point */
1428 assert(! mode_is_float(get_irn_mode(node)));
1429 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1430 | match_mode_neutral | match_am | match_immediate);
1435 * Creates an ia32 Sub.
1437 * @return The created ia32 Sub node
1439 static ir_node *gen_Sub(ir_node *node)
1441 ir_node *op1 = get_Sub_left(node);
1442 ir_node *op2 = get_Sub_right(node);
1443 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE if available, else x87 */
1445 if (mode_is_float(mode)) {
1446 if (ia32_cg_config.use_sse2)
1447 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1449 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middleend */
1452 if (is_Const(op2)) {
1453 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1457 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1458 | match_am | match_immediate);
/* Combine the memory input of the original node (src_mem) with the memory
 * consumed by a folded address-mode load (am_mem), avoiding memory cycles
 * where the value and memory come from the same predecessor. */
1461 static ir_node *transform_AM_mem(ir_node *const block,
1462 ir_node *const src_val,
1463 ir_node *const src_mem,
1464 ir_node *const am_mem)
1466 if (is_NoMem(am_mem)) {
1467 return be_transform_node(src_mem);
1468 } else if (is_Proj(src_val) &&
1470 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1471 /* avoid memory loop */
1473 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1474 ir_node *const ptr_pred = get_Proj_pred(src_val);
1475 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync, filtering out the predecessor that would close a loop */
1480 NEW_ARR_A(ir_node*, ins, arity + 1);
1482 /* NOTE: This sometimes produces dead-code because the old sync in
1483 * src_mem might not be used anymore, we should detect this case
1484 * and kill the sync... */
1485 for (i = arity - 1; i >= 0; --i) {
1486 ir_node *const pred = get_Sync_pred(src_mem, i);
1488 /* avoid memory loop */
1489 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1492 ins[n++] = be_transform_node(pred);
1497 return new_r_Sync(block, n, ins);
/* general case: synchronize both memories with a 2-input Sync */
1501 ins[0] = be_transform_node(src_mem);
1503 return new_r_Sync(block, 2, ins);
1508 * Create a 32bit to 64bit signed extension.
1510 * @param dbgi debug info
1511 * @param block the block where node nodes should be placed
1512 * @param val the value to extend
1513 * @param orig the original node
1515 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1516 ir_node *val, const ir_node *orig)
/* cltd (cdq) is shorter but ties val to eax; the Sar by 31 variant works
 * with any register */
1521 if (ia32_cg_config.use_short_sex_eax) {
1522 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1523 be_dep_on_frame(pval);
1524 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* arithmetic shift right by 31 replicates the sign bit into all bits */
1526 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1527 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1529 SET_IA32_ORIG_NODE(res, orig);
1534 * Generates an ia32 DivMod with additional infrastructure for the
1535 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod nodes: both map onto the
 * ia32 div/idiv instruction which produces quotient and remainder. */
1537 static ir_node *create_Div(ir_node *node)
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1541 ir_node *new_block = be_transform_node(block);
1548 ir_node *sign_extension;
1549 ia32_address_mode_t am;
1550 ia32_address_t *addr = &am.addr;
1552 /* the upper bits have random contents for smaller modes */
/* extract operands depending on which of the three opcodes we got */
1553 switch (get_irn_opcode(node)) {
1555 op1 = get_Div_left(node);
1556 op2 = get_Div_right(node);
1557 mem = get_Div_mem(node);
1558 mode = get_Div_resmode(node);
1561 op1 = get_Mod_left(node);
1562 op2 = get_Mod_right(node);
1563 mem = get_Mod_mem(node);
1564 mode = get_Mod_resmode(node);
1567 op1 = get_DivMod_left(node);
1568 op2 = get_DivMod_right(node);
1569 mem = get_DivMod_mem(node);
1570 mode = get_DivMod_resmode(node);
1573 panic("invalid divmod node %+F", node);
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division sign-extends edx:eax, unsigned zeroes the upper half */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* division can trap, keep the original pinned state */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
/* Mod shares the div/idiv machinery; remainder comes out as a Proj */
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
/* Div shares the div/idiv machinery; quotient comes out as a Proj */
1619 return create_Div(node);
1623 * Generates an ia32 DivMod.
1625 static ir_node *gen_DivMod(ir_node *node)
/* DivMod maps to a single div/idiv producing both results */
1627 return create_Div(node);
1633 * Creates an ia32 floating Div.
1635 * @return The created ia32 xDiv node
1637 static ir_node *gen_Quot(ir_node *node)
1639 ir_node *op1 = get_Quot_left(node);
1640 ir_node *op2 = get_Quot_right(node);
/* float division: SSE divss/divsd if available, else x87 fdiv */
1642 if (ia32_cg_config.use_sse2) {
1643 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1645 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1651 * Creates an ia32 Shl.
1653 * @return The created ia32 Shl node
1655 static ir_node *gen_Shl(ir_node *node)
1657 ir_node *left = get_Shl_left(node);
1658 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper bits of the input don't matter */
1660 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1661 match_mode_neutral | match_immediate);
1665 * Creates an ia32 Shr.
1667 * @return The created ia32 Shr node
1669 static ir_node *gen_Shr(ir_node *node)
1671 ir_node *left = get_Shr_left(node);
1672 ir_node *right = get_Shr_right(node);
/* logical right shift is NOT mode-neutral: upper bits shift into the result */
1674 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1680 * Creates an ia32 Sar.
1682 * @return The created ia32 Shrs node
1684 static ir_node *gen_Shrs(ir_node *node)
1686 ir_node *left = get_Shrs_left(node);
1687 ir_node *right = get_Shrs_right(node);
/* special case: Shrs x, const may be a disguised sign extension */
1689 if (is_Const(right)) {
1690 tarval *tv = get_Const_tarval(right);
1691 long val = get_tarval_long(tv);
1693 /* this is a sign extension */
1694 dbg_info *dbgi = get_irn_dbg_info(node);
1695 ir_node *block = be_transform_node(get_nodes_block(node));
1696 ir_node *new_op = be_transform_node(left);
1698 return create_sex_32_64(dbgi, block, new_op, node);
1702 /* 8 or 16 bit sign extension? */
1703 if (is_Const(right) && is_Shl(left)) {
1704 ir_node *shl_left = get_Shl_left(left);
1705 ir_node *shl_right = get_Shl_right(left);
1706 if (is_Const(shl_right)) {
1707 tarval *tv1 = get_Const_tarval(right);
1708 tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, C), C) with C == 16 or 24 sign-extends the low 16/8 bits:
 * emit a Conv (movsx) instead of two shifts */
1709 if (tv1 == tv2 && tarval_is_long(tv1)) {
1710 long val = get_tarval_long(tv1);
1711 if (val == 16 || val == 24) {
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_node *block = get_nodes_block(node);
1723 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic right shift */
1732 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1738 * Creates an ia32 Rol.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotL node
1744 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
/* helper used by gen_Rotl; emits an ia32 Rol */
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1752 * Creates an ia32 Ror.
1753 * NOTE: There is no RotR with immediate because this would always be a RotL
1754 * "imm-mode_size_bits" which can be pre-calculated.
1756 * @param op1 The first operator
1757 * @param op2 The second operator
1758 * @return The created ia32 RotR node
1760 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
/* helper used by gen_Rotl for the Rotl(x, -n) pattern; emits an ia32 Ror */
1762 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1768 * Creates an ia32 RotR or RotL (depending on the found pattern).
1770 * @return The created ia32 RotL or RotR node
1772 static ir_node *gen_Rotl(ir_node *node)
1774 ir_node *op1 = get_Rotl_left(node);
1775 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n): use the negated operand directly */
1777 if (is_Minus(op2)) {
1778 return gen_Ror(node, op1, get_Minus_op(op2));
1781 return gen_Rol(node, op1, op2);
1787 * Transforms a Minus node.
1789 * @return The created ia32 Minus node
1791 static ir_node *gen_Minus(ir_node *node)
1793 ir_node *op = get_Minus_op(node);
1794 ir_node *block = be_transform_node(get_nodes_block(node));
1795 dbg_info *dbgi = get_irn_dbg_info(node);
1796 ir_mode *mode = get_irn_mode(node);
/* float negation has no dedicated SSE instruction: xor the sign bit
 * with a constant mask instead */
1801 if (mode_is_float(mode)) {
1802 ir_node *new_op = be_transform_node(op);
1803 if (ia32_cg_config.use_sse2) {
1804 /* TODO: non-optimal... if we have many xXors, then we should
1805 * rather create a load for the const and use that instead of
1806 * several AM nodes... */
1807 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1809 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1810 noreg_GP, nomem, new_op, noreg_xmm);
/* select the 32 or 64 bit sign-mask constant entity */
1812 size = get_mode_size_bits(mode);
1813 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1815 set_ia32_am_sc(new_node, ent);
1816 set_ia32_op_type(new_node, ia32_AddrModeS);
1817 set_ia32_ls_mode(new_node, mode);
/* x87 has a real change-sign instruction */
1819 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation: plain neg instruction */
1822 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1825 SET_IA32_ORIG_NODE(new_node, node);
1831 * Transforms a Not node.
1833 * @return The created ia32 Not node
1835 static ir_node *gen_Not(ir_node *node)
1837 ir_node *op = get_Not_op(node);
1839 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1840 assert (! mode_is_float(get_irn_mode(node)));
/* bitwise complement: upper bits don't matter, so mode-neutral */
1842 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1848 * Transforms an Abs node.
1850 * @return The created ia32 Abs node
1852 static ir_node *gen_Abs(ir_node *node)
1854 ir_node *block = get_nodes_block(node);
1855 ir_node *new_block = be_transform_node(block);
1856 ir_node *op = get_Abs_op(node);
1857 dbg_info *dbgi = get_irn_dbg_info(node);
1858 ir_mode *mode = get_irn_mode(node);
/* float abs: clear the sign bit (SSE and-mask) or use x87 fabs */
1864 if (mode_is_float(mode)) {
1865 new_op = be_transform_node(op);
1867 if (ia32_cg_config.use_sse2) {
1868 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1869 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1870 noreg_GP, nomem, new_op, noreg_fp);
/* select the 32 or 64 bit abs-mask constant entity */
1872 size = get_mode_size_bits(mode);
1873 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1875 set_ia32_am_sc(new_node, ent);
1877 SET_IA32_ORIG_NODE(new_node, node);
1879 set_ia32_op_type(new_node, ia32_AddrModeS);
1880 set_ia32_ls_mode(new_node, mode);
1882 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1883 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branch-free idiom: s = x >> 31; (x ^ s) - s */
1886 ir_node *xor, *sign_extension;
1888 if (get_mode_size_bits(mode) == 32) {
1889 new_op = be_transform_node(op);
1891 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1894 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1896 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1897 nomem, new_op, sign_extension);
1898 SET_IA32_ORIG_NODE(xor, node);
1900 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1901 nomem, xor, sign_extension);
1902 SET_IA32_ORIG_NODE(new_node, node);
1909 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1911 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1913 dbg_info *dbgi = get_irn_dbg_info(cmp);
1914 ir_node *block = get_nodes_block(cmp);
1915 ir_node *new_block = be_transform_node(block);
1916 ir_node *op1 = be_transform_node(x);
1917 ir_node *op2 = be_transform_node(n);
/* bt sets the carry flag to bit n of x */
1919 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1923 * Transform a node returning a "flag" result.
1925 * @param node the node to transform
1926 * @param pnc_out the compare mode to use
1928 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1935 /* we have a Cmp as input */
1936 if (is_Proj(node)) {
1937 ir_node *pred = get_Proj_pred(node);
1939 pn_Cmp pnc = get_Proj_proj(node);
/* try to detect the x & (1 << n) pattern and use a bt instruction */
1940 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1941 ir_node *l = get_Cmp_left(pred);
1942 ir_node *r = get_Cmp_right(pred);
1944 ir_node *la = get_And_left(l);
1945 ir_node *ra = get_And_right(l);
1947 ir_node *c = get_Shl_left(la);
1948 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1949 /* (1 << n) & ra) */
1950 ir_node *n = get_Shl_right(la);
1951 flags = gen_bt(pred, ra, n);
1952 /* we must generate a Jc/Jnc jump */
1953 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
/* bt reports the bit in the carry flag -> unsigned compare codes */
1956 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored pattern: shift on the right operand of the And */
1961 ir_node *c = get_Shl_left(ra);
1962 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1963 /* la & (1 << n)) */
1964 ir_node *n = get_Shl_right(ra);
1965 flags = gen_bt(pred, la, n);
1966 /* we must generate a Jc/Jnc jump */
1967 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1970 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1976 /* add ia32 compare flags */
1978 ir_node *l = get_Cmp_left(pred);
1979 ir_mode *mode = get_irn_mode(l);
1980 if (mode_is_float(mode))
1981 pnc |= ia32_pn_Cmp_float;
1982 else if (! mode_is_signed(mode))
1983 pnc |= ia32_pn_Cmp_unsigned;
1986 flags = be_transform_node(pred);
1991 /* a mode_b value, we have to compare it against 0 */
1992 dbgi = get_irn_dbg_info(node);
1993 new_block = be_transform_node(get_nodes_block(node));
1994 new_op = be_transform_node(node);
/* Test x,x sets ZF according to whether x is zero */
1995 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1996 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1997 *pnc_out = pn_Cmp_Lg;
2002 * Transforms a Load.
2004 * @return the created ia32 Load node
2006 static ir_node *gen_Load(ir_node *node)
2008 ir_node *old_block = get_nodes_block(node);
2009 ir_node *block = be_transform_node(old_block);
2010 ir_node *ptr = get_Load_ptr(node);
2011 ir_node *mem = get_Load_mem(node);
2012 ir_node *new_mem = be_transform_node(mem);
2015 dbg_info *dbgi = get_irn_dbg_info(node);
2016 ir_mode *mode = get_Load_mode(node);
2018 ia32_address_t addr;
2020 /* construct load address */
2021 memset(&addr, 0, sizeof(addr));
2022 ia32_create_address_mode(&addr, ptr, 0);
2029 base = be_transform_node(base);
2032 if (index == NULL) {
2035 index = be_transform_node(index);
/* pick the load flavour: SSE, x87, or GP */
2038 if (mode_is_float(mode)) {
2039 if (ia32_cg_config.use_sse2) {
2040 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2043 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2047 assert(mode != mode_b);
2049 /* create a conv node with address mode for smaller modes */
2050 if (get_mode_size_bits(mode) < 32) {
2051 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2052 new_mem, noreg_GP, mode);
2054 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2058 set_irn_pinned(new_node, get_irn_pinned(node));
2059 set_ia32_op_type(new_node, ia32_AddrModeS);
2060 set_ia32_ls_mode(new_node, mode);
2061 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller;
 * this relies on all load variants using the same result proj number */
2063 if (get_irn_pinned(node) == op_pin_state_floats) {
2064 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2065 && pn_ia32_vfld_res == pn_ia32_Load_res
2066 && pn_ia32_Load_res == pn_ia32_res);
2067 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2070 SET_IA32_ORIG_NODE(new_node, node);
2072 be_dep_on_frame(new_node);
/* Check whether a load (reached through node, a Proj) may be folded into
 * a destination-address-mode operation that stores through ptr. */
2076 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2077 ir_node *ptr, ir_node *other)
2084 /* we only use address mode if we're the only user of the load */
2085 if (get_irn_n_edges(node) > 1)
2088 load = get_Proj_pred(node);
/* load must be in the same block as the store for scheduling reasons */
2091 if (get_nodes_block(load) != block)
2094 /* store should have the same pointer as the load */
2095 if (get_Load_ptr(load) != ptr)
2098 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2099 if (other != NULL &&
2100 get_nodes_block(other) == block &&
2101 heights_reachable_in_block(heights, other, load)) {
2105 if (prevents_AM(block, load, mem))
2107 /* Store should be attached to the load via mem */
2108 assert(heights_reachable_in_block(heights, mem, load));
/* Build a binary destination-address-mode node (e.g. add [mem], reg) for a
 * Store(op(Load(ptr), x)) pattern; returns NULL if the pattern doesn't fit. */
2113 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2114 ir_node *mem, ir_node *ptr, ir_mode *mode,
2115 construct_binop_dest_func *func,
2116 construct_binop_dest_func *func8bit,
2117 match_flags_t flags)
2119 ir_node *src_block = get_nodes_block(node);
2127 ia32_address_mode_t am;
2128 ia32_address_t *addr = &am.addr;
2129 memset(&am, 0, sizeof(am));
2131 assert(flags & match_immediate); /* there is no destam node without... */
2132 commutative = (flags & match_commutative) != 0;
/* the loaded value may be on either side if the operation commutes */
2134 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2135 build_address(&am, op1, ia32_create_am_double_use);
2136 new_op = create_immediate_or_transform(op2, 0);
2137 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2138 build_address(&am, op2, ia32_create_am_double_use);
2139 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with NoReg placeholders */
2144 if (addr->base == NULL)
2145 addr->base = noreg_GP;
2146 if (addr->index == NULL)
2147 addr->index = noreg_GP;
2148 if (addr->mem == NULL)
2151 dbgi = get_irn_dbg_info(node);
2152 block = be_transform_node(src_block);
2153 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need the special 8-bit constructor variant */
2155 if (get_mode_size_bits(mode) == 8) {
2156 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2158 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2160 set_address(new_node, addr);
2161 set_ia32_op_type(new_node, ia32_AddrModeD);
2162 set_ia32_ls_mode(new_node, mode);
2163 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new node */
2165 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2166 mem_proj = be_transform_node(am.mem_proj);
2167 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a unary destination-address-mode node (e.g. not [mem]) for a
 * Store(op(Load(ptr))) pattern; returns NULL if the pattern doesn't fit. */
2172 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2173 ir_node *ptr, ir_mode *mode,
2174 construct_unop_dest_func *func)
2176 ir_node *src_block = get_nodes_block(node);
2182 ia32_address_mode_t am;
2183 ia32_address_t *addr = &am.addr;
2185 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2188 memset(&am, 0, sizeof(am));
2189 build_address(&am, op, ia32_create_am_double_use);
2191 dbgi = get_irn_dbg_info(node);
2192 block = be_transform_node(src_block);
2193 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2194 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2195 set_address(new_node, addr);
2196 set_ia32_op_type(new_node, ia32_AddrModeD);
2197 set_ia32_ls_mode(new_node, mode);
2198 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new node */
2200 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2201 mem_proj = be_transform_node(am.mem_proj);
2202 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negate a compare code, honouring float semantics when the ia32 float
 * flag bit is set (float negation differs because of unordered results). */
2207 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2209 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2210 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(cond, 1, 0)) into a setcc-to-memory node.
 * Returns NULL when the pattern does not match. */
2213 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2215 ir_mode *mode = get_irn_mode(node);
2216 ir_node *mux_true = get_Mux_true(node);
2217 ir_node *mux_false = get_Mux_false(node);
2226 ia32_address_t addr;
/* setcc only writes a single byte */
2228 if (get_mode_size_bits(mode) != 8)
/* accept Mux(c, 1, 0) directly or Mux(c, 0, 1) with negated condition */
2231 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2233 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2239 cond = get_Mux_sel(node);
2240 flags = get_flags_node(cond, &pnc);
2241 /* we can't handle the float special cases with SetM */
2242 if (pnc & ia32_pn_Cmp_float)
2245 pnc = ia32_get_negated_pnc(pnc);
2247 build_address_ptr(&addr, ptr, mem);
2249 dbgi = get_irn_dbg_info(node);
2250 block = get_nodes_block(node);
2251 new_block = be_transform_node(block);
2252 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2253 addr.index, addr.mem, flags, pnc);
2254 set_address(new_node, &addr);
2255 set_ia32_op_type(new_node, ia32_AddrModeD);
2256 set_ia32_ls_mode(new_node, mode);
2257 SET_IA32_ORIG_NODE(new_node, node);
/* Try to turn Store(op(Load(ptr), ...)) into a single read-modify-write
 * instruction (destination address mode). Returns NULL if not possible. */
2262 static ir_node *try_create_dest_am(ir_node *node)
2264 ir_node *val = get_Store_value(node);
2265 ir_node *mem = get_Store_mem(node);
2266 ir_node *ptr = get_Store_ptr(node);
2267 ir_mode *mode = get_irn_mode(val);
2268 unsigned bits = get_mode_size_bits(mode);
2273 /* handle only GP modes for now... */
2274 if (!ia32_mode_needs_gp_reg(mode))
2278 /* store must be the only user of the val node */
2279 if (get_irn_n_edges(val) > 1)
2281 /* skip pointless convs */
2283 ir_node *conv_op = get_Conv_op(val);
2284 ir_mode *pred_mode = get_irn_mode(conv_op);
2285 if (!ia32_mode_needs_gp_reg(pred_mode))
2287 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2295 /* value must be in the same block */
2296 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2299 switch (get_irn_opcode(val)) {
2301 op1 = get_Add_left(val);
2302 op2 = get_Add_right(val);
/* Add/Sub by 1 become inc/dec when the target prefers them */
2303 if (ia32_cg_config.use_incdec) {
2304 if (is_Const_1(op2)) {
2305 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2307 } else if (is_Const_Minus_1(op2)) {
2308 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2312 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2313 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2314 match_commutative | match_immediate);
2317 op1 = get_Sub_left(val);
2318 op2 = get_Sub_right(val);
2319 if (is_Const(op2)) {
2320 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2322 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2323 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2327 op1 = get_And_left(val);
2328 op2 = get_And_right(val);
2329 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2330 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2331 match_commutative | match_immediate);
2334 op1 = get_Or_left(val);
2335 op2 = get_Or_right(val);
2336 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2337 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2338 match_commutative | match_immediate);
2341 op1 = get_Eor_left(val);
2342 op2 = get_Eor_right(val);
2343 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2344 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2345 match_commutative | match_immediate);
/* shifts have no separate 8-bit constructor: same func passed twice */
2348 op1 = get_Shl_left(val);
2349 op2 = get_Shl_right(val);
2350 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2351 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2355 op1 = get_Shr_left(val);
2356 op2 = get_Shr_right(val);
2357 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2358 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2362 op1 = get_Shrs_left(val);
2363 op2 = get_Shrs_right(val);
2364 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2365 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2369 op1 = get_Rotl_left(val);
2370 op2 = get_Rotl_right(val);
2371 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2372 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2375 /* TODO: match ROR patterns... */
2377 new_node = try_create_SetMem(val, ptr, mem);
2381 op1 = get_Minus_op(val);
2382 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2385 /* should be lowered already */
2386 assert(mode != mode_b);
2387 op1 = get_Not_op(val);
2388 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* inherit the original store's pinned state if it was stricter */
2394 if (new_node != NULL) {
2395 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2396 get_irn_pinned(node) == op_pin_state_pinned) {
2397 set_irn_pinned(new_node, op_pin_state_pinned);
/* Does this integer mode fit a float->int conversion (fist stores
 * signed 16 or 32 bit values)? */
2404 static bool possible_int_mode_for_fp(ir_mode *mode)
2408 if (!mode_is_signed(mode))
2410 size = get_mode_size_bits(mode);
2411 if (size != 16 && size != 32)
/* Is node a Conv from a float mode to a fist-compatible integer mode? */
2416 static int is_float_to_int_conv(const ir_node *node)
2418 ir_mode *mode = get_irn_mode(node);
2422 if (!possible_int_mode_for_fp(mode))
2427 conv_op = get_Conv_op(node);
2428 conv_mode = get_irn_mode(conv_op);
2430 if (!mode_is_float(conv_mode))
2437 * Transform a Store(floatConst) into a sequence of
2440 * @return the created ia32 Store node
2442 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2444 ir_mode *mode = get_irn_mode(cns);
2445 unsigned size = get_mode_size_bytes(mode);
2446 tarval *tv = get_Const_tarval(cns);
2447 ir_node *block = get_nodes_block(node);
2448 ir_node *new_block = be_transform_node(block);
2449 ir_node *ptr = get_Store_ptr(node);
2450 ir_node *mem = get_Store_mem(node);
2451 dbg_info *dbgi = get_irn_dbg_info(node);
2455 ia32_address_t addr;
/* the constant is emitted as one or more 32 bit integer stores */
2457 assert(size % 4 == 0);
2460 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32 bit chunk of the tarval, little endian */
2464 get_tarval_sub_bits(tv, ofs) |
2465 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2466 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2467 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2468 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2470 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2471 addr.index, addr.mem, imm);
2473 set_irn_pinned(new_node, get_irn_pinned(node));
2474 set_ia32_op_type(new_node, ia32_AddrModeD);
2475 set_ia32_ls_mode(new_node, mode_Iu);
2476 set_address(new_node, &addr);
2477 SET_IA32_ORIG_NODE(new_node, node);
2480 ins[i++] = new_node;
2485 } while (size != 0);
/* join the memory outputs of all partial stores */
2488 return new_rd_Sync(dbgi, new_block, i, ins);
2495 * Generate a vfist or vfisttp instruction.
2497 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2498 ir_node *mem, ir_node *val, ir_node **fist)
2502 if (ia32_cg_config.use_fisttp) {
2503 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2504 if other users exists */
2505 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2506 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the popped value alive so the register allocator copies it */
2507 be_new_Keep(block, 1, &value);
2509 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist needs the FPU control word set to truncation mode */
2512 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2515 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2521 * Transforms a general (no special case) Store.
2523 * @return the created ia32 Store node
2525 static ir_node *gen_general_Store(ir_node *node)
2527 ir_node *val = get_Store_value(node);
2528 ir_mode *mode = get_irn_mode(val);
2529 ir_node *block = get_nodes_block(node);
2530 ir_node *new_block = be_transform_node(block);
2531 ir_node *ptr = get_Store_ptr(node);
2532 ir_node *mem = get_Store_mem(node);
2533 dbg_info *dbgi = get_irn_dbg_info(node);
2534 ir_node *new_val, *new_node, *store;
2535 ia32_address_t addr;
2537 /* check for destination address mode */
2538 new_node = try_create_dest_am(node);
2539 if (new_node != NULL)
2542 /* construct store address */
2543 memset(&addr, 0, sizeof(addr));
2544 ia32_create_address_mode(&addr, ptr, 0);
2546 if (addr.base == NULL) {
2547 addr.base = noreg_GP;
2549 addr.base = be_transform_node(addr.base);
2552 if (addr.index == NULL) {
2553 addr.index = noreg_GP;
2555 addr.index = be_transform_node(addr.index);
2557 addr.mem = be_transform_node(mem);
2559 if (mode_is_float(mode)) {
2560 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2562 while (is_Conv(val) && mode == get_irn_mode(val)) {
2563 ir_node *op = get_Conv_op(val);
2564 if (!mode_is_float(get_irn_mode(op)))
2568 new_val = be_transform_node(val);
2569 if (ia32_cg_config.use_sse2) {
2570 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2571 addr.index, addr.mem, new_val);
2573 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2574 addr.index, addr.mem, new_val, mode);
/* on x87, Store(Conv(float->int)) can fold into a fist instruction */
2577 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2578 val = get_Conv_op(val);
2580 /* TODO: is this optimisation still necessary at all (middleend)? */
2581 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2582 while (is_Conv(val)) {
2583 ir_node *op = get_Conv_op(val);
2584 if (!mode_is_float(get_irn_mode(op)))
2586 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2590 new_val = be_transform_node(val);
2591 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* plain integer store; the value may become an immediate */
2593 new_val = create_immediate_or_transform(val, 0);
2594 assert(mode != mode_b);
2596 if (get_mode_size_bits(mode) == 8) {
2597 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2598 addr.index, addr.mem, new_val);
2600 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2601 addr.index, addr.mem, new_val);
2606 set_irn_pinned(store, get_irn_pinned(node));
2607 set_ia32_op_type(store, ia32_AddrModeD);
2608 set_ia32_ls_mode(store, mode);
2610 set_address(store, &addr);
2611 SET_IA32_ORIG_NODE(store, node);
2617 * Transforms a Store.
2619 * @return the created ia32 Store node
/* Transform a firm Store: dispatch float-constant stores to the integer-store
 * sequence, everything else to gen_general_Store. */
2621 static ir_node *gen_Store(ir_node *node)
2623 	ir_node *val = get_Store_value(node);
2624 	ir_mode *mode = get_irn_mode(val);
2626 if (mode_is_float(mode) && is_Const(val)) {
2627 /* We can transform every floating const store
2628 into a sequence of integer stores.
2629 If the constant is already in a register,
2630 it would be better to use it, but we don't
2631 have this information here. */
2632 return gen_float_const_Store(node, val);
2634 return gen_general_Store(node);
2638 * Transforms a Switch.
2640 * @return the created ia32 SwitchJmp node
/* Transform a switch-Cond into an ia32 SwitchJmp (jump table).
 * Scans all case Projs for the min/max case value, rejects tables wider
 * than 128000 entries, and biases the selector by -switch_min via a Lea
 * so the table starts at 0. */
2642 static ir_node *create_Switch(ir_node *node)
2644 	dbg_info *dbgi = get_irn_dbg_info(node);
2645 	ir_node *block = be_transform_node(get_nodes_block(node));
2646 	ir_node *sel = get_Cond_selector(node);
2647 	ir_node *new_sel = be_transform_node(sel);
2648 	long switch_min = LONG_MAX;
2649 	long switch_max = LONG_MIN;
2650 	long default_pn = get_Cond_default_proj(node);
2652 	const ir_edge_t *edge;
2654 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2656 /* determine the smallest switch case value */
2657 foreach_out_edge(node, edge) {
2658 ir_node *proj = get_edge_src_irn(edge);
2659 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the table range */
2660 if (pn == default_pn)
2663 if (pn < switch_min)
2665 if (pn > switch_max)
/* guard against absurdly large jump tables */
2669 if ((unsigned long) (switch_max - switch_min) > 128000) {
2670 panic("Size of switch %+F bigger than 128000", node);
2673 if (switch_min != 0) {
2674 /* if smallest switch case is not 0 we need an additional sub */
2675 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2676 add_ia32_am_offs_int(new_sel, -switch_min);
2677 set_ia32_op_type(new_sel, ia32_AddrModeS);
2679 SET_IA32_ORIG_NODE(new_sel, node);
2682 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2683 SET_IA32_ORIG_NODE(new_node, node);
2689 * Transform a Cond node.
/* Transform a Cond node: non-mode_b selectors are switches; a boolean
 * selector is turned into flags (via the Cmp producing it) + Jcc. */
2691 static ir_node *gen_Cond(ir_node *node)
2693 	ir_node *block = get_nodes_block(node);
2694 	ir_node *new_block = be_transform_node(block);
2695 	dbg_info *dbgi = get_irn_dbg_info(node);
2696 	ir_node *sel = get_Cond_selector(node);
2697 	ir_mode *sel_mode = get_irn_mode(sel);
2698 	ir_node *flags = NULL;
2702 if (sel_mode != mode_b) {
2703 return create_Switch(node);
2706 /* we get flags from a Cmp */
2707 flags = get_flags_node(sel, &pnc);
2709 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2710 SET_IA32_ORIG_NODE(new_node, node);
2716 * Transform a be_Copy.
/* Transform a be_Copy: duplicate it, and normalize GP-register modes
 * to mode_Iu so all GP copies share one mode. */
2718 static ir_node *gen_be_Copy(ir_node *node)
2720 	ir_node *new_node = be_duplicate_node(node);
2721 	ir_mode *mode = get_irn_mode(new_node);
2723 if (ia32_mode_needs_gp_reg(mode)) {
2724 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare for a Cmp node.
 * With fucomi support the flags are produced directly (vFucomi); otherwise
 * the FPU status word is fetched (vFtstFnstsw for compares against 0,
 * vFucomFnstsw in general) and copied into EFLAGS with Sahf. */
2730 static ir_node *create_Fucom(ir_node *node)
2732 	dbg_info *dbgi = get_irn_dbg_info(node);
2733 	ir_node *block = get_nodes_block(node);
2734 	ir_node *new_block = be_transform_node(block);
2735 	ir_node *left = get_Cmp_left(node);
2736 	ir_node *new_left = be_transform_node(left);
2737 	ir_node *right = get_Cmp_right(node);
2741 if (ia32_cg_config.use_fucomi) {
2742 new_right = be_transform_node(right);
2743 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2745 set_ia32_commutative(new_node);
2746 SET_IA32_ORIG_NODE(new_node, node);
/* no fucomi: cheaper ftst when comparing against constant 0 */
2748 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2749 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2751 new_right = be_transform_node(right);
2752 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2755 set_ia32_commutative(new_node);
2757 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into the EFLAGS register */
2759 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2760 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE Ucomi compare for a Cmp node, allowing a memory operand
 * (address mode) for one side via match_arguments. */
2766 static ir_node *create_Ucomi(ir_node *node)
2768 	dbg_info *dbgi = get_irn_dbg_info(node);
2769 	ir_node *src_block = get_nodes_block(node);
2770 	ir_node *new_block = be_transform_node(src_block);
2771 	ir_node *left = get_Cmp_left(node);
2772 	ir_node *right = get_Cmp_right(node);
2774 	ia32_address_mode_t am;
2775 	ia32_address_t *addr = &am.addr;
2777 match_arguments(&am, src_block, left, right, NULL,
2778 match_commutative | match_am);
2780 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2781 addr->mem, am.new_op1, am.new_op2,
2783 set_am_attributes(new_node, &am);
2785 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj if a load was folded in */
2787 new_node = fix_mem_proj(new_node, &am);
2793 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2794 * to fold an and into a test node
/* Returns true iff every Proj on this Cmp is Eq or Lg — the only relations
 * for which (x & y) ==/!= 0 can be folded into a Test instruction. */
2796 static bool can_fold_test_and(ir_node *node)
2798 	const ir_edge_t *edge;
2800 /** we can only have eq and lg projs */
2801 foreach_out_edge(node, edge) {
2802 ir_node *proj = get_edge_src_irn(edge);
2803 pn_Cmp pnc = get_Proj_proj(proj);
2804 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2812 * returns true if it is assured, that the upper bits of a node are "clean"
2813 * which means for a 16 or 8 bit value, that the upper bits in the register
2814 * are 0 for unsigned and a copy of the last significant bit for signed
/* Returns true if the bits above get_mode_size_bits(mode) of the (already
 * transformed) node are known "clean": zero for unsigned modes, sign copies
 * for signed ones. Recurses through Projs and inspects the producing ia32
 * opcode (Conv, Shr, Sar, And, Const/Immediate, ...). Used to widen 8/16-bit
 * compares to the smaller-encoded 32-bit form. */
2817 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2819 	assert(ia32_mode_needs_gp_reg(mode));
/* full-width values have no "upper bits" to worry about */
2820 if (get_mode_size_bits(mode) >= 32)
2823 if (is_Proj(transformed_node))
2824 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2826 switch (get_ia32_irn_opcode(transformed_node)) {
2827 case iro_ia32_Conv_I2I:
2828 case iro_ia32_Conv_I2I8Bit: {
2829 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness or from a wider mode proves nothing */
2830 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2832 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2839 if (mode_is_signed(mode)) {
2840 return false; /* TODO handle signed modes */
2842 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2843 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2844 const ia32_immediate_attr_t *attr
2845 = get_ia32_immediate_attr_const(right);
/* shifting right by >= (32 - bits) zeroes all upper bits */
2846 if (attr->symconst == 0 &&
2847 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2851 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2855 /* TODO too conservative if shift amount is constant */
2856 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand zeroes the upper bits of the result */
2859 if (!mode_is_signed(mode)) {
2861 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2862 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2864 /* TODO if one is known to be zero extended, then || is sufficient */
2869 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2870 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2872 case iro_ia32_Const:
2873 case iro_ia32_Immediate: {
2874 const ia32_immediate_attr_t *attr =
2875 get_ia32_immediate_attr_const(transformed_node);
2876 if (mode_is_signed(mode)) {
/* signed: upper bits must all equal the sign bit (all 0 or all 1) */
2877 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2878 return shifted == 0 || shifted == -1;
2880 unsigned long shifted = (unsigned long)attr->offset;
2881 shifted >>= get_mode_size_bits(mode);
2882 return shifted == 0;
2892 * Generate code for a Cmp.
/* Generate code for a Cmp node. Floats go to Ucomi (SSE2) or Fucom (x87).
 * Integer compares against 0 of a single-use And become a Test; otherwise a
 * plain Cmp is built, widening 8/16-bit compares to 32 bit when the upper
 * bits of both operands are known clean (smaller opcode). */
2894 static ir_node *gen_Cmp(ir_node *node)
2896 	dbg_info *dbgi = get_irn_dbg_info(node);
2897 	ir_node *block = get_nodes_block(node);
2898 	ir_node *new_block = be_transform_node(block);
2899 	ir_node *left = get_Cmp_left(node);
2900 	ir_node *right = get_Cmp_right(node);
2901 	ir_mode *cmp_mode = get_irn_mode(left);
2903 	ia32_address_mode_t am;
2904 	ia32_address_t *addr = &am.addr;
2907 if (mode_is_float(cmp_mode)) {
2908 if (ia32_cg_config.use_sse2) {
2909 return create_Ucomi(node);
2911 return create_Fucom(node);
2915 assert(ia32_mode_needs_gp_reg(cmp_mode));
2917 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2918 cmp_unsigned = !mode_is_signed(cmp_mode);
2919 if (is_Const_0(right) &&
2921 get_irn_n_edges(left) == 1 &&
2922 can_fold_test_and(node)) {
2923 /* Test(and_left, and_right) */
2924 ir_node *and_left = get_And_left(left);
2925 ir_node *and_right = get_And_right(left);
2927 /* matze: code here used mode instead of cmd_mode, I think it is always
2928 * the same as cmp_mode, but I leave this here to see if this is really
2931 assert(get_irn_mode(and_left) == cmp_mode);
2933 match_arguments(&am, block, and_left, and_right, NULL,
2935 match_am | match_8bit_am | match_16bit_am |
2936 match_am_and_immediates | match_immediate);
2938 /* use 32bit compare mode if possible since the opcode is smaller */
2939 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2940 upper_bits_clean(am.new_op2, cmp_mode)) {
2941 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2944 if (get_mode_size_bits(cmp_mode) == 8) {
2945 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2946 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2949 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2950 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2953 /* Cmp(left, right) */
2954 match_arguments(&am, block, left, right, NULL,
2955 match_commutative | match_am | match_8bit_am |
2956 match_16bit_am | match_am_and_immediates |
2958 /* use 32bit compare mode if possible since the opcode is smaller */
2959 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2960 upper_bits_clean(am.new_op2, cmp_mode)) {
2961 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2964 if (get_mode_size_bits(cmp_mode) == 8) {
2965 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2966 addr->index, addr->mem, am.new_op1,
2967 am.new_op2, am.ins_permuted,
2970 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2971 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2974 set_am_attributes(new_node, &am);
2975 set_ia32_ls_mode(new_node, cmp_mode);
2977 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj if a load was folded in */
2979 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc from a Mux node given the (untransformed and transformed)
 * flags producer. If match_arguments swapped the operands, the condition
 * code is negated to compensate. */
2984 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2987 	dbg_info *dbgi = get_irn_dbg_info(node);
2988 	ir_node *block = get_nodes_block(node);
2989 	ir_node *new_block = be_transform_node(block);
2990 	ir_node *val_true = get_Mux_true(node);
2991 	ir_node *val_false = get_Mux_false(node);
2993 	ia32_address_mode_t am;
2994 	ia32_address_t *addr;
2996 assert(ia32_cg_config.use_cmov);
2997 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3001 match_arguments(&am, block, val_false, val_true, flags,
3002 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operand order was swapped by the matcher -> invert the condition */
3004 if (am.ins_permuted)
3005 pnc = ia32_get_negated_pnc(pnc);
3007 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3008 addr->mem, am.new_op1, am.new_op2, new_flags,
3010 set_am_attributes(new_node, &am);
3012 SET_IA32_ORIG_NODE(new_node, node);
3014 new_node = fix_mem_proj(new_node, &am);
3020 * Creates a ia32 Setcc instruction.
/* Create an ia32 Setcc that materializes a condition as 0/1, widening the
 * 8-bit setcc result with a zero-extending Conv when the original node's
 * mode is wider than 8 bits. */
3022 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3023 ir_node *flags, pn_Cmp pnc,
3026 	ir_mode *mode = get_irn_mode(orig_node);
3029 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3030 SET_IA32_ORIG_NODE(new_node, orig_node);
3032 /* we might need to conv the result up */
3033 if (get_mode_size_bits(mode) > 8) {
3034 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3035 nomem, new_node, mode_Bu);
3036 SET_IA32_ORIG_NODE(new_node, orig_node);
3043 * Create instruction for an unsigned Difference or Zero.
/* Create the "difference or zero" sequence for Mux(a >=u b, a - b, 0):
 * sub; sbb 0 (mask of the borrow); not; and — branch-free unsigned
 * saturating subtraction. */
3045 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3047 	ir_mode *mode = get_irn_mode(psi);
3057 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3058 match_mode_neutral | match_am | match_immediate | match_two_users);
3060 block = get_nodes_block(new_node);
3062 if (is_Proj(new_node)) {
3063 sub = get_Proj_pred(new_node);
3064 assert(is_ia32_Sub(sub));
/* we also need the flags output of the Sub -> make it mode_T */
3067 set_irn_mode(sub, mode_T);
3068 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3070 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3072 dbgi = get_irn_dbg_info(psi);
/* sbb 0 yields -1 on borrow, 0 otherwise; not inverts that into a mask */
3073 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3074 not = new_bd_ia32_Not(dbgi, block, sbb);
3076 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3077 set_ia32_commutative(new_node);
3082 * Create an const array of two float consts.
3084 * @param c0 the first constant
3085 * @param c1 the second constant
3086 * @param new_mode IN/OUT for the mode of the constants, if NULL
3087 * smallest possible mode will be used
/* Create a private constant global array holding the two float constants
 * c0 and c1, downsizing to mode_F/mode_D when both convert losslessly.
 * *new_mode receives the mode actually used (see doc comment above). */
3089 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3092 	ir_mode *mode = *new_mode;
3094 	ir_initializer_t *initializer;
3095 	tarval *tv0 = get_Const_tarval(c0);
3096 	tarval *tv1 = get_Const_tarval(c1);
3099 /* detect the best mode for the constants */
3100 mode = get_tarval_mode(tv0);
3102 if (mode != mode_F) {
3103 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3104 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3106 tv0 = tarval_convert_to(tv0, mode);
3107 tv1 = tarval_convert_to(tv1, mode);
3108 } else if (mode != mode_D) {
3109 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3110 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3112 tv0 = tarval_convert_to(tv0, mode);
3113 tv1 = tarval_convert_to(tv1, mode);
/* build array-of-float type and a fresh uniquely-named global entity */
3120 tp = ia32_create_float_type(mode, 4);
3121 tp = ia32_create_float_array(tp);
3123 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3125 set_entity_ld_ident(ent, get_entity_ident(ent));
3126 set_entity_visibility(ent, ir_visibility_private);
3127 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3129 initializer = create_initializer_compound(2);
3131 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3132 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3134 set_entity_initializer(ent, initializer);
3141 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc-based constant-Mux can be expanded into
 * (the enumerators are not visible in this extract). */
3143 enum setcc_transform_insn {
/* A recipe (sequence of steps) for turning a 0/1 setcc result into the
 * two requested constants; filled in by find_const_transform. */
3156 typedef struct setcc_transform {
3160 enum setcc_transform_insn transform;
3164 } setcc_transform_t;
3167 * Setcc can only handle 0 and 1 result.
3168 * Find a transformation that creates 0 and 1 from
/* Find a step sequence (ADD/LEA/SHL/NEG/AND/SET...) that produces the two
 * constants t ("true") and f ("false") from a 0/1 Setcc result. First
 * normalizes so t is the non-zero/bigger value (negating the condition),
 * then subtracts f so the false value becomes 0, then pattern-matches t
 * (1, -1, 3/5/9 via LEA, powers of two via SHL, single bit, ...). */
3171 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3172 setcc_transform_t *res)
/* canonicalize: make sure t is non-null and the bigger constant */
3178 if (tarval_is_null(t)) {
3182 pnc = ia32_get_negated_pnc(pnc);
3183 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3184 // now, t is the bigger one
3188 pnc = ia32_get_negated_pnc(pnc);
/* shift both constants so the false value becomes zero (emit ADD f later) */
3192 if (! tarval_is_null(f)) {
3193 tarval *t_sub = tarval_sub(t, f, NULL);
3196 res->steps[step].transform = SETCC_TR_ADD;
3198 if (t == tarval_bad)
3199 panic("constant subtract failed");
3200 if (! tarval_is_long(f))
3201 panic("tarval is not long");
3203 res->steps[step].val = get_tarval_long(f);
3205 f = tarval_sub(f, f, NULL);
3206 assert(tarval_is_null(f));
/* t == 1: plain setcc already produces it */
3209 if (tarval_is_one(t)) {
3210 res->steps[step].transform = SETCC_TR_SET;
3211 res->num_steps = ++step;
/* t == -1: negate the 0/1 result */
3215 if (tarval_is_minus_one(t)) {
3216 res->steps[step].transform = SETCC_TR_NEG;
3218 res->steps[step].transform = SETCC_TR_SET;
3219 res->num_steps = ++step;
3222 if (tarval_is_long(t)) {
3223 long v = get_tarval_long(t);
3225 res->steps[step].val = 0;
/* small multipliers: LEA with scale 3/2/1 covers 9x,8x,5x,4x,3x,2x */
3228 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3230 res->steps[step].transform = SETCC_TR_LEAxx;
3231 res->steps[step].scale = 3; /* (a << 3) + a */
3234 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3236 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3237 res->steps[step].scale = 3; /* (a << 3) */
3240 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3242 res->steps[step].transform = SETCC_TR_LEAxx;
3243 res->steps[step].scale = 2; /* (a << 2) + a */
3246 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3248 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3249 res->steps[step].scale = 2; /* (a << 2) */
3252 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3254 res->steps[step].transform = SETCC_TR_LEAxx;
3255 res->steps[step].scale = 1; /* (a << 1) + a */
3258 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3260 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3261 res->steps[step].scale = 1; /* (a << 1) */
3264 res->num_steps = step;
/* arbitrary constants: mask (if more than one bit) / negate / shift */
3267 if (! tarval_is_single_bit(t)) {
3268 res->steps[step].transform = SETCC_TR_AND;
3269 res->steps[step].val = v;
3271 res->steps[step].transform = SETCC_TR_NEG;
3273 int v = get_tarval_lowest_bit(t);
3276 res->steps[step].transform = SETCC_TR_SHL;
3277 res->steps[step].scale = v;
3281 res->steps[step].transform = SETCC_TR_SET;
3282 res->num_steps = ++step;
3285 panic("tarval is not long");
3289 * Transforms a Mux node into some code sequence.
3291 * @return The transformed node.
/* Transform a Mux node. Float Muxes: SSE min/max patterns, or a const/const
 * Mux loaded from a 2-element constant array indexed by the setcc result;
 * other float Muxes panic. Integer Muxes: unsigned "difference or zero"
 * pattern, const/const Muxes via find_const_transform step sequences, and
 * the general case via CMovcc. */
3293 static ir_node *gen_Mux(ir_node *node)
3295 	dbg_info *dbgi = get_irn_dbg_info(node);
3296 	ir_node *block = get_nodes_block(node);
3297 	ir_node *new_block = be_transform_node(block);
3298 	ir_node *mux_true = get_Mux_true(node);
3299 	ir_node *mux_false = get_Mux_false(node);
3300 	ir_node *cond = get_Mux_sel(node);
3301 	ir_mode *mode = get_irn_mode(node);
3306 assert(get_irn_mode(cond) == mode_b);
3308 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3309 if (mode_is_float(mode)) {
3310 ir_node *cmp = get_Proj_pred(cond);
3311 ir_node *cmp_left = get_Cmp_left(cmp);
3312 ir_node *cmp_right = get_Cmp_right(cmp);
3313 pn_Cmp pnc = get_Proj_proj(cond);
3315 if (ia32_cg_config.use_sse2) {
3316 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3317 if (cmp_left == mux_true && cmp_right == mux_false) {
3318 /* Mux(a <= b, a, b) => MIN */
3319 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3320 match_commutative | match_am | match_two_users);
3321 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3322 /* Mux(a <= b, b, a) => MAX */
3323 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3324 match_commutative | match_am | match_two_users);
3326 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3327 if (cmp_left == mux_true && cmp_right == mux_false) {
3328 /* Mux(a >= b, a, b) => MAX */
3329 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3330 match_commutative | match_am | match_two_users);
3331 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3332 /* Mux(a >= b, b, a) => MIN */
3333 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3334 match_commutative | match_am | match_two_users);
/* float const/const Mux: load result from a 2-entry constant array,
 * indexed by the scaled 0/1 setcc value */
3339 if (is_Const(mux_true) && is_Const(mux_false)) {
3340 ia32_address_mode_t am;
3345 flags = get_flags_node(cond, &pnc);
3346 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3348 if (ia32_cg_config.use_sse2) {
3349 /* cannot load from different mode on SSE */
3352 /* x87 can load any mode */
3356 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (AM scale maxes out at 8,
 * hence the Lea/Add tricks for larger elements) */
3358 switch (get_mode_size_bytes(new_mode)) {
3368 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3369 set_ia32_am_scale(new_node, 2);
3374 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3375 set_ia32_am_scale(new_node, 1);
3378 /* arg, shift 16 NOT supported */
3380 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3383 panic("Unsupported constant size");
3386 am.ls_mode = new_mode;
3387 am.addr.base = get_symconst_base();
3388 am.addr.index = new_node;
3389 am.addr.mem = nomem;
3391 am.addr.scale = scale;
3392 am.addr.use_frame = 0;
3393 am.addr.frame_entity = NULL;
3394 am.addr.symconst_sign = 0;
3395 am.mem_proj = am.addr.mem;
3396 am.op_type = ia32_AddrModeS;
3399 am.pinned = op_pin_state_floats;
3401 am.ins_permuted = 0;
3403 if (ia32_cg_config.use_sse2)
3404 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3406 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3407 set_am_attributes(load, &am);
3409 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3411 panic("cannot transform floating point Mux");
3414 assert(ia32_mode_needs_gp_reg(mode));
3416 if (is_Proj(cond)) {
3417 ir_node *cmp = get_Proj_pred(cond);
3419 ir_node *cmp_left = get_Cmp_left(cmp);
3420 ir_node *cmp_right = get_Cmp_right(cmp);
3421 ir_node *val_true = mux_true;
3422 ir_node *val_false = mux_false;
3423 pn_Cmp pnc = get_Proj_proj(cond);
/* normalize so the zero constant (if any) is the false value */
3425 if (is_Const(val_true) && is_Const_null(val_true)) {
3426 ir_node *tmp = val_false;
3427 val_false = val_true;
3429 pnc = ia32_get_negated_pnc(pnc);
/* Mux(a >= b, a - b, 0) and its mirrored form -> doz sequence */
3431 if (is_Const_0(val_false) && is_Sub(val_true)) {
3432 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3433 && get_Sub_left(val_true) == cmp_left
3434 && get_Sub_right(val_true) == cmp_right) {
3435 return create_doz(node, cmp_left, cmp_right);
3437 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3438 && get_Sub_left(val_true) == cmp_right
3439 && get_Sub_right(val_true) == cmp_left) {
3440 return create_doz(node, cmp_right, cmp_left);
3446 flags = get_flags_node(cond, &pnc);
3448 if (is_Const(mux_true) && is_Const(mux_false)) {
3449 /* both are const, good */
3450 tarval *tv_true = get_Const_tarval(mux_true);
3451 tarval *tv_false = get_Const_tarval(mux_false);
3452 setcc_transform_t res;
3455 find_const_transform(pnc, tv_true, tv_false, &res);
/* emit the recipe back-to-front: last step first consumes the setcc */
3457 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3460 switch (res.steps[step].transform) {
3462 imm = ia32_immediate_from_long(res.steps[step].val);
3463 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3465 case SETCC_TR_ADDxx:
3466 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3469 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3470 set_ia32_am_scale(new_node, res.steps[step].scale);
3471 set_ia32_am_offs_int(new_node, res.steps[step].val);
3473 case SETCC_TR_LEAxx:
3474 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3475 set_ia32_am_scale(new_node, res.steps[step].scale);
3476 set_ia32_am_offs_int(new_node, res.steps[step].val);
3479 imm = ia32_immediate_from_long(res.steps[step].scale);
3480 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3483 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3486 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3489 imm = ia32_immediate_from_long(res.steps[step].val);
3490 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3493 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3496 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3499 panic("unknown setcc transform");
/* general case: conditional move */
3503 new_node = create_CMov(node, cond, flags, pnc);
3511 * Create a conversion from x87 state register to general purpose.
/* Create a conversion from an x87 fp register to a GP register: fist(tp)
 * the value to a frame slot, then reload it with an integer Load.
 * Unsigned 32-bit targets are stored as 64-bit signed and only the low
 * 32 bits reloaded (fist can only store signed integers). */
3513 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3515 	ir_node *block = be_transform_node(get_nodes_block(node));
3516 	ir_node *op = get_Conv_op(node);
3517 	ir_node *new_op = be_transform_node(op);
3518 	ir_graph *irg = current_ir_graph;
3519 	dbg_info *dbgi = get_irn_dbg_info(node);
3520 	ir_mode *mode = get_irn_mode(node);
3521 	ir_node *fist, *load, *mem;
3523 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3524 set_irn_pinned(fist, op_pin_state_floats);
3525 set_ia32_use_frame(fist);
3526 set_ia32_op_type(fist, ia32_AddrModeD);
3528 assert(get_mode_size_bits(mode) <= 32);
3529 /* exception we can only store signed 32 bit integers, so for unsigned
3530 we store a 64bit (signed) integer and load the lower bits */
3531 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3532 set_ia32_ls_mode(fist, mode_Ls);
3534 set_ia32_ls_mode(fist, mode_Is);
3536 SET_IA32_ORIG_NODE(fist, node);
/* reload the stored integer from the same frame slot */
3539 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3541 set_irn_pinned(load, op_pin_state_floats);
3542 set_ia32_use_frame(load);
3543 set_ia32_op_type(load, ia32_AddrModeS);
3544 set_ia32_ls_mode(load, mode_Is);
/* mark the spill slot so frame allocation reserves 8 vs. 4 bytes */
3545 if (get_ia32_ls_mode(fist) == mode_Ls) {
3546 ia32_attr_t *attr = get_ia32_attr(load);
3547 attr->data.need_64bit_stackent = 1;
3549 ia32_attr_t *attr = get_ia32_attr(load);
3550 attr->data.need_32bit_stackent = 1;
3552 SET_IA32_ORIG_NODE(load, node);
3554 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3558 * Creates a x87 strict Conv by placing a Store and a Load
/* Create an x87 strict Conv by round-tripping the value through memory:
 * vfst to a frame slot in tgt_mode, then vfld back — forcing the rounding
 * to tgt_mode precision that the x87 register file would otherwise skip. */
3560 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3562 	ir_node *block = get_nodes_block(node);
3563 	ir_graph *irg = get_Block_irg(block);
3564 	dbg_info *dbgi = get_irn_dbg_info(node);
3565 	ir_node *frame = get_irg_frame(irg);
3566 	ir_node *store, *load;
3569 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3570 set_ia32_use_frame(store);
3571 set_ia32_op_type(store, ia32_AddrModeD);
3572 SET_IA32_ORIG_NODE(store, node);
3574 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3575 set_ia32_use_frame(load);
3576 set_ia32_op_type(load, ia32_AddrModeS);
3577 SET_IA32_ORIG_NODE(load, node);
3579 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Build an integer-to-integer Conv node, selecting the 8-bit variant
 * (which needs byte-addressable registers) for 8-bit modes. */
3583 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3584 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3586 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3588 func = get_mode_size_bits(mode) == 8 ?
3589 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3590 return func(dbgi, block, base, index, mem, val, mode);
3594 * Create a conversion from general purpose to x87 register
/* Create a conversion from a GP register to an x87 fp register.
 * Tries fild with a source address mode first (signed 16/32-bit memory
 * operand); otherwise spills the integer to the frame and filds it.
 * Unsigned 32-bit sources are widened to a 64-bit value by storing an
 * extra zero word, since fild only reads signed integers. */
3596 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3598 	ir_node *src_block = get_nodes_block(node);
3599 	ir_node *block = be_transform_node(src_block);
3600 	ir_graph *irg = get_Block_irg(block);
3601 	dbg_info *dbgi = get_irn_dbg_info(node);
3602 	ir_node *op = get_Conv_op(node);
3603 	ir_node *new_op = NULL;
3605 	ir_mode *store_mode;
3610 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3611 if (possible_int_mode_for_fp(src_mode)) {
3612 ia32_address_mode_t am;
3614 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3615 if (am.op_type == ia32_AddrModeS) {
3616 ia32_address_t *addr = &am.addr;
3618 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3619 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3621 set_am_attributes(fild, &am);
3622 SET_IA32_ORIG_NODE(fild, node);
3624 fix_mem_proj(fild, &am);
3629 if (new_op == NULL) {
3630 new_op = be_transform_node(op);
3633 mode = get_irn_mode(op);
3635 /* first convert to 32 bit signed if necessary */
3636 if (get_mode_size_bits(src_mode) < 32) {
3637 if (!upper_bits_clean(new_op, src_mode)) {
3638 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3639 SET_IA32_ORIG_NODE(new_op, node);
3644 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the stack frame so fild can read it */
3647 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3649 set_ia32_use_frame(store);
3650 set_ia32_op_type(store, ia32_AddrModeD);
3651 set_ia32_ls_mode(store, mode_Iu);
3653 /* exception for 32bit unsigned, do a 64bit spill+load */
3654 if (!mode_is_signed(mode)) {
3657 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store a zero high word at offset 4 -> value reads as positive 64-bit */
3659 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3660 noreg_GP, nomem, zero_const);
3662 set_ia32_use_frame(zero_store);
3663 set_ia32_op_type(zero_store, ia32_AddrModeD);
3664 add_ia32_am_offs_int(zero_store, 4);
3665 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores -> merge their memory via a Sync */
3670 store = new_rd_Sync(dbgi, block, 2, in);
3671 store_mode = mode_Ls;
3673 store_mode = mode_Is;
3677 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3679 set_ia32_use_frame(fild);
3680 set_ia32_op_type(fild, ia32_AddrModeS);
3681 set_ia32_ls_mode(fild, store_mode);
3683 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3689 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes. The effective conversion
 * is to the smaller of the two modes; it is skipped entirely when the
 * operand's upper bits are already known clean. */
3691 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3692 dbg_info *dbgi, ir_node *block, ir_node *op,
3695 	ir_node *new_block = be_transform_node(block);
3697 	ir_mode *smaller_mode;
3698 	ia32_address_mode_t am;
3699 	ia32_address_t *addr = &am.addr;
3702 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3703 smaller_mode = src_mode;
3705 smaller_mode = tgt_mode;
3708 #ifdef DEBUG_libfirm
3710 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3715 match_arguments(&am, block, NULL, op, NULL,
3716 match_am | match_8bit_am | match_16bit_am);
3718 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3719 /* unnecessary conv. in theory it shouldn't have been AM */
3720 assert(is_ia32_NoReg_GP(addr->base));
3721 assert(is_ia32_NoReg_GP(addr->index));
3722 assert(is_NoMem(addr->mem));
3723 assert(am.addr.offset == 0);
3724 assert(am.addr.symconst_ent == NULL);
3728 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3729 addr->mem, am.new_op2, smaller_mode);
3730 set_am_attributes(new_node, &am);
3731 /* match_arguments assume that out-mode = in-mode, this isn't true here
3733 set_ia32_ls_mode(new_node, smaller_mode);
3734 SET_IA32_ORIG_NODE(new_node, node);
3735 new_node = fix_mem_proj(new_node, &am);
3740 * Transforms a Conv node.
3742 * @return The created ia32 Conv node
/* Transform a Conv node, dispatching on the (src, tgt) mode pair:
 * float->float (FP2FP / x87 strict round-trip), float->int (FP2I /
 * gen_x87_fp_to_gp), int->float (I2FP / gen_x87_gp_to_fp + optional strict
 * conv when the int mantissa exceeds the float mantissa), int->int
 * (create_I2I_Conv). No-op and mode_b cases are short-circuited. */
3744 static ir_node *gen_Conv(ir_node *node)
3746 	ir_node *block = get_nodes_block(node);
3747 	ir_node *new_block = be_transform_node(block);
3748 	ir_node *op = get_Conv_op(node);
3749 	ir_node *new_op = NULL;
3750 	dbg_info *dbgi = get_irn_dbg_info(node);
3751 	ir_mode *src_mode = get_irn_mode(op);
3752 	ir_mode *tgt_mode = get_irn_mode(node);
3753 	int src_bits = get_mode_size_bits(src_mode);
3754 	int tgt_bits = get_mode_size_bits(tgt_mode);
3755 	ir_node *res = NULL;
3757 assert(!mode_is_int(src_mode) || src_bits <= 32);
3758 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3760 /* modeB -> X should already be lowered by the lower_mode_b pass */
3761 if (src_mode == mode_b) {
3762 panic("ConvB not lowered %+F", node);
3765 if (src_mode == tgt_mode) {
3766 if (get_Conv_strict(node)) {
3767 if (ia32_cg_config.use_sse2) {
3768 /* when we are in SSE mode, we can kill all strict no-op conversion */
3769 return be_transform_node(op);
3772 /* this should be optimized already, but who knows... */
3773 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3774 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3775 return be_transform_node(op);
3779 if (mode_is_float(src_mode)) {
3780 new_op = be_transform_node(op);
3781 /* we convert from float ... */
3782 if (mode_is_float(tgt_mode)) {
3784 if (ia32_cg_config.use_sse2) {
3785 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3786 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3788 set_ia32_ls_mode(res, tgt_mode);
3790 if (get_Conv_strict(node)) {
3791 /* if fp_no_float_fold is not set then we assume that we
3792 * don't have any float operations in a non
3793 * mode_float_arithmetic mode and can skip strict upconvs */
3794 if (src_bits < tgt_bits
3795 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3796 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3799 res = gen_x87_strict_conv(tgt_mode, new_op);
3800 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* non-strict x87 float->float needs no code at all */
3804 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3809 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3810 if (ia32_cg_config.use_sse2) {
3811 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3813 set_ia32_ls_mode(res, src_mode);
3815 return gen_x87_fp_to_gp(node);
3819 /* we convert from int ... */
3820 if (mode_is_float(tgt_mode)) {
3822 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3823 if (ia32_cg_config.use_sse2) {
3824 new_op = be_transform_node(op);
3825 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3827 set_ia32_ls_mode(res, tgt_mode);
3829 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3830 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3831 res = gen_x87_gp_to_fp(node, src_mode);
3833 /* we need a strict-Conv, if the int mode has more bits than the
3835 if (float_mantissa < int_mantissa) {
3836 res = gen_x87_strict_conv(tgt_mode, res);
3837 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3841 } else if (tgt_mode == mode_b) {
3842 /* mode_b lowering already took care that we only have 0/1 values */
3843 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3844 src_mode, tgt_mode));
3845 return be_transform_node(op);
3848 if (src_bits == tgt_bits) {
3849 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3850 src_mode, tgt_mode));
3851 return be_transform_node(op);
3854 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Tries to encode @p node as an ia32 Immediate; if that is not possible
 * (try_create_Immediate returns NULL), transforms it as a regular node.
 *
 * @param node                       the firm node to transform
 * @param immediate_constraint_type  constraint character restricting the
 *                                   allowed immediate form ('\0' = any)
 */
3862 static ir_node *create_immediate_or_transform(ir_node *node,
3863 char immediate_constraint_type)
3865 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3866 if (new_node == NULL) {
3867 new_node = be_transform_node(node);
3873 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transforms a be_FrameAddr into an ia32 Lea that addresses the frame
 * entity relative to the (transformed) frame pointer input.
 */
3875 static ir_node *gen_be_FrameAddr(ir_node *node)
3877 ir_node *block = be_transform_node(get_nodes_block(node));
3878 ir_node *op = be_get_FrameAddr_frame(node);
3879 ir_node *new_op = be_transform_node(op);
3880 dbg_info *dbgi = get_irn_dbg_info(node);
3883 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* the concrete frame offset is fixed up later, once entities are placed */
3884 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3885 set_ia32_use_frame(new_node);
3887 SET_IA32_ORIG_NODE(new_node, node);
3893 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transforms a be_Return. In the SSE2 case a float return value lives in
 * xmm0 but the ABI returns floats on the x87 stack, so the value is
 * spilled to the frame (xStoreSimple) and reloaded with vfld before the
 * Return; the Barrier feeding the Return is rebuilt with the new value
 * and memory inputs. All other cases are duplicated unchanged.
 */
3895 static ir_node *gen_be_Return(ir_node *node)
3897 ir_graph *irg = current_ir_graph;
3898 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3899 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3900 ir_entity *ent = get_irg_entity(irg);
3901 ir_type *tp = get_entity_type(ent);
3906 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3907 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3909 int pn_ret_val, pn_ret_mem, arity, i;
/* fast path: nothing to fix unless SSE2 returns a primitive float */
3911 assert(ret_val != NULL);
3912 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3913 return be_duplicate_node(node);
3916 res_type = get_method_res_type(tp, 0);
3918 if (! is_Primitive_type(res_type)) {
3919 return be_duplicate_node(node);
3922 mode = get_type_mode(res_type);
3923 if (! mode_is_float(mode)) {
3924 return be_duplicate_node(node);
3927 assert(get_method_n_ress(tp) == 1);
3929 pn_ret_val = get_Proj_proj(ret_val);
3930 pn_ret_mem = get_Proj_proj(ret_mem);
3932 /* get the Barrier */
3933 barrier = get_Proj_pred(ret_val);
3935 /* get result input of the Barrier */
3936 ret_val = get_irn_n(barrier, pn_ret_val);
3937 new_ret_val = be_transform_node(ret_val);
3939 /* get memory input of the Barrier */
3940 ret_mem = get_irn_n(barrier, pn_ret_mem);
3941 new_ret_mem = be_transform_node(ret_mem);
3943 frame = get_irg_frame(irg);
3945 dbgi = get_irn_dbg_info(barrier);
3946 block = be_transform_node(get_nodes_block(barrier));
3948 /* store xmm0 onto stack */
3949 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3950 new_ret_mem, new_ret_val);
3951 set_ia32_ls_mode(sse_store, mode);
3952 set_ia32_op_type(sse_store, ia32_AddrModeD);
3953 set_ia32_use_frame(sse_store);
3955 /* load into x87 register */
3956 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3957 set_ia32_op_type(fld, ia32_AddrModeS);
3958 set_ia32_use_frame(fld);
3960 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3961 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3963 /* create a new barrier */
3964 arity = get_irn_arity(barrier);
3965 in = ALLOCAN(ir_node*, arity);
/* copy all barrier inputs, substituting the reloaded value and the vfld
 * memory Proj for the old result/memory inputs */
3966 for (i = 0; i < arity; ++i) {
3969 if (i == pn_ret_val) {
3971 } else if (i == pn_ret_mem) {
3974 ir_node *in = get_irn_n(barrier, i);
3975 new_in = be_transform_node(in);
3980 new_barrier = new_ir_node(dbgi, irg, block,
3981 get_irn_op(barrier), get_irn_mode(barrier),
3983 copy_node_attr(irg, barrier, new_barrier);
3984 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return below picks up the new barrier */
3985 be_set_transformed_node(barrier, new_barrier);
3987 /* transform normally */
3988 return be_duplicate_node(node);
3992 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transforms a be_AddSP into an ia32_SubSP.
 * (The stack grows downwards on ia32, so adding to the used stack space
 * means subtracting from the stack pointer.)
 */
3994 static ir_node *gen_be_AddSP(ir_node *node)
3996 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3997 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3999 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4000 match_am | match_immediate);
4004 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transforms a be_SubSP into an ia32_AddSP.
 * (Mirror of gen_be_AddSP: shrinking the stack means adding to esp.)
 */
4006 static ir_node *gen_be_SubSP(ir_node *node)
4008 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4009 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4011 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4012 match_am | match_immediate);
4016 * Change some phi modes
/**
 * Transforms a Phi node: the Phi itself is recreated with the same mode,
 * but its output register requirement is set according to the mode
 * (gp for integers/pointers, xmm or vfp for floats depending on SSE2).
 * The Phi arguments are NOT transformed here to allow loops; they are
 * fixed later by the generic transformation driver.
 */
4018 static ir_node *gen_Phi(ir_node *node)
4020 const arch_register_req_t *req;
4021 ir_node *block = be_transform_node(get_nodes_block(node));
4022 ir_graph *irg = current_ir_graph;
4023 dbg_info *dbgi = get_irn_dbg_info(node);
4024 ir_mode *mode = get_irn_mode(node);
4027 if (ia32_mode_needs_gp_reg(mode)) {
4028 /* we shouldn't have any 64bit stuff around anymore */
4029 assert(get_mode_size_bits(mode) <= 32);
4030 /* all integer operations are on 32bit registers now */
4032 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4033 } else if (mode_is_float(mode)) {
4034 if (ia32_cg_config.use_sse2) {
4036 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4039 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory Phis need no register at all */
4042 req = arch_no_register_req;
4045 /* phi nodes allow loops, so we use the old arguments for now
4046 * and fix this later */
4047 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4048 get_irn_in(node) + 1);
4049 copy_node_attr(irg, node, phi);
4050 be_duplicate_deps(node, phi);
4052 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessor blocks/values get transformed too */
4054 be_enqueue_preds(node);
/**
 * Transforms a Jmp node into an ia32 Jmp (unconditional jump).
 */
4059 static ir_node *gen_Jmp(ir_node *node)
4061 ir_node *block = get_nodes_block(node);
4062 ir_node *new_block = be_transform_node(block);
4063 dbg_info *dbgi = get_irn_dbg_info(node);
4066 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4067 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an IJmp (indirect jump) into an ia32 IJmp. The jump target
 * may be folded into an address-mode operand or an immediate via
 * match_arguments.
 */
4075 static ir_node *gen_IJmp(ir_node *node)
4077 ir_node *block = get_nodes_block(node);
4078 ir_node *new_block = be_transform_node(block);
4079 dbg_info *dbgi = get_irn_dbg_info(node);
4080 ir_node *op = get_IJmp_target(node);
4082 ia32_address_mode_t am;
4083 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4085 assert(get_irn_mode(op) == mode_P);
4087 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4089 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4090 addr->mem, am.new_op2);
4091 set_am_attributes(new_node, &am);
4092 SET_IA32_ORIG_NODE(new_node, node);
4094 new_node = fix_mem_proj(new_node, &am);
4100 * Transform a Bound node.
/**
 * Transforms a Bound node. Only the common case with lower bound 0
 * (typical for Java array checks) is supported: it is lowered to a Sub
 * (index - upper) plus a Jcc on unsigned less-than, so a single unsigned
 * comparison covers both "index < 0" and "index >= upper".
 */
4102 static ir_node *gen_Bound(ir_node *node)
4105 ir_node *lower = get_Bound_lower(node);
4106 dbg_info *dbgi = get_irn_dbg_info(node);
4108 if (is_Const_0(lower)) {
4109 /* typical case for Java */
4110 ir_node *sub, *res, *flags, *block;
4112 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4113 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4115 block = get_nodes_block(res);
/* gen_binop may return the Sub directly or a Proj of it; normalize so
 * we can add a flags Proj to the underlying Sub */
4116 if (! is_Proj(res)) {
4118 set_irn_mode(sub, mode_T);
4119 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4121 sub = get_Proj_pred(res);
4123 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4124 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4125 SET_IA32_ORIG_NODE(new_node, node);
4127 panic("generic Bound not supported in ia32 Backend");
/**
 * Transforms an ia32_l_ShlDep (shift-left with extra scheduling
 * dependency) into a real ia32 Shl.
 */
4133 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4135 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4136 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4138 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4139 match_immediate | match_mode_neutral);
/**
 * Transforms an ia32_l_ShrDep (logical shift-right with extra scheduling
 * dependency) into a real ia32 Shr.
 */
4142 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4144 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4145 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4146 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transforms an ia32_l_SarDep (arithmetic shift-right with extra
 * scheduling dependency) into a real ia32 Sar.
 */
4150 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4152 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4153 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4154 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (lowered 64bit add low-part) into a real ia32
 * Add. The node is forced to mode_T because the carry-flag output is
 * consumed by the matching Adc of the high part.
 */
4158 static ir_node *gen_ia32_l_Add(ir_node *node)
4160 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4161 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4162 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4163 match_commutative | match_am | match_immediate |
4164 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it */
4166 if (is_Proj(lowered)) {
4167 lowered = get_Proj_pred(lowered);
4169 assert(is_ia32_Add(lowered));
4170 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Adc (lowered 64bit add high-part, consuming the
 * carry flag) into a real ia32 Adc.
 */
4176 static ir_node *gen_ia32_l_Adc(ir_node *node)
4178 return gen_binop_flags(node, new_bd_ia32_Adc,
4179 match_commutative | match_am | match_immediate |
4180 match_mode_neutral);
4184 * Transforms a l_MulS into a "real" MulS node.
4186 * @return the created ia32 Mul node
/**
 * Transforms an ia32_l_Mul into a "real" ia32 Mul (unsigned widening
 * multiply producing a low and a high result).
 *
 * @return the created ia32 Mul node
 */
4188 static ir_node *gen_ia32_l_Mul(ir_node *node)
4190 ir_node *left = get_binop_left(node);
4191 ir_node *right = get_binop_right(node);
4193 return gen_binop(node, left, right, new_bd_ia32_Mul,
4194 match_commutative | match_am | match_mode_neutral);
4198 * Transforms a l_IMulS into a "real" IMul1OPS node.
4200 * @return the created ia32 IMul1OP node
/**
 * Transforms an ia32_l_IMul into a "real" ia32 IMul1OP (one-operand
 * signed widening multiply).
 *
 * @return the created ia32 IMul1OP node
 */
4202 static ir_node *gen_ia32_l_IMul(ir_node *node)
4204 ir_node *left = get_binop_left(node);
4205 ir_node *right = get_binop_right(node);
4207 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4208 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (lowered 64bit sub low-part) into a real ia32
 * Sub. Forced to mode_T so the borrow-flag output can feed the matching
 * Sbb of the high part.
 */
4211 static ir_node *gen_ia32_l_Sub(ir_node *node)
4213 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4214 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4215 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4216 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have wrapped the Sub in a result Proj; unwrap it */
4218 if (is_Proj(lowered)) {
4219 lowered = get_Proj_pred(lowered);
4221 assert(is_ia32_Sub(lowered));
4222 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Sbb (lowered 64bit sub high-part, consuming the
 * borrow flag) into a real ia32 Sbb.
 */
4228 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4230 return gen_binop_flags(node, new_bd_ia32_Sbb,
4231 match_am | match_immediate | match_mode_neutral);
4235 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4236 * op1 - target to be shifted
4237 * op2 - contains bits to be shifted into target
4239 * Only op3 can be an immediate.
/**
 * Common helper: transforms an ia32_l_ShlD/l_ShrD (lowered 64bit double
 * shift) into a real ia32 ShlD/ShrD.
 *
 * @param node   the l_ShlD or l_ShrD node
 * @param high   value to be shifted (target operand)
 * @param low    value supplying the bits shifted into the target
 * @param count  shift amount; only this operand may become an immediate
 */
4241 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4242 ir_node *low, ir_node *count)
4244 ir_node *block = get_nodes_block(node);
4245 ir_node *new_block = be_transform_node(block);
4246 dbg_info *dbgi = get_irn_dbg_info(node);
4247 ir_node *new_high = be_transform_node(high);
4248 ir_node *new_low = be_transform_node(low);
4252 /* the shift amount can be any mode that is bigger than 5 bits, since all
4253 * other bits are ignored anyway */
4254 while (is_Conv(count) &&
4255 get_irn_n_edges(count) == 1 &&
4256 mode_is_int(get_irn_mode(count))) {
4257 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4258 count = get_Conv_op(count);
4260 new_count = create_immediate_or_transform(count, 0);
4262 if (is_ia32_l_ShlD(node)) {
4263 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4266 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4269 SET_IA32_ORIG_NODE(new_node, node);
/** Transforms an ia32_l_ShlD into a real ia32 ShlD. */
4274 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4276 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4277 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4278 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4279 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transforms an ia32_l_ShrD into a real ia32 ShrD. */
4282 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4284 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4285 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4286 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4287 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_LLtoFloat (64bit int -> float) for the x87 path:
 * the two 32bit halves are stored to a frame slot, reloaded as one 64bit
 * integer with fild, and — for unsigned sources that had the sign bit
 * set — corrected by adding 2^64 (ULL_BIAS) from a constant table.
 * Panics if SSE2 is selected, since no SSE2 implementation exists.
 */
4290 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4292 ir_node *src_block = get_nodes_block(node);
4293 ir_node *block = be_transform_node(src_block);
4294 ir_graph *irg = current_ir_graph;
4295 dbg_info *dbgi = get_irn_dbg_info(node);
4296 ir_node *frame = get_irg_frame(irg);
4297 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4298 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4299 ir_node *new_val_low = be_transform_node(val_low);
4300 ir_node *new_val_high = be_transform_node(val_high);
4302 ir_node *sync, *fild, *res;
4303 ir_node *store_low, *store_high;
4305 if (ia32_cg_config.use_sse2) {
4306 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves to a 64bit frame slot (high half at offset 4) */
4310 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4312 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4314 SET_IA32_ORIG_NODE(store_low, node);
4315 SET_IA32_ORIG_NODE(store_high, node);
4317 set_ia32_use_frame(store_low);
4318 set_ia32_use_frame(store_high);
4319 set_ia32_op_type(store_low, ia32_AddrModeD);
4320 set_ia32_op_type(store_high, ia32_AddrModeD);
4321 set_ia32_ls_mode(store_low, mode_Iu);
4322 set_ia32_ls_mode(store_high, mode_Is);
4323 add_ia32_am_offs_int(store_high, 4);
/* order the fild after both stores */
4327 sync = new_rd_Sync(dbgi, block, 2, in);
4330 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4332 set_ia32_use_frame(fild);
4333 set_ia32_op_type(fild, ia32_AddrModeS);
4334 set_ia32_ls_mode(fild, mode_Ls);
4336 SET_IA32_ORIG_NODE(fild, node);
4338 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4340 if (! mode_is_signed(get_irn_mode(val_high))) {
4341 ia32_address_mode_t am;
/* fild treated the value as signed; if the unsigned source had its top
 * bit set the result is off by 2^64. Select bias 0 or ULL_BIAS from a
 * 2-entry table indexed by the sign bit (val_high >> 31). */
4343 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4346 am.addr.base = get_symconst_base();
4347 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4348 am.addr.mem = nomem;
4351 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4352 am.addr.use_frame = 0;
4353 am.addr.frame_entity = NULL;
4354 am.addr.symconst_sign = 0;
4355 am.ls_mode = mode_F;
4356 am.mem_proj = nomem;
4357 am.op_type = ia32_AddrModeS;
4359 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4360 am.pinned = op_pin_state_floats;
4362 am.ins_permuted = 0;
4364 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4365 am.new_op1, am.new_op2, get_fpcw());
4366 set_am_attributes(fadd, &am);
4368 set_irn_mode(fadd, mode_T);
4369 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transforms an ia32_l_FloattoLL (float -> 64bit int): the value is
 * stored to a 64bit frame slot via fist(p); the two 32bit halves are
 * later read back by gen_Proj_l_FloattoLL.
 */
4374 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4376 ir_node *src_block = get_nodes_block(node);
4377 ir_node *block = be_transform_node(src_block);
4378 ir_graph *irg = get_Block_irg(block);
4379 dbg_info *dbgi = get_irn_dbg_info(node);
4380 ir_node *frame = get_irg_frame(irg);
4381 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4382 ir_node *new_val = be_transform_node(val);
4383 ir_node *fist, *mem;
4385 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4386 SET_IA32_ORIG_NODE(fist, node);
4387 set_ia32_use_frame(fist);
4388 set_ia32_op_type(fist, ia32_AddrModeD);
/* fist writes the full 64bit value */
4389 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transforms a Proj of an ia32_l_FloattoLL: loads the requested 32bit
 * half (low at offset 0, high at offset 4) from the 64bit frame slot the
 * fist wrote.
 */
4394 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4396 ir_node *block = be_transform_node(get_nodes_block(node));
4397 ir_graph *irg = get_Block_irg(block);
4398 ir_node *pred = get_Proj_pred(node);
4399 ir_node *new_pred = be_transform_node(pred);
4400 ir_node *frame = get_irg_frame(irg);
4401 dbg_info *dbgi = get_irn_dbg_info(node);
4402 long pn = get_Proj_proj(node);
4407 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4408 SET_IA32_ORIG_NODE(load, node);
4409 set_ia32_use_frame(load);
4410 set_ia32_op_type(load, ia32_AddrModeS);
4411 set_ia32_ls_mode(load, mode_Iu);
4412 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4413 * 32 bit from it with this particular load */
4414 attr = get_ia32_attr(load);
4415 attr->data.need_64bit_stackent = 1;
4417 if (pn == pn_ia32_l_FloattoLL_res_high) {
4418 add_ia32_am_offs_int(load, 4);
4420 assert(pn == pn_ia32_l_FloattoLL_res_low);
4423 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4429 * Transform the Projs of an AddSP.
/**
 * Transforms the Projs of a be_AddSP. Note the predecessor was turned
 * into an ia32_SubSP (stack grows downwards), so the Proj numbers are
 * remapped onto the SubSP outputs; the sp result is pinned to ESP.
 */
4431 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4433 ir_node *pred = get_Proj_pred(node);
4434 ir_node *new_pred = be_transform_node(pred);
4435 dbg_info *dbgi = get_irn_dbg_info(node);
4436 long proj = get_Proj_proj(node);
4438 if (proj == pn_be_AddSP_sp) {
4439 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4440 pn_ia32_SubSP_stack);
4441 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4443 } else if (proj == pn_be_AddSP_res) {
4444 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4445 pn_ia32_SubSP_addr);
4446 } else if (proj == pn_be_AddSP_M) {
4447 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4450 panic("No idea how to transform proj->AddSP");
4454 * Transform the Projs of a SubSP.
/**
 * Transforms the Projs of a be_SubSP. The predecessor was turned into an
 * ia32_AddSP (mirror of gen_Proj_be_AddSP); the sp result is pinned to
 * ESP.
 */
4456 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4458 ir_node *pred = get_Proj_pred(node);
4459 ir_node *new_pred = be_transform_node(pred);
4460 dbg_info *dbgi = get_irn_dbg_info(node);
4461 long proj = get_Proj_proj(node);
4463 if (proj == pn_be_SubSP_sp) {
4464 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4465 pn_ia32_AddSP_stack);
4466 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4468 } else if (proj == pn_be_SubSP_M) {
4469 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4472 panic("No idea how to transform proj->SubSP");
4476 * Transform and renumber the Projs from a Load.
/**
 * Transforms and renumbers the Projs of a Load. Depending on what the
 * Load predecessor was transformed into (ia32 Load, a Conv with folded
 * source address mode, xLoad, vfld, or something that consumed the load
 * entirely via address-mode matching), the firm Proj numbers are mapped
 * to the corresponding ia32 output numbers.
 */
4478 static ir_node *gen_Proj_Load(ir_node *node)
4481 ir_node *block = be_transform_node(get_nodes_block(node));
4482 ir_node *pred = get_Proj_pred(node);
4483 dbg_info *dbgi = get_irn_dbg_info(node);
4484 long proj = get_Proj_proj(node);
4486 /* loads might be part of source address mode matches, so we don't
4487 * transform the ProjMs yet (with the exception of loads whose result is
4490 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4493 /* this is needed, because sometimes we have loops that are only
4494 reachable through the ProjM */
4495 be_enqueue_preds(node);
4496 /* do it in 2 steps, to silence firm verifier */
4497 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4498 set_Proj_proj(res, pn_ia32_mem);
4502 /* renumber the proj */
4503 new_pred = be_transform_node(pred);
4504 if (is_ia32_Load(new_pred)) {
4507 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4509 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4510 case pn_Load_X_regular:
4511 return new_rd_Jmp(dbgi, block);
4512 case pn_Load_X_except:
4513 /* This Load might raise an exception. Mark it. */
4514 set_ia32_exc_label(new_pred, 1);
4515 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was folded into a Conv as a source address-mode operand */
4519 } else if (is_ia32_Conv_I2I(new_pred) ||
4520 is_ia32_Conv_I2I8Bit(new_pred)) {
4521 set_irn_mode(new_pred, mode_T);
4522 if (proj == pn_Load_res) {
4523 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4524 } else if (proj == pn_Load_M) {
4525 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4527 } else if (is_ia32_xLoad(new_pred)) {
4530 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4532 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4533 case pn_Load_X_regular:
4534 return new_rd_Jmp(dbgi, block);
4535 case pn_Load_X_except:
4536 /* This Load might raise an exception. Mark it. */
4537 set_ia32_exc_label(new_pred, 1);
4538 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4542 } else if (is_ia32_vfld(new_pred)) {
4545 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4547 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4548 case pn_Load_X_regular:
4549 return new_rd_Jmp(dbgi, block);
4550 case pn_Load_X_except:
4551 /* This Load might raise an exception. Mark it. */
4552 set_ia32_exc_label(new_pred, 1);
4553 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4558 /* can happen for ProJMs when source address mode happened for the
4561 /* however it should not be the result proj, as that would mean the
4562 load had multiple users and should not have been used for
4564 if (proj != pn_Load_M) {
4565 panic("internal error: transformed node not a Load");
4567 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4570 panic("No idea how to transform proj");
4574 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transforms and renumbers the Projs of a Div/Mod/DivMod. All three firm
 * opcodes map onto a single ia32 Div (or IDiv) node that produces both
 * quotient (div_res) and remainder (mod_res) outputs.
 */
4576 static ir_node *gen_Proj_DivMod(ir_node *node)
4578 ir_node *block = be_transform_node(get_nodes_block(node));
4579 ir_node *pred = get_Proj_pred(node);
4580 ir_node *new_pred = be_transform_node(pred);
4581 dbg_info *dbgi = get_irn_dbg_info(node);
4582 long proj = get_Proj_proj(node);
4584 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch first on the original firm opcode, then on the Proj number */
4586 switch (get_irn_opcode(pred)) {
4590 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4592 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4593 case pn_Div_X_regular:
4594 return new_rd_Jmp(dbgi, block);
4595 case pn_Div_X_except:
4596 set_ia32_exc_label(new_pred, 1);
4597 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4605 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4607 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4608 case pn_Mod_X_except:
4609 set_ia32_exc_label(new_pred, 1);
4610 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4618 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4619 case pn_DivMod_res_div:
4620 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4621 case pn_DivMod_res_mod:
4622 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4623 case pn_DivMod_X_regular:
4624 return new_rd_Jmp(dbgi, block);
4625 case pn_DivMod_X_except:
4626 set_ia32_exc_label(new_pred, 1);
4627 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4636 panic("No idea how to transform proj->DivMod");
4640 * Transform and renumber the Projs from a CopyB.
/**
 * Transforms and renumbers the Projs of a CopyB onto the ia32 CopyB
 * variant the predecessor was turned into (CopyB_i for the immediate
 * size, CopyB for the general case).
 */
4642 static ir_node *gen_Proj_CopyB(ir_node *node)
4644 ir_node *pred = get_Proj_pred(node);
4645 ir_node *new_pred = be_transform_node(pred);
4646 dbg_info *dbgi = get_irn_dbg_info(node);
4647 long proj = get_Proj_proj(node);
4651 if (is_ia32_CopyB_i(new_pred)) {
4652 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4653 } else if (is_ia32_CopyB(new_pred)) {
4654 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4661 panic("No idea how to transform proj->CopyB");
4665 * Transform and renumber the Projs from a Quot.
/**
 * Transforms and renumbers the Projs of a Quot (float division) onto
 * either an SSE xDiv or an x87 vfdiv predecessor. Control-flow Projs
 * (X_regular/X_except) are not supported here.
 */
4667 static ir_node *gen_Proj_Quot(ir_node *node)
4669 ir_node *pred = get_Proj_pred(node);
4670 ir_node *new_pred = be_transform_node(pred);
4671 dbg_info *dbgi = get_irn_dbg_info(node);
4672 long proj = get_Proj_proj(node);
4676 if (is_ia32_xDiv(new_pred)) {
4677 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4678 } else if (is_ia32_vfdiv(new_pred)) {
4679 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4683 if (is_ia32_xDiv(new_pred)) {
4684 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4685 } else if (is_ia32_vfdiv(new_pred)) {
4686 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4689 case pn_Quot_X_regular:
4690 case pn_Quot_X_except:
4695 panic("No idea how to transform proj->Quot");
/**
 * Transforms a be_Call into an ia32 Call. The call target may be folded
 * into an address-mode operand or immediate; register parameters are
 * sorted into their fixed eax/ecx/edx slots; SSE2 calls returning floats
 * are remembered for post-processing (x87 simulation for the xmm0 ->
 * x87-TOS result copy).
 */
4698 static ir_node *gen_be_Call(ir_node *node)
4700 dbg_info *const dbgi = get_irn_dbg_info(node);
4701 ir_node *const src_block = get_nodes_block(node);
4702 ir_node *const block = be_transform_node(src_block);
4703 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4704 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4705 ir_node *const sp = be_transform_node(src_sp);
4706 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4707 ia32_address_mode_t am;
4708 ia32_address_t *const addr = &am.addr;
4713 ir_node * eax = noreg_GP;
4714 ir_node * ecx = noreg_GP;
4715 ir_node * edx = noreg_GP;
4716 unsigned const pop = be_Call_get_pop(node);
4717 ir_type *const call_tp = be_Call_get_type(node);
4718 int old_no_pic_adjust;
4720 /* Run the x87 simulator if the call returns a float value */
4721 if (get_method_n_ress(call_tp) > 0) {
4722 ir_type *const res_type = get_method_res_type(call_tp, 0);
4723 ir_mode *const res_mode = get_type_mode(res_type);
4725 if (res_mode != NULL && mode_is_float(res_mode)) {
4726 env_cg->do_x87_sim = 1;
4730 /* We do not want be_Call direct calls */
4731 assert(be_Call_get_entity(node) == NULL);
4733 /* special case for PIC trampoline calls */
4734 old_no_pic_adjust = no_pic_adjust;
4735 no_pic_adjust = be_get_irg_options(env_cg->irg)->pic;
4737 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4738 match_am | match_immediate);
4740 no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; walk the remaining register parameters
 * backwards and sort them into their fixed register slots */
4742 i = get_irn_arity(node) - 1;
4743 fpcw = be_transform_node(get_irn_n(node, i--));
4744 for (; i >= be_pos_Call_first_arg; --i) {
4745 arch_register_req_t const *const req = arch_get_register_req(node, i);
4746 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4748 assert(req->type == arch_register_req_type_limited);
4749 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4751 switch (*req->limited) {
4752 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4753 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4754 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4755 default: panic("Invalid GP register for register parameter");
4759 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4760 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4761 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4762 set_am_attributes(call, &am);
4763 call = fix_mem_proj(call, &am);
4765 if (get_irn_pinned(node) == op_pin_state_pinned)
4766 set_irn_pinned(call, op_pin_state_pinned);
4768 SET_IA32_ORIG_NODE(call, node);
4770 if (ia32_cg_config.use_sse2) {
4771 /* remember this call for post-processing */
4772 ARR_APP1(ir_node *, call_list, call);
4773 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4780 * Transform Builtin trap
/** Transforms a Builtin trap into an ia32 UD2 instruction. */
4782 static ir_node *gen_trap(ir_node *node)
4784 dbg_info *dbgi = get_irn_dbg_info(node);
4785 ir_node *block = be_transform_node(get_nodes_block(node));
4786 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4788 return new_bd_ia32_UD2(dbgi, block, mem);
4792 * Transform Builtin debugbreak
/** Transforms a Builtin debugbreak into an ia32 Breakpoint (int3). */
4794 static ir_node *gen_debugbreak(ir_node *node)
4796 dbg_info *dbgi = get_irn_dbg_info(node);
4797 ir_node *block = be_transform_node(get_nodes_block(node));
4798 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4800 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4804 * Transform Builtin return_address
/**
 * Transforms a Builtin return_address: climbs `value` frames up via
 * ClimbFrame (when value != 0) and loads the return address from the
 * resulting frame pointer. The frame-walk depth must be a Const.
 */
4806 static ir_node *gen_return_address(ir_node *node)
4808 ir_node *param = get_Builtin_param(node, 0);
4809 ir_node *frame = get_Builtin_param(node, 1);
4810 dbg_info *dbgi = get_irn_dbg_info(node);
4811 tarval *tv = get_Const_tarval(param);
4812 unsigned long value = get_tarval_long(tv);
4814 ir_node *block = be_transform_node(get_nodes_block(node));
4815 ir_node *ptr = be_transform_node(frame);
4819 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4820 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4821 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4824 /* load the return address from this frame */
4825 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4827 set_irn_pinned(load, get_irn_pinned(node));
4828 set_ia32_op_type(load, ia32_AddrModeS);
4829 set_ia32_ls_mode(load, mode_Iu);
4831 set_ia32_am_offs_int(load, 0);
4832 set_ia32_use_frame(load);
4833 set_ia32_frame_ent(load, ia32_get_return_address_entity());
/* an unpinned load may be duplicated by the scheduler; mark it
 * rematerializable (the Proj numbers must agree across load kinds) */
4835 if (get_irn_pinned(node) == op_pin_state_floats) {
4836 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4837 && pn_ia32_vfld_res == pn_ia32_Load_res
4838 && pn_ia32_Load_res == pn_ia32_res);
4839 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4842 SET_IA32_ORIG_NODE(load, node);
4843 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4847 * Transform Builtin frame_address
/**
 * Transforms a Builtin frame_address: climbs `value` frames up via
 * ClimbFrame (when value != 0) and loads the saved frame pointer from
 * the resulting frame. Analogous to gen_return_address.
 */
4849 static ir_node *gen_frame_address(ir_node *node)
4851 ir_node *param = get_Builtin_param(node, 0);
4852 ir_node *frame = get_Builtin_param(node, 1);
4853 dbg_info *dbgi = get_irn_dbg_info(node);
4854 tarval *tv = get_Const_tarval(param);
4855 unsigned long value = get_tarval_long(tv);
4857 ir_node *block = be_transform_node(get_nodes_block(node));
4858 ir_node *ptr = be_transform_node(frame);
4863 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4864 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4865 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4868 /* load the frame address from this frame */
4869 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4871 set_irn_pinned(load, get_irn_pinned(node));
4872 set_ia32_op_type(load, ia32_AddrModeS);
4873 set_ia32_ls_mode(load, mode_Iu);
4875 ent = ia32_get_frame_address_entity();
4877 set_ia32_am_offs_int(load, 0);
4878 set_ia32_use_frame(load);
4879 set_ia32_frame_ent(load, ent);
4881 /* will fail anyway, but gcc does this: */
4882 set_ia32_am_offs_int(load, 0);
/* an unpinned load may be duplicated by the scheduler; mark it
 * rematerializable (the Proj numbers must agree across load kinds) */
4885 if (get_irn_pinned(node) == op_pin_state_floats) {
4886 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4887 && pn_ia32_vfld_res == pn_ia32_Load_res
4888 && pn_ia32_Load_res == pn_ia32_res);
4889 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4892 SET_IA32_ORIG_NODE(load, node);
4893 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4897 * Transform Builtin frame_address
/**
 * Transforms a Builtin prefetch. Chooses between the 3DNow! PrefetchW
 * (for writes), the SSE Prefetch0/1/2/NTA family selected by the
 * locality hint, or the plain 3DNow! Prefetch; if neither instruction
 * set is available the builtin degrades to just routing its memory.
 * Builtin params: 0 = address, 1 = rw flag, 2 = locality (both Consts).
 */
4899 static ir_node *gen_prefetch(ir_node *node)
4902 ir_node *ptr, *block, *mem, *base, *index;
4903 ir_node *param, *new_node;
4906 ia32_address_t addr;
4908 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4909 /* no prefetch at all, route memory */
4910 return be_transform_node(get_Builtin_mem(node));
4913 param = get_Builtin_param(node, 1);
4914 tv = get_Const_tarval(param);
4915 rw = get_tarval_long(tv);
4917 /* construct load address */
4918 memset(&addr, 0, sizeof(addr));
4919 ptr = get_Builtin_param(node, 0);
4920 ia32_create_address_mode(&addr, ptr, 0);
4927 base = be_transform_node(base);
4930 if (index == NULL) {
4933 index = be_transform_node(index);
4936 dbgi = get_irn_dbg_info(node);
4937 block = be_transform_node(get_nodes_block(node));
4938 mem = be_transform_node(get_Builtin_mem(node));
4940 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4941 /* we have 3DNow!, this was already checked above */
4942 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4943 } else if (ia32_cg_config.use_sse_prefetch) {
4944 /* note: rw == 1 is IGNORED in that case */
4945 param = get_Builtin_param(node, 2);
4946 tv = get_Const_tarval(param);
4947 locality = get_tarval_long(tv);
4949 /* SSE style prefetch */
4952 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4955 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4958 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4961 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4965 assert(ia32_cg_config.use_3dnow_prefetch);
4966 /* 3DNow! style prefetch */
4967 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4970 set_irn_pinned(new_node, get_irn_pinned(node));
4971 set_ia32_op_type(new_node, ia32_AddrModeS);
4972 set_ia32_ls_mode(new_node, mode_Bu);
4973 set_address(new_node, &addr);
4975 SET_IA32_ORIG_NODE(new_node, node);
4977 be_dep_on_frame(new_node);
4978 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4982 * Transform bsf like node
/* Build a unary ia32 node (e.g. Bsf/Bsr/Popcnt) from builtin parameter 0,
 * allowing the operand to be folded into an address mode.  The constructed
 * node is produced by @p func; used by the ffs/clz/ctz transformations. */
4984 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4986 ir_node *param = get_Builtin_param(node, 0);
4987 dbg_info *dbgi = get_irn_dbg_info(node);
4989 ir_node *block = get_nodes_block(node);
4990 ir_node *new_block = be_transform_node(block);
4992 ia32_address_mode_t am;
4993 ia32_address_t *addr = &am.addr;
/* try to fold the operand as a memory operand (match_am) */
4996 match_arguments(&am, block, NULL, param, NULL, match_am);
4998 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4999 set_am_attributes(cnt, &am);
5000 set_ia32_ls_mode(cnt, get_irn_mode(param));
5002 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj rearranges Projs if a memory operand was folded in */
5003 return fix_mem_proj(cnt, &am);
5007 * Transform builtin ffs.
/* Transform builtin ffs: ffs(x) = bsf(x) + 1, with ffs(0) == 0.
 * bsf leaves its result undefined for x == 0 but sets ZF; the
 * Setcc/Conv/Neg/Or sequence below turns the result into -1 in that
 * case, so the final "+1" yields 0. */
5009 static ir_node *gen_ffs(ir_node *node)
5011 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5012 ir_node *real = skip_Proj(bsf);
5013 dbg_info *dbgi = get_irn_dbg_info(real);
5014 ir_node *block = get_nodes_block(real);
5015 ir_node *flag, *set, *conv, *neg, *or;
/* ensure the Bsf is a mode_T node so we can also take its flags Proj */
5018 if (get_irn_mode(real) != mode_T) {
5019 set_irn_mode(real, mode_T);
5020 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5023 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 if the input was zero (ZF set by bsf), else 0 */
5026 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5027 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit setcc result to a full register */
5030 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5031 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 for non-zero input, 0xFFFFFFFF (all ones) for zero input */
5034 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* or = bsf | neg: the bsf result, or -1 when the input was zero */
5037 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5038 set_ia32_commutative(or);
/* result = or + 1 */
5041 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5045 * Transform builtin clz.
/* Transform builtin clz: clz(x) = bsr(x) ^ 31 for a 32-bit operand.
 * bsr yields the index of the most significant set bit; XOR with 31
 * converts that to a leading-zero count.  bsr is undefined for x == 0
 * — presumably callers guarantee a non-zero argument (TODO confirm). */
5047 static ir_node *gen_clz(ir_node *node)
5049 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5050 ir_node *real = skip_Proj(bsr);
5051 dbg_info *dbgi = get_irn_dbg_info(real);
5052 ir_node *block = get_nodes_block(real);
5053 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5055 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5059 * Transform builtin ctz.
/* Transform builtin ctz: ctz(x) = bsf(x), the index of the least
 * significant set bit (undefined for x == 0, like the bsf instruction). */
5061 static ir_node *gen_ctz(ir_node *node)
5063 return gen_unop_AM(node, new_bd_ia32_Bsf);
5067 * Transform builtin parity.
/* Transform builtin parity: compare the operand against 0 and read the
 * x86 parity flag via setp.
 * NOTE(review): the x86 PF is computed from the LOW BYTE of the result
 * only, so with ls_mode mode_Iu this presumably yields the parity of
 * the least significant byte rather than of the whole 32-bit word —
 * verify against the builtin's intended semantics. */
5069 static ir_node *gen_parity(ir_node *node)
5071 ir_node *param = get_Builtin_param(node, 0);
5072 dbg_info *dbgi = get_irn_dbg_info(node);
5074 ir_node *block = get_nodes_block(node);
5076 ir_node *new_block = be_transform_node(block);
5077 ir_node *imm, *cmp, *new_node;
5079 ia32_address_mode_t am;
5080 ia32_address_t *addr = &am.addr;
/* cmp 0, param: only executed for its effect on the flags */
5084 match_arguments(&am, block, NULL, param, NULL, match_am);
5085 imm = ia32_create_Immediate(NULL, 0, 0);
5086 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5087 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5088 set_am_attributes(cmp, &am);
5089 set_ia32_ls_mode(cmp, mode_Iu);
5091 SET_IA32_ORIG_NODE(cmp, node);
5093 cmp = fix_mem_proj(cmp, &am);
/* setp: materialize the parity flag as 0/1 */
5096 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5097 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit setcc result to a full register */
5100 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5101 nomem, new_node, mode_Bu);
5102 SET_IA32_ORIG_NODE(new_node, node);
5107 * Transform builtin popcount
5109 static ir_node *gen_popcount(ir_node *node)
5111 ir_node *param = get_Builtin_param(node, 0);
5112 dbg_info *dbgi = get_irn_dbg_info(node);
5114 ir_node *block = get_nodes_block(node);
5115 ir_node *new_block = be_transform_node(block);
5118 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5120 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5121 if (ia32_cg_config.use_popcnt) {
5122 ia32_address_mode_t am;
5123 ia32_address_t *addr = &am.addr;
5126 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5128 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5129 set_am_attributes(cnt, &am);
5130 set_ia32_ls_mode(cnt, get_irn_mode(param));
5132 SET_IA32_ORIG_NODE(cnt, node);
5133 return fix_mem_proj(cnt, &am);
5136 new_param = be_transform_node(param);
5138 /* do the standard popcount algo */
5140 /* m1 = x & 0x55555555 */
5141 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5142 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5145 simm = ia32_create_Immediate(NULL, 0, 1);
5146 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5148 /* m2 = s1 & 0x55555555 */
5149 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5152 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5154 /* m4 = m3 & 0x33333333 */
5155 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5156 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5159 simm = ia32_create_Immediate(NULL, 0, 2);
5160 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5162 /* m5 = s2 & 0x33333333 */
5163 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5166 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5168 /* m7 = m6 & 0x0F0F0F0F */
5169 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5170 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5173 simm = ia32_create_Immediate(NULL, 0, 4);
5174 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5176 /* m8 = s3 & 0x0F0F0F0F */
5177 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5180 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5182 /* m10 = m9 & 0x00FF00FF */
5183 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5184 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5187 simm = ia32_create_Immediate(NULL, 0, 8);
5188 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5190 /* m11 = s4 & 0x00FF00FF */
5191 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5193 /* m12 = m10 + m11 */
5194 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5196 /* m13 = m12 & 0x0000FFFF */
5197 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5198 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5200 /* s5 = m12 >> 16 */
5201 simm = ia32_create_Immediate(NULL, 0, 16);
5202 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5204 /* res = m13 + s5 */
5205 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5209 * Transform builtin byte swap.
5211 static ir_node *gen_bswap(ir_node *node)
5213 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5214 dbg_info *dbgi = get_irn_dbg_info(node);
5216 ir_node *block = get_nodes_block(node);
5217 ir_node *new_block = be_transform_node(block);
5218 ir_mode *mode = get_irn_mode(param);
5219 unsigned size = get_mode_size_bits(mode);
5220 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5224 if (ia32_cg_config.use_i486) {
5225 /* swap available */
5226 return new_bd_ia32_Bswap(dbgi, new_block, param);
5228 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5229 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5231 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5232 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5234 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5236 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5237 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5239 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5240 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5243 /* swap16 always available */
5244 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5247 panic("Invalid bswap size (%d)", size);
5252 * Transform builtin outport.
/* Transform builtin outport into an ia32 Outport ("out") instruction.
 * Param 0 is the port number (folded into an immediate when possible),
 * param 1 the value; the value's mode selects the access width. */
5254 static ir_node *gen_outport(ir_node *node)
5256 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5257 ir_node *oldv = get_Builtin_param(node, 1);
5258 ir_mode *mode = get_irn_mode(oldv);
5259 ir_node *value = be_transform_node(oldv);
5260 ir_node *block = be_transform_node(get_nodes_block(node));
5261 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5262 dbg_info *dbgi = get_irn_dbg_info(node);
5264 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5265 set_ia32_ls_mode(res, mode);
5270 * Transform builtin inport.
/* Transform builtin inport into an ia32 Inport ("in") instruction.
 * The access width is taken from the builtin's declared result type;
 * param 0 is the port number (folded into an immediate when possible). */
5272 static ir_node *gen_inport(ir_node *node)
5274 ir_type *tp = get_Builtin_type(node);
5275 ir_type *rstp = get_method_res_type(tp, 0);
5276 ir_mode *mode = get_type_mode(rstp);
5277 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5278 ir_node *block = be_transform_node(get_nodes_block(node));
5279 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5280 dbg_info *dbgi = get_irn_dbg_info(node);
5282 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5283 set_ia32_ls_mode(res, mode);
5285 /* check for missing Result Proj */
5290 * Transform a builtin inner trampoline
/* Transform builtin inner_trampoline: write a 10-byte x86 trampoline
 * to memory at <ptr>:
 *     B9 <env>          mov ecx, <env>         (1 + 4 bytes)
 *     E9 <disp32>       jmp rel32 <callee>     (1 + 4 bytes)
 * where disp32 = callee - (trampoline + 10), i.e. relative to the end
 * of the trampoline.  Params: 0 = trampoline address, 1 = callee,
 * 2 = static chain / environment value. */
5292 static ir_node *gen_inner_trampoline(ir_node *node)
5294 ir_node *ptr = get_Builtin_param(node, 0);
5295 ir_node *callee = get_Builtin_param(node, 1);
5296 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5297 ir_node *mem = get_Builtin_mem(node);
5298 ir_node *block = get_nodes_block(node);
5299 ir_node *new_block = be_transform_node(block);
5303 ir_node *trampoline;
5305 dbg_info *dbgi = get_irn_dbg_info(node);
5306 ia32_address_t addr;
5308 /* construct store address */
5309 memset(&addr, 0, sizeof(addr));
5310 ia32_create_address_mode(&addr, ptr, 0);
5312 if (addr.base == NULL) {
5313 addr.base = noreg_GP;
5315 addr.base = be_transform_node(addr.base);
5318 if (addr.index == NULL) {
5319 addr.index = noreg_GP;
5321 addr.index = be_transform_node(addr.index);
5323 addr.mem = be_transform_node(mem);
5325 /* mov ecx, <env> */
/* store the 0xB9 opcode byte ("mov ecx, imm32") */
5326 val = ia32_create_Immediate(NULL, 0, 0xB9);
5327 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5328 addr.index, addr.mem, val);
5329 set_irn_pinned(store, get_irn_pinned(node));
5330 set_ia32_op_type(store, ia32_AddrModeD);
5331 set_ia32_ls_mode(store, mode_Bu);
5332 set_address(store, &addr);
/* store the 32-bit environment value as the mov's immediate operand */
5336 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5337 addr.index, addr.mem, env);
5338 set_irn_pinned(store, get_irn_pinned(node));
5339 set_ia32_op_type(store, ia32_AddrModeD);
5340 set_ia32_ls_mode(store, mode_Iu);
5341 set_address(store, &addr);
5345 /* jmp rel <callee> */
/* store the 0xE9 opcode byte ("jmp rel32") */
5346 val = ia32_create_Immediate(NULL, 0, 0xE9);
5347 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5348 addr.index, addr.mem, val);
5349 set_irn_pinned(store, get_irn_pinned(node));
5350 set_ia32_op_type(store, ia32_AddrModeD);
5351 set_ia32_ls_mode(store, mode_Bu);
5352 set_address(store, &addr);
5356 trampoline = be_transform_node(ptr);
5358 /* the callee is typically an immediate */
5359 if (is_SymConst(callee)) {
/* fold the -10 bias (trampoline length) directly into the constant */
5360 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5362 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* disp32 = (callee - 10) - trampoline */
5364 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the displacement as the jmp's rel32 operand */
5366 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5367 addr.index, addr.mem, rel);
5368 set_irn_pinned(store, get_irn_pinned(node));
5369 set_ia32_op_type(store, ia32_AddrModeD);
5370 set_ia32_ls_mode(store, mode_Iu);
5371 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address (see gen_Proj_Builtin) */
5376 return new_r_Tuple(new_block, 2, in);
5380 * Transform Builtin node.
/* Transform a Builtin node: dispatch on the builtin kind to the
 * specific gen_* transformer; panics on kinds the ia32 backend does
 * not implement. */
5382 static ir_node *gen_Builtin(ir_node *node)
5384 ir_builtin_kind kind = get_Builtin_kind(node);
5388 return gen_trap(node);
5389 case ir_bk_debugbreak:
5390 return gen_debugbreak(node);
5391 case ir_bk_return_address:
5392 return gen_return_address(node);
5393 case ir_bk_frame_address:
5394 return gen_frame_address(node);
5395 case ir_bk_prefetch:
5396 return gen_prefetch(node);
5398 return gen_ffs(node);
5400 return gen_clz(node);
5402 return gen_ctz(node);
5404 return gen_parity(node);
5405 case ir_bk_popcount:
5406 return gen_popcount(node);
5408 return gen_bswap(node);
5410 return gen_outport(node);
5412 return gen_inport(node);
5413 case ir_bk_inner_trampoline:
5414 return gen_inner_trampoline(node);
5416 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5420 * Transform Proj(Builtin) node.
/* Transform a Proj from a Builtin: map the generic Builtin proj numbers
 * onto the results of the already-transformed ia32 node.  Most builtins
 * produce a single value or memory directly; inport and inner_trampoline
 * need explicit remapping of their result/memory projections. */
5422 static ir_node *gen_Proj_Builtin(ir_node *proj)
5424 ir_node *node = get_Proj_pred(proj);
5425 ir_node *new_node = be_transform_node(node);
5426 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node IS the result */
5429 case ir_bk_return_address:
5430 case ir_bk_frame_address:
5435 case ir_bk_popcount:
5437 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node IS the memory result */
5440 case ir_bk_debugbreak:
5441 case ir_bk_prefetch:
5443 assert(get_Proj_proj(proj) == pn_Builtin_M);
5446 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5447 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5449 assert(get_Proj_proj(proj) == pn_Builtin_M);
5450 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline returns a Tuple: [0] = memory, [1] = address */
5452 case ir_bk_inner_trampoline:
5453 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5454 return get_Tuple_pred(new_node, 1);
5456 assert(get_Proj_proj(proj) == pn_Builtin_M);
5457 return get_Tuple_pred(new_node, 0);
5460 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate the node unchanged, but mark it as
 * modifying the flags (the stack pointer adjustment is emitted as an
 * add/sub, which clobbers the x86 flags register). */
5463 static ir_node *gen_be_IncSP(ir_node *node)
5465 ir_node *res = be_duplicate_node(node);
5466 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5472 * Transform the Projs from a be_Call.
/* Transform a Proj from a be_Call: translate be_Call proj numbers to
 * the corresponding ia32_Call proj numbers and re-attach register
 * constraints on the new Proj. */
5474 static ir_node *gen_Proj_be_Call(ir_node *node)
5476 ir_node *call = get_Proj_pred(node);
5477 ir_node *new_call = be_transform_node(call);
5478 dbg_info *dbgi = get_irn_dbg_info(node);
5479 long proj = get_Proj_proj(node);
5480 ir_mode *mode = get_irn_mode(node);
5483 if (proj == pn_be_Call_M_regular) {
5484 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5486 /* transform call modes */
5487 if (mode_is_data(mode)) {
5488 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5492 /* Map from be_Call to ia32_Call proj number */
5493 if (proj == pn_be_Call_sp) {
5494 proj = pn_ia32_Call_stack;
/* NOTE(review): pn_be_Call_M_regular already returned above, so this
 * branch looks unreachable — confirm against the full source */
5495 } else if (proj == pn_be_Call_M_regular) {
5496 proj = pn_ia32_Call_M;
/* result proj: find the ia32_Call output with the same limited
 * register requirement as the original be_Call output */
5498 arch_register_req_t const *const req = arch_get_register_req_out(node);
5499 int const n_outs = arch_irn_get_n_outs(new_call);
5502 assert(proj >= pn_be_Call_first_res);
5503 assert(req->type & arch_register_req_type_limited);
5505 for (i = 0; i < n_outs; ++i) {
5506 arch_register_req_t const *const new_req
5507 = arch_get_out_register_req(new_call, i);
5509 if (!(new_req->type & arch_register_req_type_limited) ||
5510 new_req->cls != req->cls ||
5511 *new_req->limited != *req->limited)
5520 res = new_rd_Proj(dbgi, new_call, mode, proj);
5522 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5524 case pn_ia32_Call_stack:
5525 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5528 case pn_ia32_Call_fpcw:
5529 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5537 * Transform the Projs from a Cmp.
/* A Proj from a Cmp must never reach the backend: mode_b values are
 * expected to be lowered before code selection, so this always panics. */
5539 static ir_node *gen_Proj_Cmp(ir_node *node)
5541 /* this probably means not all mode_b nodes were lowered... */
5542 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5547 * Transform the Projs from a Bound.
/* Transform a Proj from a Bound node: memory and the checked index are
 * routed through unchanged; the control-flow projections map onto the
 * true/false exits of the Jcc produced for the bounds check. */
5549 static ir_node *gen_Proj_Bound(ir_node *node)
5552 ir_node *pred = get_Proj_pred(node);
5554 switch (get_Proj_proj(node)) {
5556 return be_transform_node(get_Bound_mem(pred));
5557 case pn_Bound_X_regular:
5558 new_node = be_transform_node(pred);
5559 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5560 case pn_Bound_X_except:
5561 new_node = be_transform_node(pred);
5562 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
/* the Bound's result is just the (unchanged) index value */
5564 return be_transform_node(get_Bound_index(pred));
5566 panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node: the memory output is always the
 * last output of the transformed ASM; data outputs keep their position. */
5570 static ir_node *gen_Proj_ASM(ir_node *node)
5572 ir_mode *mode = get_irn_mode(node);
5573 ir_node *pred = get_Proj_pred(node);
5574 ir_node *new_pred = be_transform_node(pred);
5575 long pos = get_Proj_proj(node);
5577 if (mode == mode_M) {
5578 pos = arch_irn_get_n_outs(new_pred)-1;
5579 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5581 } else if (mode_is_float(mode)) {
5584 panic("unexpected proj mode at ASM");
5587 return new_r_Proj(new_pred, mode, pos);
5591 * Transform and potentially renumber Proj nodes.
/* Transform (and potentially renumber) Proj nodes: dispatch on the
 * opcode of the predecessor to the specialized Proj transformers;
 * Projs from already-transformed ia32 nodes are renumbered in place. */
5593 static ir_node *gen_Proj(ir_node *node)
5595 ir_node *pred = get_Proj_pred(node);
5598 switch (get_irn_opcode(pred)) {
/* a Store only produces memory; its transformed node is the memory */
5600 proj = get_Proj_proj(node);
5601 if (proj == pn_Store_M) {
5602 return be_transform_node(pred);
5604 panic("No idea how to transform proj->Store");
5607 return gen_Proj_Load(node);
5609 return gen_Proj_ASM(node);
5611 return gen_Proj_Builtin(node);
5615 return gen_Proj_DivMod(node);
5617 return gen_Proj_CopyB(node);
5619 return gen_Proj_Quot(node);
5621 return gen_Proj_be_SubSP(node);
5623 return gen_Proj_be_AddSP(node);
5625 return gen_Proj_be_Call(node);
5627 return gen_Proj_Cmp(node);
5629 return gen_Proj_Bound(node);
5631 proj = get_Proj_proj(node);
5633 case pn_Start_X_initial_exec: {
5634 ir_node *block = get_nodes_block(pred);
5635 ir_node *new_block = be_transform_node(block);
5636 dbg_info *dbgi = get_irn_dbg_info(node);
5637 /* we exchange the ProjX with a jump */
5638 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5643 case pn_Start_P_tls:
5644 return gen_Proj_tls(node);
5649 if (is_ia32_l_FloattoLL(pred)) {
5650 return gen_Proj_l_FloattoLL(node);
5652 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* gp-register Projs are renumbered onto the transformed predecessor;
 * the original node number is kept for debugging */
5656 ir_mode *mode = get_irn_mode(node);
5657 if (ia32_mode_needs_gp_reg(mode)) {
5658 ir_node *new_pred = be_transform_node(pred);
5659 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5660 get_Proj_proj(node));
5661 new_proj->node_nr = node->node_nr;
5666 return be_duplicate_node(node);
5670 * Enters all transform functions into the generic pointer
/* Register the per-opcode transform functions with the generic backend
 * transformation machinery.  Opcodes mapped to be_duplicate_node are
 * already in final (ia32/backend) form and are copied unchanged. */
5672 static void register_transformers(void)
5674 /* first clear the generic function pointer for all ops */
5675 be_start_transform_setup();
5677 be_set_transform_function(op_Abs, gen_Abs);
5678 be_set_transform_function(op_Add, gen_Add);
5679 be_set_transform_function(op_And, gen_And);
5680 be_set_transform_function(op_ASM, gen_ASM);
5681 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5682 be_set_transform_function(op_be_Call, gen_be_Call);
5683 be_set_transform_function(op_be_Copy, gen_be_Copy);
5684 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5685 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5686 be_set_transform_function(op_be_Return, gen_be_Return);
5687 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5688 be_set_transform_function(op_Bound, gen_Bound);
5689 be_set_transform_function(op_Builtin, gen_Builtin);
5690 be_set_transform_function(op_Cmp, gen_Cmp);
5691 be_set_transform_function(op_Cond, gen_Cond);
5692 be_set_transform_function(op_Const, gen_Const);
5693 be_set_transform_function(op_Conv, gen_Conv);
5694 be_set_transform_function(op_CopyB, gen_CopyB);
5695 be_set_transform_function(op_Div, gen_Div);
5696 be_set_transform_function(op_DivMod, gen_DivMod);
5697 be_set_transform_function(op_Eor, gen_Eor);
5698 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5699 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5700 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5701 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5702 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5703 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5704 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5705 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5706 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5707 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5708 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5709 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5710 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5711 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5712 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5713 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5714 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5715 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5716 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5717 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5718 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5719 be_set_transform_function(op_IJmp, gen_IJmp);
5720 be_set_transform_function(op_Jmp, gen_Jmp);
5721 be_set_transform_function(op_Load, gen_Load);
5722 be_set_transform_function(op_Minus, gen_Minus);
5723 be_set_transform_function(op_Mod, gen_Mod);
5724 be_set_transform_function(op_Mul, gen_Mul);
5725 be_set_transform_function(op_Mulh, gen_Mulh);
5726 be_set_transform_function(op_Mux, gen_Mux);
5727 be_set_transform_function(op_Not, gen_Not);
5728 be_set_transform_function(op_Or, gen_Or);
5729 be_set_transform_function(op_Phi, gen_Phi);
5730 be_set_transform_function(op_Proj, gen_Proj);
5731 be_set_transform_function(op_Quot, gen_Quot);
5732 be_set_transform_function(op_Rotl, gen_Rotl);
5733 be_set_transform_function(op_Shl, gen_Shl);
5734 be_set_transform_function(op_Shr, gen_Shr);
5735 be_set_transform_function(op_Shrs, gen_Shrs);
5736 be_set_transform_function(op_Store, gen_Store);
5737 be_set_transform_function(op_Sub, gen_Sub);
5738 be_set_transform_function(op_SymConst, gen_SymConst);
5739 be_set_transform_function(op_Unknown, gen_Unknown);
5743 * Pre-transform all unknown and noreg nodes.
/* Pre-transform the per-graph "no register" placeholder nodes and cache
 * the transformed NoMem / NoReg_GP nodes in file-scope variables used
 * throughout the transformers above. */
5745 static void ia32_pretransform_node(void)
5747 ia32_code_gen_t *cg = env_cg;
5749 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5750 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5751 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5753 nomem = get_irg_no_mem(current_ir_graph);
5754 noreg_GP = ia32_new_NoReg_gp(cg);
5760 * Post-process all calls if we are in SSE mode.
5761 * The ABI requires that the results are in st0, copy them
5762 * to a xmm register.
/* In SSE mode, calls following the x87 ABI still return floats in st(0).
 * For every collected call with a float result, patch the users of that
 * result: an xStore user is rewritten into a direct vfst; all other
 * users get the value spilled to the frame via vfst and reloaded into
 * an SSE register via xLoad. */
5764 static void postprocess_fp_call_results(void)
5768 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5769 ir_node *call = call_list[i];
5770 ir_type *mtp = call_types[i];
5773 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5774 ir_type *res_tp = get_method_res_type(mtp, j);
5775 ir_node *res, *new_res;
5776 const ir_edge_t *edge, *next;
5779 if (! is_atomic_type(res_tp)) {
5780 /* no floating point return */
5783 mode = get_type_mode(res_tp);
5784 if (! mode_is_float(mode)) {
5785 /* no floating point return */
/* the x87 result: Proj for the j-th vf register output of the call */
5789 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5792 /* now patch the users */
5793 foreach_out_edge_safe(res, edge, next) {
5794 ir_node *succ = get_edge_src_irn(edge);
/* Keeps are left alone */
5797 if (be_is_Keep(succ))
5800 if (is_ia32_xStore(succ)) {
5801 /* an xStore can be patched into an vfst */
5802 dbg_info *db = get_irn_dbg_info(succ);
5803 ir_node *block = get_nodes_block(succ);
5804 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5805 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5806 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5807 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5808 ir_mode *mode = get_ia32_ls_mode(succ);
5810 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5811 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5812 if (is_ia32_use_frame(succ))
5813 set_ia32_use_frame(st);
5814 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5815 set_irn_pinned(st, get_irn_pinned(succ));
5816 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st(0) -> xmm transfer once per call result */
5820 if (new_res == NULL) {
5821 dbg_info *db = get_irn_dbg_info(call);
5822 ir_node *block = get_nodes_block(call);
5823 ir_node *frame = get_irg_frame(current_ir_graph);
5824 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5825 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5826 ir_node *vfst, *xld, *new_mem;
5828 /* store st(0) on stack */
5829 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5830 set_ia32_op_type(vfst, ia32_AddrModeD);
5831 set_ia32_use_frame(vfst);
5833 /* load into SSE register */
5834 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5835 set_ia32_op_type(xld, ia32_AddrModeS);
5836 set_ia32_use_frame(xld);
5838 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5839 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* serialize: all former users of the call memory now see the load's memory */
5841 if (old_mem != NULL) {
5842 edges_reroute(old_mem, new_mem, current_ir_graph);
5846 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5853 /* do the transformation */
5853 /* do the transformation */
/* Entry point of the ia32 code selection phase: sets up the transformer
 * table and analyses, runs the generic transform driver, then (in SSE2
 * mode) fixes up x87-ABI float call results. */
5854 void ia32_transform_graph(ia32_code_gen_t *cg)
5858 register_transformers();
5860 initial_fpcw = NULL;
/* heights are needed by the address-mode matcher */
5863 be_timer_push(T_HEIGHTS);
5864 heights = heights_new(cg->irg);
5865 be_timer_pop(T_HEIGHTS);
5866 ia32_calculate_non_address_mode_nodes(cg->irg);
5868 /* the transform phase is not safe for CSE (yet) because several nodes get
5869 * attributes set after their creation */
5870 cse_last = get_opt_cse();
/* call_list/call_types collect calls with float results for the SSE fixup */
5873 call_list = NEW_ARR_F(ir_node *, 0);
5874 call_types = NEW_ARR_F(ir_type *, 0);
5875 be_transform_graph(cg->irg, ia32_pretransform_node);
5877 if (ia32_cg_config.use_sse2)
5878 postprocess_fp_call_results();
5879 DEL_ARR_F(call_types);
5880 DEL_ARR_F(call_list);
/* restore the CSE setting disabled for the transform phase */
5882 set_opt_cse(cse_last);
5884 ia32_free_non_address_mode_nodes();
5885 heights_free(heights);
5889 void ia32_init_transform(void)
5891 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");