2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of loading them */
71 #undef CONSTRUCT_SSE_CONST
/* shortcuts for the register-class modes of the x87 (vfp) and SSE (xmm) units */
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* untransformed and transformed value of the initial fpu control word */
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
/* Signatures of the new_bd_ia32_* node constructors used by the generic
 * binop/unop/shift transformation helpers below. */
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations for helpers defined later in this file */
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* it's enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero if a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero if a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero if a node represents the -1 constant (all bits set). */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if the constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 can be materialized directly (vfldz/vfld1, see gen_Const) */
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
151 * returns true if the constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
168 if (mode == mode_D) {
/* assemble the low 32 bits of the double from the tarval's bytes */
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * Return NoReg or the pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_options.pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
/* position-independent code: address entities relative to the PIC base */
192 return arch_env->impl->get_pic_base(irg);
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
/* SSE path: construct simple constants in registers, load the rest */
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
/* construct 1.0 from all-ones via shift left then shift right */
225 } else if (tarval_is_one(tv)) {
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
/* fall back to loading the constant from a float const entity */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 materialize 0.0 and 1.0 directly */
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 } else { /* non-float mode */
/* normalize the tarval to a 32bit unsigned value for the immediate */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
337 * Transforms a SymConst.
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
/* float SymConsts become frame-relative loads (SSE or x87) */
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
/* thread-local entities are addressed relative to the TLS segment base */
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
/** Create a fixed-layout two-element array type of @p tp (bounds 0..2). */
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
/* the array types are created once per mode and cached in local statics */
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
/* entities are created on first request and cached afterwards */
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS gets a float[2] = { 0.0f, 2^64 } compound initializer */
456 if (kct == ia32_ULLBIAS) {
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
/* otherwise the node must be the result Proj of a Load in this block */
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
/** Collected result of matching one operation's operands into ia32
 * addressing/operand modes (filled in by match_arguments). */
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type; /**< normal or source-address-mode operation */
559 unsigned commutative : 1; /**< operands may be swapped */
560 unsigned ins_permuted : 1; /**< operands were swapped during matching */
/** Fill @p addr from an address @p ptr and memory input, transforming all
 * inputs into the new graph. */
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
/* missing base/index parts default to the noreg placeholder */
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/** Fill the address mode of @p am for a float Const (loaded from a const
 * entity) or a Proj(Load) operand. */
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
584 if (is_Const(node)) {
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
596 am->ls_mode = get_type_mode(get_entity_type(entity));
/* the constant entity never changes, so the load may float */
597 am->pinned = op_pin_state_floats;
/* otherwise: node is the result Proj of a Load */
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copy the address-mode attributes from @p addr onto the ia32 node. */
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
/* mark commutative nodes so later phases may still swap the operands */
646 set_ia32_commutative(node);
650 * Check if a given node is a Down-Conv, i.e. an integer Conv
651 * from a mode with more bits to a mode with fewer bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 /* we only want to skip the conv when we're the only user
666 * (because this test is used in the context of address-mode selection
667 * and we don't want to use address mode for multiple users) */
668 if (get_irn_n_edges(node) > 1)
671 src_mode = get_irn_mode(get_Conv_op(node));
672 dest_mode = get_irn_mode(node);
/* both modes must live in gp registers and the result must not widen */
674 ia32_mode_needs_gp_reg(src_mode) &&
675 ia32_mode_needs_gp_reg(dest_mode) &&
676 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
679 /** Skip all Down-Conv's on a given node and return the resulting node. */
680 ir_node *ia32_skip_downconv(ir_node *node)
682 while (is_downconv(node))
683 node = get_Conv_op(node);
/** Return true for a single-user Conv between equally-sized gp modes,
 * i.e. a conv that only changes signedness. */
688 static bool is_sameconv(ir_node *node)
696 /* we only want to skip the conv when we're the only user
697 * (because this test is used in the context of address-mode selection
698 * and we don't want to use address mode for multiple users) */
699 if (get_irn_n_edges(node) > 1)
702 src_mode = get_irn_mode(get_Conv_op(node));
703 dest_mode = get_irn_mode(node);
705 ia32_mode_needs_gp_reg(src_mode) &&
706 ia32_mode_needs_gp_reg(dest_mode) &&
707 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
710 /** Skip all signedness-only Convs on a given node. */
711 static ir_node *ia32_skip_sameconv(ir_node *node)
713 while (is_sameconv(node))
714 node = get_Conv_op(node);
/** Widen @p node via an I2I Conv; presumably the target is a 32bit gp mode
 * chosen according to the source signedness (tgt_mode set in elided lines)
 * — NOTE(review): confirm against the full source. */
719 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
721 ir_mode *mode = get_irn_mode(node);
726 if (mode_is_signed(mode)) {
731 block = get_nodes_block(node);
732 dbgi = get_irn_dbg_info(node);
734 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
738 * matches operands of a node into ia32 addressing/operand modes. This covers
739 * usage of source address mode, immediates, operations with non 32-bit modes,
741 * The resulting data is filled into the @p am struct. block is the block
742 * of the node whose arguments are matched. op1, op2 are the first and second
743 * input that are matched (op1 may be NULL). other_op is another unrelated
744 * input that is not matched! but which is needed sometimes to check if AM
745 * for op1/op2 is legal.
746 * @p flags describes the supported modes of the operation in detail.
748 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
749 ir_node *op1, ir_node *op2, ir_node *other_op,
752 ia32_address_t *addr = &am->addr;
753 ir_mode *mode = get_irn_mode(op2);
754 int mode_bits = get_mode_size_bits(mode);
755 ir_node *new_op1, *new_op2;
757 unsigned commutative;
758 int use_am_and_immediates;
761 memset(am, 0, sizeof(am[0]));
763 commutative = (flags & match_commutative) != 0;
764 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
765 use_am = (flags & match_am) != 0;
766 use_immediate = (flags & match_immediate) != 0;
767 assert(!use_am_and_immediates || use_immediate);
770 assert(!commutative || op1 != NULL);
771 assert(use_am || !(flags & match_8bit_am));
772 assert(use_am || !(flags & match_16bit_am));
774 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
775 (mode_bits == 16 && !(flags & match_16bit_am))) {
779 /* we can simply skip downconvs for mode neutral nodes: the upper bits
780 * can be random for these operations */
781 if (flags & match_mode_neutral) {
782 op2 = ia32_skip_downconv(op2);
784 op1 = ia32_skip_downconv(op1);
787 op2 = ia32_skip_sameconv(op2);
789 op1 = ia32_skip_sameconv(op1);
793 /* match immediates. firm nodes are normalized: constants are always on the
796 if (!(flags & match_try_am) && use_immediate) {
797 new_op2 = ia32_try_create_Immediate(op2, 0);
/* second choice: fold op2 as a load in source address mode */
800 if (new_op2 == NULL &&
801 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
802 build_address(am, op2, ia32_create_am_normal);
803 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
804 if (mode_is_float(mode)) {
805 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
809 am->op_type = ia32_AddrModeS;
/* commutative: try folding op1 instead (operands get permuted) */
810 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
812 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
814 build_address(am, op1, ia32_create_am_normal);
816 if (mode_is_float(mode)) {
817 noreg = ia32_new_NoReg_vfp(current_ir_graph);
822 if (new_op2 != NULL) {
825 new_op1 = be_transform_node(op2);
827 am->ins_permuted = true;
829 am->op_type = ia32_AddrModeS;
831 am->op_type = ia32_Normal;
833 if (flags & match_try_am) {
/* plain register operands; widen to 32bit if requested */
839 mode = get_irn_mode(op2);
840 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
841 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
843 new_op2 = create_upconv(op2, NULL);
844 am->ls_mode = mode_Iu;
846 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
848 new_op2 = be_transform_node(op2);
849 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
852 if (addr->base == NULL)
853 addr->base = noreg_GP;
854 if (addr->index == NULL)
855 addr->index = noreg_GP;
856 if (addr->mem == NULL)
859 am->new_op1 = new_op1;
860 am->new_op2 = new_op2;
861 am->commutative = commutative;
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do if no load was folded into the node */
879 if (am->mem_proj == NULL)
882 /* we have to create a mode_T so the old MemProj can attach to us */
883 mode = get_irn_mode(node);
884 load = get_Proj_pred(am->mem_proj);
/* redirect users of the folded load to the new AM node */
886 be_set_transformed_node(load, node);
888 if (mode != mode_T) {
889 set_irn_mode(node, mode_T);
890 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 construct_binop_func *func, match_flags_t flags)
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 block = get_nodes_block(node);
914 match_arguments(&am, block, op1, op2, NULL, flags);
916 dbgi = get_irn_dbg_info(node);
917 new_block = be_transform_node(block);
918 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 am.new_op1, am.new_op2);
920 set_am_attributes(new_node, &am);
921 /* we can't use source address mode anymore when using immediates */
922 if (!(flags & match_am_and_immediates) &&
923 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 set_ia32_am_support(new_node, ia32_am_none);
925 SET_IA32_ORIG_NODE(new_node, node);
927 new_node = fix_mem_proj(new_node, &am);
933 * Generic names for the inputs of an ia32 binary op.
936 n_ia32_l_binop_left, /**< ia32 left input */
937 n_ia32_l_binop_right, /**< ia32 right input */
938 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* the generic input numbering must agree with the Adc and Sbb definitions */
940 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
941 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
942 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
943 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
944 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
945 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructed ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 ir_node *src_block = get_nodes_block(node);
959 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 ir_node *block, *new_node, *new_eflags;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
967 match_arguments(&am, src_block, op1, op2, eflags, flags);
969 dbgi = get_irn_dbg_info(node);
970 block = be_transform_node(src_block);
971 new_eflags = be_transform_node(eflags);
972 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2, new_eflags);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
/** Return the transformed initial fpu control word, creating it lazily. */
986 static ir_node *get_fpcw(void)
988 if (initial_fpcw != NULL)
991 initial_fpcw = be_transform_node(old_initial_fpcw);
/** Skip single-user float Convs that do not narrow the value. */
995 static ir_node *skip_float_upconv(ir_node *node)
997 ir_mode *mode = get_irn_mode(node);
998 assert(mode_is_float(mode));
1000 while (is_Conv(node)) {
1001 ir_node *pred = get_Conv_op(node);
1002 ir_mode *pred_mode = get_irn_mode(pred);
1005 * suboptimal, but without this check the address mode matcher
1006 * can incorrectly think that something has only 1 user
1008 if (get_irn_n_edges(node) > 1)
/* stop at non-float predecessors or at narrowing convs */
1011 if (!mode_is_float(pred_mode)
1012 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1021 * Construct a standard x87 float binary operation, set AM if required.
1023 * @param op1 The first operand
1024 * @param op2 The second operand
1025 * @param func The node constructor function
1026 * @return The constructed ia32 node.
1028 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1029 construct_binop_float_func *func)
1035 ia32_address_mode_t am;
1036 ia32_address_t *addr = &am.addr;
1037 ia32_x87_attr_t *attr;
1038 /* All operations are considered commutative, because there are reverse
1040 match_flags_t flags = match_commutative | match_am;
1042 op1 = skip_float_upconv(op1);
1043 op2 = skip_float_upconv(op2);
1045 block = get_nodes_block(node);
1046 match_arguments(&am, block, op1, op2, NULL, flags);
1048 dbgi = get_irn_dbg_info(node);
1049 new_block = be_transform_node(block);
/* x87 operations additionally consume the fpu control word */
1050 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1051 am.new_op1, am.new_op2, get_fpcw());
1052 set_am_attributes(new_node, &am);
/* remember whether the operands were swapped during matching */
1054 attr = get_ia32_x87_attr(new_node);
1055 attr->attr.data.ins_permuted = am.ins_permuted;
1057 SET_IA32_ORIG_NODE(new_node, node);
1059 new_node = fix_mem_proj(new_node, &am);
1065 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1067 * @param op1 The first operand
1068 * @param op2 The second operand
1069 * @param func The node constructor function
1070 * @return The constructed ia32 node.
1072 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1073 construct_shift_func *func,
1074 match_flags_t flags)
1077 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1078 ir_mode *mode = get_irn_mode(node);
1080 assert(! mode_is_float(mode));
1081 assert(flags & match_immediate);
1082 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* ia32 shifts implicitly reduce the count modulo 32 */
1084 if (get_mode_modulo_shift(mode) != 32)
1085 panic("modulo shift!=32 not supported by ia32 backend");
1087 if (flags & match_mode_neutral) {
1088 op1 = ia32_skip_downconv(op1);
1089 new_op1 = be_transform_node(op1);
1090 } else if (get_mode_size_bits(mode) != 32) {
1091 new_op1 = create_upconv(op1, node);
1093 new_op1 = be_transform_node(op1);
1096 /* the shift amount can be any mode that is bigger than 5 bits, since all
1097 * other bits are ignored anyway */
1098 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1099 ir_node *const op = get_Conv_op(op2);
1100 if (mode_is_float(get_irn_mode(op)))
1103 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1105 new_op2 = create_immediate_or_transform(op2, 0);
1107 dbgi = get_irn_dbg_info(node);
1108 block = get_nodes_block(node);
1109 new_block = be_transform_node(block);
1110 new_node = func(dbgi, new_block, new_op1, new_op2);
1111 SET_IA32_ORIG_NODE(new_node, node);
1113 /* lowered shift instruction may have a dependency operand, handle it here */
1114 if (get_irn_arity(node) == 3) {
1115 /* we have a dependency */
1116 ir_node* dep = get_irn_n(node, 2);
1117 if (get_irn_n_edges(dep) > 1) {
1118 /* ... which has at least one user other than 'node' */
1119 ir_node *new_dep = be_transform_node(dep);
1120 add_irn_dep(new_node, new_dep);
1129 * Construct a standard unary operation, set AM and immediate if required.
1131 * @param node The original node for which the unop is created
1131 * @param op The operand
1132 * @param func The node constructor function
1133 * @return The constructed ia32 node.
1135 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1136 match_flags_t flags)
1139 ir_node *block, *new_block, *new_op, *new_node;
1141 assert(flags == 0 || flags == match_mode_neutral)
1142 if (flags & match_mode_neutral) {
1143 op = ia32_skip_downconv(op);
1146 new_op = be_transform_node(op);
1147 dbgi = get_irn_dbg_info(node);
1148 block = get_nodes_block(node);
1149 new_block = be_transform_node(block);
1150 new_node = func(dbgi, new_block, new_op);
1152 SET_IA32_ORIG_NODE(new_node, node);
/** Materialize an address as an ia32 Lea node. */
1157 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1158 ia32_address_t *addr)
1168 base = be_transform_node(base);
1175 idx = be_transform_node(idx);
1178 /* segment overrides are ineffective for Leas :-( so we have to patch
1180 if (addr->tls_segment) {
1181 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1182 assert(addr->symconst_ent != NULL);
1183 if (base == noreg_GP)
/* add the TLS segment base explicitly via an extra Lea */
1186 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1187 addr->tls_segment = false;
1190 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1191 set_address(res, addr);
1197 * Returns non-zero if a given address mode has a symbolic or
1198 * numerical offset != 0, or references a frame entity.
1200 static int am_has_immediates(const ia32_address_t *addr)
1202 return addr->offset != 0 || addr->symconst_ent != NULL
1203 || addr->frame_entity || addr->use_frame;
/** Constructor signature for the ShlD/ShrD double-shift nodes. */
1206 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1207 ir_node *high, ir_node *low,
1211 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1212 * op1 - target to be shifted
1213 * op2 - contains bits to be shifted into target
1215 * Only op3 can be an immediate.
1217 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1218 ir_node *high, ir_node *low, ir_node *count,
1219 new_shiftd_func func)
1221 ir_node *new_block = be_transform_node(block);
1222 ir_node *new_high = be_transform_node(high);
1223 ir_node *new_low = be_transform_node(low);
1227 /* the shift amount can be any mode that is bigger than 5 bits, since all
1228 * other bits are ignored anyway */
1229 while (is_Conv(count) &&
1230 get_irn_n_edges(count) == 1 &&
1231 mode_is_int(get_irn_mode(count))) {
1232 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1233 count = get_Conv_op(count);
1235 new_count = create_immediate_or_transform(count, 0);
1237 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1242 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
/* (typo in original: "wether" -> "whether".)  Only the both-Const,
 * both-long-representable case is decided here; the fall-through
 * (returning false) is in elided lines below. */
1245 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1247 if (is_Const(value1) && is_Const(value2)) {
1248 ir_tarval *tv1 = get_Const_tarval(value1);
1249 ir_tarval *tv2 = get_Const_tarval(value2);
1250 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1251 long v1 = get_tarval_long(tv1);
1252 long v2 = get_tarval_long(tv2);
/* v1 <= v2 keeps the pair ordered; v2 == 32-v1 is the complement test */
1253 return v1 <= v2 && v2 == 32-v1;
/* Tries to recognise an Or/Add of a Shl and a Shr as a 64-bit double
 * shift (ShlD/ShrD), including the Not-based patterns that lower_dw
 * emits. Returns the generated node, or (in elided lines) NULL if no
 * pattern matches. */
1259 static ir_node *match_64bit_shift(ir_node *node)
1261 ir_node *op1 = get_binop_left(node);
1262 ir_node *op2 = get_binop_right(node);
1263 assert(is_Or(node) || is_Add(node));
1271 /* match ShlD operation */
1272 if (is_Shl(op1) && is_Shr(op2)) {
1273 ir_node *shl_right = get_Shl_right(op1);
1274 ir_node *shl_left = get_Shl_left(op1);
1275 ir_node *shr_right = get_Shr_right(op2);
1276 ir_node *shr_left = get_Shr_left(op2);
1277 /* constant ShlD operation */
/* Shl by x combined with Shr by 32-x -> ShlD with constant count */
1278 if (is_complementary_shifts(shl_right, shr_right)) {
1279 dbg_info *dbgi = get_irn_dbg_info(node);
1280 ir_node *block = get_nodes_block(node);
1281 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1284 /* constant ShrD operation */
1285 if (is_complementary_shifts(shr_right, shl_right)) {
1286 dbg_info *dbgi = get_irn_dbg_info(node);
1287 ir_node *block = get_nodes_block(node);
1288 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1291 /* lower_dw produces the following for ShlD:
1292 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1293 if (is_Shr(shr_left) && is_Not(shr_right)
1294 && is_Const_1(get_Shr_right(shr_left))
1295 && get_Not_op(shr_right) == shl_right) {
1296 dbg_info *dbgi = get_irn_dbg_info(node);
1297 ir_node *block = get_nodes_block(node);
1298 ir_node *val_h = get_Shr_left(shr_left);
1299 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1302 /* lower_dw produces the following for ShrD:
1303 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1304 if (is_Shl(shl_left) && is_Not(shl_right)
1305 && is_Const_1(get_Shl_right(shl_left))
1306 && get_Not_op(shl_right) == shr_right) {
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_node *block = get_nodes_block(node);
1309 ir_node *val_h = get_Shl_left(shl_left);
1310 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1319 * Creates an ia32 Add.
1321 * @return the created ia32 Add node
/* Strategy (see numbered comment inside): the whole Add is first fed to
 * the address-mode builder; depending on what it finds, the result is a
 * Const, a plain transformed operand, a Lea, or a real Add with source
 * address mode. Double-shift patterns are peeled off first. */
1323 static ir_node *gen_Add(ir_node *node)
1325 ir_mode *mode = get_irn_mode(node);
1326 ir_node *op1 = get_Add_left(node);
1327 ir_node *op2 = get_Add_right(node);
1329 ir_node *block, *new_block, *new_node, *add_immediate_op;
1330 ia32_address_t addr;
1331 ia32_address_mode_t am;
/* an Or lowered to Add may really be a 64-bit double shift */
1333 new_node = match_64bit_shift(node);
1334 if (new_node != NULL)
1337 if (mode_is_float(mode)) {
1338 if (ia32_cg_config.use_sse2)
1339 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1340 match_commutative | match_am);
1342 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1345 ia32_mark_non_am(node);
1349 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1350 * 1. Add with immediate -> Lea
1351 * 2. Add with possible source address mode -> Add
1352 * 3. Otherwise -> Lea
1354 memset(&addr, 0, sizeof(addr));
1355 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1356 add_immediate_op = NULL;
1358 dbgi = get_irn_dbg_info(node);
1359 block = get_nodes_block(node);
1360 new_block = be_transform_node(block);
/* case 0: everything folded into the displacement -> emit a Const */
1363 if (addr.base == NULL && addr.index == NULL) {
1364 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1365 addr.symconst_sign, 0, addr.offset);
1366 SET_IA32_ORIG_NODE(new_node, node);
1369 /* add with immediate? */
1370 if (addr.index == NULL) {
1371 add_immediate_op = addr.base;
1372 } else if (addr.base == NULL && addr.scale == 0) {
1373 add_immediate_op = addr.index;
1376 if (add_immediate_op != NULL) {
/* x + 0: no immediate part at all, the Add is a no-op */
1377 if (!am_has_immediates(&addr)) {
1378 #ifdef DEBUG_libfirm
1379 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1382 return be_transform_node(add_immediate_op);
1385 new_node = create_lea_from_address(dbgi, new_block, &addr);
1386 SET_IA32_ORIG_NODE(new_node, node);
1390 /* test if we can use source address mode */
1391 match_arguments(&am, block, op1, op2, NULL, match_commutative
1392 | match_mode_neutral | match_am | match_immediate | match_try_am);
1394 /* construct an Add with source address mode */
1395 if (am.op_type == ia32_AddrModeS) {
1396 ia32_address_t *am_addr = &am.addr;
1397 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1398 am_addr->index, am_addr->mem, am.new_op1,
1400 set_am_attributes(new_node, &am);
1401 SET_IA32_ORIG_NODE(new_node, node);
1403 new_node = fix_mem_proj(new_node, &am);
1408 /* otherwise construct a lea */
1409 new_node = create_lea_from_address(dbgi, new_block, &addr);
1410 SET_IA32_ORIG_NODE(new_node, node);
1415 * Creates an ia32 Mul.
1417 * @return the created ia32 Mul node
/* Float multiplies go to SSE xMul or x87 vfmul; integer multiplies to
 * IMul with the full set of matcher flags (commutative, AM, immediates). */
1419 static ir_node *gen_Mul(ir_node *node)
1421 ir_node *op1 = get_Mul_left(node);
1422 ir_node *op2 = get_Mul_right(node);
1423 ir_mode *mode = get_irn_mode(node);
1425 if (mode_is_float(mode)) {
1426 if (ia32_cg_config.use_sse2)
1427 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1428 match_commutative | match_am);
1430 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1432 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1433 match_commutative | match_am | match_mode_neutral |
1434 match_immediate | match_am_and_immediates);
1438 * Creates an ia32 Mulh.
1439 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1440 * this result while Mul returns the lower 32 bit.
1442 * @return the created ia32 Mulh node
/* Signed -> one-operand IMul (IMul1OP), unsigned -> Mul; in both cases
 * the upper half is extracted with a Proj on res_high. Only 32-bit
 * modes are supported (panics otherwise). */
1444 static ir_node *gen_Mulh(ir_node *node)
1446 dbg_info *dbgi = get_irn_dbg_info(node);
1447 ir_node *op1 = get_Mulh_left(node);
1448 ir_node *op2 = get_Mulh_right(node);
1449 ir_mode *mode = get_irn_mode(node);
1451 ir_node *proj_res_high;
1453 if (get_mode_size_bits(mode) != 32) {
1454 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1457 if (mode_is_signed(mode)) {
1458 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1459 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1461 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1462 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1464 return proj_res_high;
1468 * Creates an ia32 And.
1470 * @return The created ia32 And node
/* And with 0xFF / 0xFFFF is recognised as a zero extension and turned
 * into an I2I Conv (movzx) instead of a real And; the src_mode selection
 * for the 0xFF case is in elided lines. */
1472 static ir_node *gen_And(ir_node *node)
1474 ir_node *op1 = get_And_left(node);
1475 ir_node *op2 = get_And_right(node);
1476 assert(! mode_is_float(get_irn_mode(node)));
1478 /* is it a zero extension? */
1479 if (is_Const(op2)) {
1480 ir_tarval *tv = get_Const_tarval(op2);
1481 long v = get_tarval_long(tv);
1483 if (v == 0xFF || v == 0xFFFF) {
1484 dbg_info *dbgi = get_irn_dbg_info(node);
1485 ir_node *block = get_nodes_block(node);
1492 assert(v == 0xFFFF);
1495 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1500 return gen_binop(node, op1, op2, new_bd_ia32_And,
1501 match_commutative | match_mode_neutral | match_am | match_immediate);
1505 * Creates an ia32 Or.
1507 * @return The created ia32 Or node
/* An Or may be a lowered 64-bit double shift; try that pattern first
 * (the early return on success is in an elided line). */
1509 static ir_node *gen_Or(ir_node *node)
1511 ir_node *op1 = get_Or_left(node);
1512 ir_node *op2 = get_Or_right(node);
1515 res = match_64bit_shift(node);
1519 assert (! mode_is_float(get_irn_mode(node)));
1520 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1521 | match_mode_neutral | match_am | match_immediate);
1527 * Creates an ia32 Eor.
1529 * @return The created ia32 Eor node
/* Straight mapping of the firm Eor (xor) to ia32 Xor. */
1531 static ir_node *gen_Eor(ir_node *node)
1533 ir_node *op1 = get_Eor_left(node);
1534 ir_node *op2 = get_Eor_right(node);
1536 assert(! mode_is_float(get_irn_mode(node)));
1537 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1538 | match_mode_neutral | match_am | match_immediate);
1543 * Creates an ia32 Sub.
1545 * @return The created ia32 Sub node
/* Floats go to SSE xSub / x87 vfsub. A Sub with a constant right
 * operand is only warned about (the normaliser should have turned it
 * into an Add of the negated constant), then handled normally. */
1547 static ir_node *gen_Sub(ir_node *node)
1549 ir_node *op1 = get_Sub_left(node);
1550 ir_node *op2 = get_Sub_right(node);
1551 ir_mode *mode = get_irn_mode(node);
1553 if (mode_is_float(mode)) {
1554 if (ia32_cg_config.use_sse2)
1555 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1557 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1560 if (is_Const(op2)) {
1561 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1565 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1566 | match_am | match_immediate);
/* Computes the memory input for a node that consumed a load via address
 * mode: avoids creating a memory dependency cycle between the new node
 * and the load it absorbed. If src_mem is a Sync, the Proj on the
 * absorbed load is filtered out of its predecessors; otherwise am_mem
 * and the transformed src_mem are combined in a fresh 2-input Sync
 * (tail of that path is in elided lines). */
1569 static ir_node *transform_AM_mem(ir_node *const block,
1570 ir_node *const src_val,
1571 ir_node *const src_mem,
1572 ir_node *const am_mem)
1574 if (is_NoMem(am_mem)) {
1575 return be_transform_node(src_mem);
1576 } else if (is_Proj(src_val) &&
1578 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1579 /* avoid memory loop */
1581 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1582 ir_node *const ptr_pred = get_Proj_pred(src_val);
1583 int const arity = get_Sync_n_preds(src_mem);
/* room for all old Sync inputs plus am_mem */
1588 NEW_ARR_A(ir_node*, ins, arity + 1);
1590 /* NOTE: This sometimes produces dead-code because the old sync in
1591 * src_mem might not be used anymore, we should detect this case
1592 * and kill the sync... */
1593 for (i = arity - 1; i >= 0; --i) {
1594 ir_node *const pred = get_Sync_pred(src_mem, i);
1596 /* avoid memory loop */
1597 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1600 ins[n++] = be_transform_node(pred);
1603 if (n==1 && ins[0] == am_mem) {
1605 /* creating a new Sync and relying on CSE may fail,
1606 * if am_mem is a ProjM, which does not yet verify. */
1610 return new_r_Sync(block, n, ins);
1614 ins[0] = be_transform_node(src_mem);
1616 return new_r_Sync(block, 2, ins);
1621 * Create a 32bit to 64bit signed extension.
1623 * @param dbgi debug info
1624 * @param block the block where node nodes should be placed
1625 * @param val the value to extend
1626 * @param orig the original node
/* Two strategies: the short cltd/cdq form (ProduceVal feeds Cltd) when
 * the target config prefers it, otherwise an arithmetic right shift by
 * 31 to replicate the sign bit. */
1628 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1629 ir_node *val, const ir_node *orig)
1634 if (ia32_cg_config.use_short_sex_eax) {
1635 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1636 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1638 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1639 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1641 SET_IA32_ORIG_NODE(res, orig);
1646 * Generates an ia32 Div with additional infrastructure for the
1647 * register allocator if needed.
/* Shared implementation for firm Div and Mod (dispatched on opcode):
 * builds the edx:eax high part either as a sign extension (signed IDiv)
 * or a zero Const (unsigned Div), and wires the divisor through address
 * mode when possible. */
1649 static ir_node *create_Div(ir_node *node)
1651 dbg_info *dbgi = get_irn_dbg_info(node);
1652 ir_node *block = get_nodes_block(node);
1653 ir_node *new_block = be_transform_node(block);
1654 int throws_exception = ir_throws_exception(node);
1661 ir_node *sign_extension;
1662 ia32_address_mode_t am;
1663 ia32_address_t *addr = &am.addr;
1665 /* the upper bits have random contents for smaller modes */
1666 switch (get_irn_opcode(node)) {
1668 op1 = get_Div_left(node);
1669 op2 = get_Div_right(node);
1670 mem = get_Div_mem(node);
1671 mode = get_Div_resmode(node);
1674 op1 = get_Mod_left(node);
1675 op2 = get_Mod_right(node);
1676 mem = get_Mod_mem(node);
1677 mode = get_Mod_resmode(node);
1680 panic("invalid divmod node %+F", node);
1683 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1685 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1686 is the memory of the consumed address. We can have only the second op as address
1687 in Div nodes, so check only op2. */
1688 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1690 if (mode_is_signed(mode)) {
1691 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1692 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1693 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: high part is simply zero */
1695 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1697 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1698 addr->index, new_mem, am.new_op2,
1699 am.new_op1, sign_extension);
1701 ir_set_throws_exception(new_node, throws_exception);
1703 set_irn_pinned(new_node, get_irn_pinned(node));
1705 set_am_attributes(new_node, &am);
1706 SET_IA32_ORIG_NODE(new_node, node);
1708 new_node = fix_mem_proj(new_node, &am);
1714 * Generates an ia32 Mod.
/* Thin wrapper: Mod shares its implementation with Div (create_Div
 * dispatches on the opcode). */
1716 static ir_node *gen_Mod(ir_node *node)
1718 return create_Div(node);
1722 * Generates an ia32 Div.
/* Float division goes to SSE xDiv or x87 vfdiv; integer division is
 * delegated to create_Div (shared with Mod). */
1724 static ir_node *gen_Div(ir_node *node)
1726 ir_mode *mode = get_Div_resmode(node);
1727 if (mode_is_float(mode)) {
1728 ir_node *op1 = get_Div_left(node);
1729 ir_node *op2 = get_Div_right(node);
1731 if (ia32_cg_config.use_sse2) {
1732 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1734 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1738 return create_Div(node);
1742 * Creates an ia32 Shl.
1744 * @return The created ia32 Shl node
/* match_mode_neutral is safe for Shl since the shifted-in low bits do
 * not depend on upper-bit contents. */
1746 static ir_node *gen_Shl(ir_node *node)
1748 ir_node *left = get_Shl_left(node);
1749 ir_node *right = get_Shl_right(node);
1751 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1752 match_mode_neutral | match_immediate);
1756 * Creates an ia32 Shr.
1758 * @return The created ia32 Shr node
/* No match_mode_neutral here: a logical right shift depends on the
 * exact upper bits of the operand. */
1760 static ir_node *gen_Shr(ir_node *node)
1762 ir_node *left = get_Shr_left(node);
1763 ir_node *right = get_Shr_right(node);
1765 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1771 * Creates an ia32 Sar.
1773 * @return The created ia32 Shrs node
/* Special cases before the generic Sar:
 * - Shrs by a constant (the guard selecting which constant is elided)
 *   becomes a 32->64 style sign extension via create_sex_32_64;
 * - Shrs(Shl(x, C), C) with C == 16 or 24 is an 8/16-bit sign
 *   extension and becomes an I2I Conv (movsx). */
1775 static ir_node *gen_Shrs(ir_node *node)
1777 ir_node *left = get_Shrs_left(node);
1778 ir_node *right = get_Shrs_right(node);
1780 if (is_Const(right)) {
1781 ir_tarval *tv = get_Const_tarval(right);
1782 long val = get_tarval_long(tv);
1784 /* this is a sign extension */
1785 dbg_info *dbgi = get_irn_dbg_info(node);
1786 ir_node *block = be_transform_node(get_nodes_block(node));
1787 ir_node *new_op = be_transform_node(left);
1789 return create_sex_32_64(dbgi, block, new_op, node);
1793 /* 8 or 16 bit sign extension? */
1794 if (is_Const(right) && is_Shl(left)) {
1795 ir_node *shl_left = get_Shl_left(left);
1796 ir_node *shl_right = get_Shl_right(left);
1797 if (is_Const(shl_right)) {
1798 ir_tarval *tv1 = get_Const_tarval(right);
1799 ir_tarval *tv2 = get_Const_tarval(shl_right);
1800 if (tv1 == tv2 && tarval_is_long(tv1)) {
1801 long val = get_tarval_long(tv1);
1802 if (val == 16 || val == 24) {
1803 dbg_info *dbgi = get_irn_dbg_info(node);
1804 ir_node *block = get_nodes_block(node);
1814 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1823 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1829 * Creates an ia32 Rol.
1831 * @param op1 The first operator
1832 * @param op2 The second operator
1833 * @return The created ia32 RotL node
1835 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1837 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1843 * Creates an ia32 Ror.
1844 * NOTE: There is no RotR with immediate because this would always be a RotL
1845 * "imm-mode_size_bits" which can be pre-calculated.
1847 * @param op1 The first operator
1848 * @param op2 The second operator
1849 * @return The created ia32 RotR node
1851 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1853 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1859 * Creates an ia32 RotR or RotL (depending on the found pattern).
1861 * @return The created ia32 RotL or RotR node
/* Rotl(x, -n) is rewritten to Ror(x, n); everything else stays Rol. */
1863 static ir_node *gen_Rotl(ir_node *node)
1865 ir_node *op1 = get_Rotl_left(node);
1866 ir_node *op2 = get_Rotl_right(node);
1868 if (is_Minus(op2)) {
1869 return gen_Ror(node, op1, get_Minus_op(op2));
1872 return gen_Rol(node, op1, op2);
1878 * Transforms a Minus node.
1880 * @return The created ia32 Minus node
/* Floats: SSE negates by xor-ing the sign-bit mask (a known constant
 * loaded via source address mode), x87 uses fchs. Integers: plain Neg. */
1882 static ir_node *gen_Minus(ir_node *node)
1884 ir_node *op = get_Minus_op(node);
1885 ir_node *block = be_transform_node(get_nodes_block(node));
1886 dbg_info *dbgi = get_irn_dbg_info(node);
1887 ir_mode *mode = get_irn_mode(node);
1892 if (mode_is_float(mode)) {
1893 ir_node *new_op = be_transform_node(op);
1894 if (ia32_cg_config.use_sse2) {
1895 /* TODO: non-optimal... if we have many xXors, then we should
1896 * rather create a load for the const and use that instead of
1897 * several AM nodes... */
1898 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1900 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1901 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign-bit constant entity */
1903 size = get_mode_size_bits(mode);
1904 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1906 set_ia32_am_sc(new_node, ent);
1907 set_ia32_op_type(new_node, ia32_AddrModeS);
1908 set_ia32_ls_mode(new_node, mode);
1910 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1913 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1916 SET_IA32_ORIG_NODE(new_node, node);
1922 * Transforms a Not node.
1924 * @return The created ia32 Not node
/* Boolean Nots must have been lowered before this phase (asserted). */
1926 static ir_node *gen_Not(ir_node *node)
1928 ir_node *op = get_Not_op(node);
1930 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1931 assert (! mode_is_float(get_irn_mode(node)));
1933 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Creates a float absolute value (optionally negated afterwards).
 * SSE: And with a known abs-mask constant via source address mode.
 * x87: vfabs, followed by vfchs when negate is requested.
 * NOTE(review): the SSE path has a "TODO, implement -Abs case" marker,
 * so negate appears to be honoured only on the x87 path here. */
1936 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1937 bool negate, ir_node *node)
1939 ir_node *new_block = be_transform_node(block);
1940 ir_mode *mode = get_irn_mode(op);
1941 ir_node *new_op = be_transform_node(op);
1946 assert(mode_is_float(mode));
1948 if (ia32_cg_config.use_sse2) {
1949 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1950 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1951 noreg_GP, nomem, new_op, noreg_fp);
1953 size = get_mode_size_bits(mode);
1954 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1956 set_ia32_am_sc(new_node, ent);
1958 SET_IA32_ORIG_NODE(new_node, node);
1960 set_ia32_op_type(new_node, ia32_AddrModeS);
1961 set_ia32_ls_mode(new_node, mode);
1963 /* TODO, implement -Abs case */
1966 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1967 SET_IA32_ORIG_NODE(new_node, node);
1969 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1970 SET_IA32_ORIG_NODE(new_node, node);
1978 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The Bt leaves the tested bit in the carry flag; callers pick the
 * condition code accordingly (see get_flags_node). */
1980 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1982 dbg_info *dbgi = get_irn_dbg_info(cmp);
1983 ir_node *block = get_nodes_block(cmp);
1984 ir_node *new_block = be_transform_node(block);
1985 ir_node *op1 = be_transform_node(x);
1986 ir_node *op2 = be_transform_node(n);
1988 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Maps a firm ir_relation to an ia32 condition code, split by operand
 * class: float (parity-aware unordered handling), signed integer, and
 * unsigned integer (the final switch). For signed compares against
 * values where overflow cannot occur (e.g. compare with 0), the cheaper
 * sign/not-sign codes are used instead of less/greater-equal.
 * ir_relation_false/true and unrepresentable combinations panic. */
1991 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1993 bool overflow_possible)
1995 if (mode_is_float(mode)) {
1997 case ir_relation_equal: return ia32_cc_float_equal;
1998 case ir_relation_less: return ia32_cc_float_below;
1999 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2000 case ir_relation_greater: return ia32_cc_float_above;
2001 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2002 case ir_relation_less_greater: return ia32_cc_not_equal;
2003 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2004 case ir_relation_unordered: return ia32_cc_parity;
2005 case ir_relation_unordered_equal: return ia32_cc_equal;
2006 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2007 case ir_relation_unordered_less_equal:
2008 return ia32_cc_float_unordered_below_equal;
2009 case ir_relation_unordered_greater:
2010 return ia32_cc_float_unordered_above;
2011 case ir_relation_unordered_greater_equal:
2012 return ia32_cc_float_unordered_above_equal;
2013 case ir_relation_unordered_less_greater:
2014 return ia32_cc_float_not_equal;
2015 case ir_relation_false:
2016 case ir_relation_true:
2017 /* should we introduce a jump always/jump never? */
2020 panic("Unexpected float pnc");
2021 } else if (mode_is_signed(mode)) {
2023 case ir_relation_unordered_equal:
2024 case ir_relation_equal: return ia32_cc_equal;
2025 case ir_relation_unordered_less:
2026 case ir_relation_less:
2027 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2028 case ir_relation_unordered_less_equal:
2029 case ir_relation_less_equal: return ia32_cc_less_equal;
2030 case ir_relation_unordered_greater:
2031 case ir_relation_greater: return ia32_cc_greater;
2032 case ir_relation_unordered_greater_equal:
2033 case ir_relation_greater_equal:
2034 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2035 case ir_relation_unordered_less_greater:
2036 case ir_relation_less_greater: return ia32_cc_not_equal;
2037 case ir_relation_less_equal_greater:
2038 case ir_relation_unordered:
2039 case ir_relation_false:
2040 case ir_relation_true:
2041 /* introduce jump always/jump never? */
2044 panic("Unexpected pnc");
2047 case ir_relation_unordered_equal:
2048 case ir_relation_equal: return ia32_cc_equal;
2049 case ir_relation_unordered_less:
2050 case ir_relation_less: return ia32_cc_below;
2051 case ir_relation_unordered_less_equal:
2052 case ir_relation_less_equal: return ia32_cc_below_equal;
2053 case ir_relation_unordered_greater:
2054 case ir_relation_greater: return ia32_cc_above;
2055 case ir_relation_unordered_greater_equal:
2056 case ir_relation_greater_equal: return ia32_cc_above_equal;
2057 case ir_relation_unordered_less_greater:
2058 case ir_relation_less_greater: return ia32_cc_not_equal;
2059 case ir_relation_less_equal_greater:
2060 case ir_relation_unordered:
2061 case ir_relation_false:
2062 case ir_relation_true:
2063 /* introduce jump always/jump never? */
2066 panic("Unexpected pnc");
/* Produces the flags-producing node for a Cmp and the condition code to
 * test on it (via cc_out). First tries to match x & (1 << n) compared
 * against 0 as a Bt instruction; otherwise transforms the Cmp normally,
 * after strengthening a pure <> / ordered relation with the impossible
 * bits (see the long comment inside) and deciding whether overflow is
 * possible (not possible when comparing against a null constant). */
2070 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2072 /* must have a Cmp as input */
2073 ir_relation relation = get_Cmp_relation(cmp);
2074 ir_node *l = get_Cmp_left(cmp);
2075 ir_node *r = get_Cmp_right(cmp);
2076 ir_mode *mode = get_irn_mode(l);
2077 bool overflow_possible;
2080 /* check for bit-test */
2081 if (ia32_cg_config.use_bt
2082 && (relation == ir_relation_equal
2083 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2084 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2086 ir_node *la = get_And_left(l);
2087 ir_node *ra = get_And_right(l);
2094 ir_node *c = get_Shl_left(la);
2095 if (is_Const_1(c) && is_Const_0(r)) {
2096 /* (1 << n) & ra) */
2097 ir_node *n = get_Shl_right(la);
2098 flags = gen_bt(cmp, ra, n);
2099 /* the bit is copied into the CF flag */
2100 if (relation & ir_relation_equal)
2101 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2103 *cc_out = ia32_cc_below; /* test for CF=1 */
2109 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2110 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2111 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2112 * a predecessor node). So add the < bit.
2113 * (Note that we do not want to produce <=> (which can happen for
2114 * unoptimized code), because no x86 flag can represent that */
2115 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2116 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
2118 overflow_possible = true;
2119 if (is_Const(r) && is_Const_null(r))
2120 overflow_possible = false;
2122 /* just do a normal transformation of the Cmp */
2123 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2124 flags = be_transform_node(cmp);
2129 * Transforms a Load.
2131 * @return the created ia32 Load node
/* Picks xLoad (SSE float), vfld (x87 float), a widening I2I Conv for
 * sub-32-bit integer modes, or a plain Load; attaches the computed
 * address mode and marks floating (unpinned) loads rematerializable. */
2133 static ir_node *gen_Load(ir_node *node)
2135 ir_node *old_block = get_nodes_block(node);
2136 ir_node *block = be_transform_node(old_block);
2137 ir_node *ptr = get_Load_ptr(node);
2138 ir_node *mem = get_Load_mem(node);
2139 ir_node *new_mem = be_transform_node(mem);
2140 dbg_info *dbgi = get_irn_dbg_info(node);
2141 ir_mode *mode = get_Load_mode(node);
2142 int throws_exception = ir_throws_exception(node);
2146 ia32_address_t addr;
2148 /* construct load address */
2149 memset(&addr, 0, sizeof(addr));
2150 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2157 base = be_transform_node(base);
2163 idx = be_transform_node(idx);
2166 if (mode_is_float(mode)) {
2167 if (ia32_cg_config.use_sse2) {
2168 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2171 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2175 assert(mode != mode_b);
2177 /* create a conv node with address mode for smaller modes */
2178 if (get_mode_size_bits(mode) < 32) {
2179 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2180 new_mem, noreg_GP, mode)
2185 ir_set_throws_exception(new_node, throws_exception);
2187 set_irn_pinned(new_node, get_irn_pinned(node));
2188 set_ia32_op_type(new_node, ia32_AddrModeS);
2189 set_ia32_ls_mode(new_node, mode);
2190 set_address(new_node, &addr);
2192 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants must agree on the result proj number for the
 * rematerialization machinery to work */
2193 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2194 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2195 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2196 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2199 SET_IA32_ORIG_NODE(new_node, node);
/* Decides whether a load feeding a store can be folded into destination
 * address mode: the load result must have a single user, live in the
 * same block, use the same pointer as the store, and nothing else (the
 * "other" operand's dependencies, or nodes found by ia32_prevents_AM)
 * may stand between load and store. */
2204 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2205 ir_node *ptr, ir_node *other)
2212 /* we only use address mode if we're the only user of the load */
2213 if (get_irn_n_edges(node) > 1)
2216 load = get_Proj_pred(node);
2219 if (get_nodes_block(load) != block)
2222 /* store should have the same pointer as the load */
2223 if (get_Load_ptr(load) != ptr)
2226 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2227 if (other != NULL &&
2228 get_nodes_block(other) == block &&
2229 heights_reachable_in_block(ia32_heights, other, load)) {
2233 if (ia32_prevents_AM(block, load, mem))
2235 /* Store should be attached to the load via mem */
2236 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Builds a destination-address-mode binop (e.g. AddMem, OrMem) for a
 * store of a binary operation whose one operand is a load from the
 * store's own pointer. The other operand becomes an immediate or a
 * register; commutativity allows trying both operand orders. An 8-bit
 * variant of the constructor is used for byte-sized modes. The old
 * load's memory Proj is redirected to the new combined node. */
2241 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2242 ir_node *mem, ir_node *ptr, ir_mode *mode,
2243 construct_binop_dest_func *func,
2244 construct_binop_dest_func *func8bit,
2245 match_flags_t flags)
2247 ir_node *src_block = get_nodes_block(node);
2255 ia32_address_mode_t am;
2256 ia32_address_t *addr = &am.addr;
2257 memset(&am, 0, sizeof(am));
2259 assert(flags & match_immediate); /* there is no destam node without... */
2260 commutative = (flags & match_commutative) != 0;
2262 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2263 build_address(&am, op1, ia32_create_am_double_use);
2264 new_op = create_immediate_or_transform(op2, 0);
2265 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2266 build_address(&am, op2, ia32_create_am_double_use);
2267 new_op = create_immediate_or_transform(op1, 0);
2272 if (addr->base == NULL)
2273 addr->base = noreg_GP;
2274 if (addr->index == NULL)
2275 addr->index = noreg_GP;
2276 if (addr->mem == NULL)
2279 dbgi = get_irn_dbg_info(node);
2280 block = be_transform_node(src_block);
2281 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2283 if (get_mode_size_bits(mode) == 8) {
2284 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2286 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2288 set_address(new_node, addr);
2289 set_ia32_op_type(new_node, ia32_AddrModeD);
2290 set_ia32_ls_mode(new_node, mode);
2291 SET_IA32_ORIG_NODE(new_node, node);
/* reroute users of the consumed load's memory to the new node */
2293 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2294 mem_proj = be_transform_node(am.mem_proj);
2295 be_set_transformed_node(am.mem_proj, new_node);
2296 be_set_transformed_node(mem_proj, new_node);
/* Unary counterpart of dest_am_binop: folds Store(op(Load(ptr))) into a
 * single destination-address-mode node (NegMem, NotMem, Inc/DecMem).
 * Returns NULL (in an elided line) when use_dest_am rejects the fold. */
2301 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2302 ir_node *ptr, ir_mode *mode,
2303 construct_unop_dest_func *func)
2305 ir_node *src_block = get_nodes_block(node);
2311 ia32_address_mode_t am;
2312 ia32_address_t *addr = &am.addr;
2314 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2317 memset(&am, 0, sizeof(am));
2318 build_address(&am, op, ia32_create_am_double_use);
2320 dbgi = get_irn_dbg_info(node);
2321 block = be_transform_node(src_block);
2322 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2323 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2324 set_address(new_node, addr);
2325 set_ia32_op_type(new_node, ia32_AddrModeD);
2326 set_ia32_ls_mode(new_node, mode);
2327 SET_IA32_ORIG_NODE(new_node, node);
/* reroute users of the consumed load's memory to the new node */
2329 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2330 mem_proj = be_transform_node(am.mem_proj);
2331 be_set_transformed_node(am.mem_proj, new_node);
2332 be_set_transformed_node(mem_proj, new_node);
/* Tries to turn a stored Mux(sel, 1, 0) (or 0/1, with negated condition)
 * of byte size into a SetccMem writing the flag directly to memory.
 * The float-only condition codes cannot be expressed by SetM, so those
 * bail out. Returns NULL (elided lines) when the pattern doesn't fit. */
2337 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2339 ir_mode *mode = get_irn_mode(node);
2340 ir_node *mux_true = get_Mux_true(node);
2341 ir_node *mux_false = get_Mux_false(node);
2349 ia32_condition_code_t cc;
2350 ia32_address_t addr;
2352 if (get_mode_size_bits(mode) != 8)
2355 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2357 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2363 cond = get_Mux_sel(node);
2364 flags = get_flags_node(cond, &cc);
2365 /* we can't handle the float special cases with SetM */
2366 if (cc & ia32_cc_additional_float_cases)
/* Mux(c, 0, 1) stores the negated condition */
2369 cc = ia32_negate_condition_code(cc);
2371 build_address_ptr(&addr, ptr, mem);
2373 dbgi = get_irn_dbg_info(node);
2374 block = get_nodes_block(node);
2375 new_block = be_transform_node(block);
2376 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2377 addr.index, addr.mem, flags, cc);
2378 set_address(new_node, &addr);
2379 set_ia32_op_type(new_node, ia32_AddrModeD);
2380 set_ia32_ls_mode(new_node, mode);
2381 SET_IA32_ORIG_NODE(new_node, node);
/* Top-level destination-address-mode matcher for a Store: if the stored
 * value is a single-use same-block ALU op (Add/Sub/And/Or/Eor/shifts/
 * Rotl/Mux/Minus/Not) over a load from the same pointer, emits the
 * corresponding *Mem node instead of a Load/op/Store triple. Add/Sub by
 * +-1 become Inc/DecMem when the target prefers inc/dec. Only GP modes
 * are handled. The pinned state of the original Store is carried over. */
2386 static ir_node *try_create_dest_am(ir_node *node)
2388 ir_node *val = get_Store_value(node);
2389 ir_node *mem = get_Store_mem(node);
2390 ir_node *ptr = get_Store_ptr(node);
2391 ir_mode *mode = get_irn_mode(val);
2392 unsigned bits = get_mode_size_bits(mode);
2397 /* handle only GP modes for now... */
2398 if (!ia32_mode_needs_gp_reg(mode))
2402 /* store must be the only user of the val node */
2403 if (get_irn_n_edges(val) > 1)
2405 /* skip pointless convs */
2407 ir_node *conv_op = get_Conv_op(val);
2408 ir_mode *pred_mode = get_irn_mode(conv_op);
2409 if (!ia32_mode_needs_gp_reg(pred_mode))
2411 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2419 /* value must be in the same block */
2420 if (get_nodes_block(node) != get_nodes_block(val))
2423 switch (get_irn_opcode(val)) {
2425 op1 = get_Add_left(val);
2426 op2 = get_Add_right(val);
2427 if (ia32_cg_config.use_incdec) {
2428 if (is_Const_1(op2)) {
2429 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2431 } else if (is_Const_Minus_1(op2)) {
2432 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2436 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2437 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2438 match_commutative | match_immediate);
2441 op1 = get_Sub_left(val);
2442 op2 = get_Sub_right(val);
2443 if (is_Const(op2)) {
2444 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2446 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2447 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2451 op1 = get_And_left(val);
2452 op2 = get_And_right(val);
2453 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2454 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2455 match_commutative | match_immediate);
2458 op1 = get_Or_left(val);
2459 op2 = get_Or_right(val);
2460 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2461 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2462 match_commutative | match_immediate);
2465 op1 = get_Eor_left(val);
2466 op2 = get_Eor_right(val);
2467 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2468 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2469 match_commutative | match_immediate);
2472 op1 = get_Shl_left(val);
2473 op2 = get_Shl_right(val);
2474 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2475 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2479 op1 = get_Shr_left(val);
2480 op2 = get_Shr_right(val);
2481 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2482 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2486 op1 = get_Shrs_left(val);
2487 op2 = get_Shrs_right(val);
2488 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2489 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2493 op1 = get_Rotl_left(val);
2494 op2 = get_Rotl_right(val);
2495 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2496 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2499 /* TODO: match ROR patterns... */
2501 new_node = try_create_SetMem(val, ptr, mem);
2505 op1 = get_Minus_op(val);
2506 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2509 /* should be lowered already */
2510 assert(mode != mode_b);
2511 op1 = get_Not_op(val);
2512 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2518 if (new_node != NULL) {
2519 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2520 get_irn_pinned(node) == op_pin_state_pinned) {
2521 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Check whether an integer mode can be handled directly by the x87
 * fild/fist instructions: only signed 16- and 32-bit modes qualify
 * (see the source-AM check in the gp->fp conversion).
 */
2528 static bool possible_int_mode_for_fp(ir_mode *mode)
2532 	if (!mode_is_signed(mode))
2534 	size = get_mode_size_bits(mode);
2535 	if (size != 16 && size != 32)
/*
 * Test whether @p node is a Conv from a float mode into an integer mode
 * that a fist/fisttp can produce directly (signed 16/32 bit, see
 * possible_int_mode_for_fp).
 */
2540 static int is_float_to_int_conv(const ir_node *node)
2542 	ir_mode *mode = get_irn_mode(node);
2546 	if (!possible_int_mode_for_fp(mode))
2551 	conv_op = get_Conv_op(node);
2552 	conv_mode = get_irn_mode(conv_op);
2554 	if (!mode_is_float(conv_mode))
 * Transform a Store(floatConst) into a sequence of
 * integer Stores of the constant's raw bytes (32-bit chunks first,
 * then a 16-bit remainder if needed).
 * @return the created ia32 Store node
2566 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2568 	ir_mode *mode = get_irn_mode(cns);
2569 	unsigned size = get_mode_size_bytes(mode);
2570 	ir_tarval *tv = get_Const_tarval(cns);
2571 	ir_node *block = get_nodes_block(node);
2572 	ir_node *new_block = be_transform_node(block);
2573 	ir_node *ptr = get_Store_ptr(node);
2574 	ir_node *mem = get_Store_mem(node);
2575 	dbg_info *dbgi = get_irn_dbg_info(node);
2578 	int throws_exception = ir_throws_exception(node);
2580 	ia32_address_t addr;
2582 	build_address_ptr(&addr, ptr, mem);
	/* assemble a 32-bit little-endian chunk of the constant at offset ofs */
2589 		val= get_tarval_sub_bits(tv, ofs) |
2590 			(get_tarval_sub_bits(tv, ofs + 1) << 8) |
2591 			(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2592 			(get_tarval_sub_bits(tv, ofs + 3) << 24);
2595 	} else if (size >= 2) {
2596 		val= get_tarval_sub_bits(tv, ofs) |
2597 			(get_tarval_sub_bits(tv, ofs + 1) << 8);
2601 		panic("invalid size of Store float to mem (%+F)", node);
2603 	ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2605 	ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2606 		addr.index, addr.mem, imm);
2607 	ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2609 	ir_set_throws_exception(new_node, throws_exception);
2610 	set_irn_pinned(new_node, get_irn_pinned(node));
2611 	set_ia32_op_type(new_node, ia32_AddrModeD);
2612 	set_ia32_ls_mode(new_node, mode);
2613 	set_address(new_node, &addr);
2614 	SET_IA32_ORIG_NODE(new_node, node);
	/* advance to the next chunk of the constant */
2621 	addr.offset += delta;
2622 	} while (size != 0);
	/* multiple partial Stores: Sync their memory Projs;
	 * a single Store: return the Store node itself */
2625 		return new_rd_Sync(dbgi, new_block, i, ins);
2627 		return get_Proj_pred(ins[0]);
 * Generate a vfist or vfisttp instruction (float -> int store).
 * fisttp (SSE3) truncates without fiddling with the fpu control word;
 * the classic fist needs the control word temporarily set to truncation.
2634 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2635 		ir_node *index, ir_node *mem, ir_node *val)
2637 	if (ia32_cg_config.use_fisttp) {
2638 		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2639 			if other users exists */
2640 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2641 		ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
		/* Keep the res Proj so the popped value gets a copy if needed */
2642 		be_new_Keep(block, 1, &value);
		/* classic fist: feed it a truncating fpu control-word value */
2646 		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2649 		ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
 * Transforms a general (no special case) Store.
 * Tries destination address mode first, otherwise emits:
 * xStore (SSE2 float), vfst (x87 float), vfist (float->int conv),
 * or a plain integer Store/Store8Bit.
 * @return the created ia32 Store node
2659 static ir_node *gen_general_Store(ir_node *node)
2661 	ir_node *val = get_Store_value(node);
2662 	ir_mode *mode = get_irn_mode(val);
2663 	ir_node *block = get_nodes_block(node);
2664 	ir_node *new_block = be_transform_node(block);
2665 	ir_node *ptr = get_Store_ptr(node);
2666 	ir_node *mem = get_Store_mem(node);
2667 	dbg_info *dbgi = get_irn_dbg_info(node);
2668 	int throws_exception = ir_throws_exception(node);
2671 	ia32_address_t addr;
2673 	/* check for destination address mode */
2674 	new_node = try_create_dest_am(node);
2675 	if (new_node != NULL)
2678 	/* construct store address */
2679 	memset(&addr, 0, sizeof(addr));
2680 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
	/* fill unused address parts with noreg, transform the used ones */
2682 	if (addr.base == NULL) {
2683 		addr.base = noreg_GP;
2685 		addr.base = be_transform_node(addr.base);
2688 	if (addr.index == NULL) {
2689 		addr.index = noreg_GP;
2691 		addr.index = be_transform_node(addr.index);
2693 	addr.mem = be_transform_node(mem);
2695 	if (mode_is_float(mode)) {
2696 		/* Convs (and strict-Convs) before stores are unnecessary if the mode
		 * stays the same and the operand is also float */
2698 		while (is_Conv(val) && mode == get_irn_mode(val)) {
2699 			ir_node *op = get_Conv_op(val);
2700 			if (!mode_is_float(get_irn_mode(op)))
2704 		new_val = be_transform_node(val);
2705 		if (ia32_cg_config.use_sse2) {
2706 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2707 			                              addr.index, addr.mem, new_val);
2709 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2710 			                            addr.index, addr.mem, new_val, mode);
	/* Store(Conv(float)) on x87: combine into a fist/fisttp */
2712 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2713 		val = get_Conv_op(val);
2715 		/* TODO: is this optimisation still necessary at all (middleend)? */
2716 		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
		 * the fist, since it truncates anyway */
2718 		while (is_Conv(val)) {
2719 			ir_node *op = get_Conv_op(val);
2720 			if (!mode_is_float(get_irn_mode(op)))
2722 			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2726 		new_val = be_transform_node(val);
2727 		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
	/* plain integer store, value may become an immediate */
2729 		new_val = create_immediate_or_transform(val, 0);
2730 		assert(mode != mode_b);
2732 		if (get_mode_size_bits(mode) == 8) {
2733 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2734 			                                 addr.index, addr.mem, new_val);
2736 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2737 			                             addr.index, addr.mem, new_val);
2740 	ir_set_throws_exception(new_node, throws_exception);
2742 	set_irn_pinned(new_node, get_irn_pinned(node));
2743 	set_ia32_op_type(new_node, ia32_AddrModeD);
2744 	set_ia32_ls_mode(new_node, mode);
2746 	set_address(new_node, &addr);
2747 	SET_IA32_ORIG_NODE(new_node, node);
 * Transforms a Store.
 * Dispatches: float-constant Stores get split into integer Stores,
 * everything else goes through gen_general_Store.
 * @return the created ia32 Store node
2757 static ir_node *gen_Store(ir_node *node)
2759 	ir_node *val = get_Store_value(node);
2760 	ir_mode *mode = get_irn_mode(val);
2762 	if (mode_is_float(mode) && is_Const(val)) {
2763 		/* We can transform every floating const store
2764 		   into a sequence of integer stores.
2765 		   If the constant is already in a register,
2766 		   it would be better to use it, but we don't
2767 		   have this information here. */
2768 		return gen_float_const_Store(node, val);
2770 	return gen_general_Store(node);
 * Transforms a Switch.
 * Builds a jump table in a private constant entity and emits a
 * SwitchJmp that indexes it (scale 2 => 4-byte entries).
 * @return the created ia32 SwitchJmp node
2778 static ir_node *gen_Switch(ir_node *node)
2780 	dbg_info *dbgi = get_irn_dbg_info(node);
2781 	ir_graph *irg = get_irn_irg(node);
2782 	ir_node *block = be_transform_node(get_nodes_block(node));
2783 	ir_node *sel = get_Switch_selector(node);
2784 	ir_node *new_sel = be_transform_node(sel);
2785 	ir_mode *sel_mode = get_irn_mode(sel);
2786 	const ir_switch_table *table = get_Switch_table(node);
2787 	unsigned n_outs = get_Switch_n_outs(node);
	/* selector must be (widened to) 32 bit for the table index */
2791 	assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2792 	if (get_mode_size_bits(sel_mode) != 32)
2793 		new_sel = create_upconv(new_sel, sel);
	/* jump table lives in a private, constant entity */
2795 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2796 	set_entity_visibility(entity, ir_visibility_private);
2797 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2799 	table = ir_switch_table_duplicate(irg, table);
2801 	new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2802 	set_ia32_am_scale(new_node, 2);
2803 	set_ia32_am_sc(new_node, entity);
2804 	set_ia32_op_type(new_node, ia32_AddrModeS);
2805 	set_ia32_ls_mode(new_node, mode_Iu);
2806 	SET_IA32_ORIG_NODE(new_node, node);
2807 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2808 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
 * Transform a Cond node into a conditional jump (Jcc) consuming the
 * flags produced by the selector's compare.
2816 static ir_node *gen_Cond(ir_node *node)
2818 	ir_node *block = get_nodes_block(node);
2819 	ir_node *new_block = be_transform_node(block);
2820 	dbg_info *dbgi = get_irn_dbg_info(node);
2821 	ir_node *sel = get_Cond_selector(node);
2822 	ir_node *flags = NULL;
2824 	ia32_condition_code_t cc;
2826 	/* we get flags from a Cmp */
2827 	flags = get_flags_node(sel, &cc);
2829 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2830 	SET_IA32_ORIG_NODE(new_node, node);
 * Transform a be_Copy: duplicate it and normalize GP-register modes
 * to mode_Iu.
2838 static ir_node *gen_be_Copy(ir_node *node)
2840 	ir_node *new_node = be_duplicate_node(node);
2841 	ir_mode *mode = get_irn_mode(new_node);
2843 	if (ia32_mode_needs_gp_reg(mode)) {
2844 		set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare: fucomi when available, otherwise
 * ftst/fucom + fnstsw + sahf to move the fpu status into eflags.
 * Returns the node producing the flags.
 */
2850 static ir_node *create_Fucom(ir_node *node)
2852 	dbg_info *dbgi = get_irn_dbg_info(node);
2853 	ir_node *block = get_nodes_block(node);
2854 	ir_node *new_block = be_transform_node(block);
2855 	ir_node *left = get_Cmp_left(node);
2856 	ir_node *new_left = be_transform_node(left);
2857 	ir_node *right = get_Cmp_right(node);
2861 	if (ia32_cg_config.use_fucomi) {
2862 		new_right = be_transform_node(right);
2863 		new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2865 		set_ia32_commutative(new_node);
2866 		SET_IA32_ORIG_NODE(new_node, node);
		/* compare against 0 can use the shorter ftst */
2868 		if (is_Const_0(right)) {
2869 			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2871 			new_right = be_transform_node(right);
2872 			new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2873 			set_ia32_commutative(new_node);
2876 		SET_IA32_ORIG_NODE(new_node, node);
		/* sahf copies ah (fpu status) into the cpu flags */
2878 		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2879 		SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE2 Ucomi compare for a float Cmp; one operand may be
 * folded into a source address mode. Returns the flags-producing node.
 */
2885 static ir_node *create_Ucomi(ir_node *node)
2887 	dbg_info *dbgi = get_irn_dbg_info(node);
2888 	ir_node *src_block = get_nodes_block(node);
2889 	ir_node *new_block = be_transform_node(src_block);
2890 	ir_node *left = get_Cmp_left(node);
2891 	ir_node *right = get_Cmp_right(node);
2893 	ia32_address_mode_t am;
2894 	ia32_address_t *addr = &am.addr;
2896 	match_arguments(&am, src_block, left, right, NULL,
2897 	                match_commutative | match_am);
2899 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2900 	                             addr->mem, am.new_op1, am.new_op2,
2902 	set_am_attributes(new_node, &am);
2904 	SET_IA32_ORIG_NODE(new_node, node);
2906 	new_node = fix_mem_proj(new_node, &am);
 * returns true if it is assured, that the upper bits of a node are "clean"
 * which means for a 16 or 8 bit value, that the upper bits in the register
 * are 0 for unsigned and a copy of the last significant bit for signed
 * values. Walks recursively through Projs and selected ia32 ops.
2917 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2919 	assert(ia32_mode_needs_gp_reg(mode));
	/* full-width values have no "upper bits" to worry about */
2920 	if (get_mode_size_bits(mode) >= 32)
2923 	if (is_Proj(transformed_node))
2924 		return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2926 	switch (get_ia32_irn_opcode(transformed_node)) {
2927 	case iro_ia32_Conv_I2I:
2928 	case iro_ia32_Conv_I2I8Bit: {
		/* a conv cleans the upper bits iff it extends with the same
		 * signedness and from a mode no bigger than the queried one */
2929 		ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2930 		if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2932 		if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2939 		if (mode_is_signed(mode)) {
2940 			return false; /* TODO handle signed modes */
		/* a Shr by a constant >= (32 - bits) leaves the upper bits zero */
2942 			ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2943 			if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2944 				const ia32_immediate_attr_t *attr
2945 					= get_ia32_immediate_attr_const(right);
2946 				if (attr->symconst == 0 &&
2947 					(unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2951 			return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2955 		/* TODO too conservative if shift amount is constant */
2956 		return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
	/* And: one clean unsigned operand suffices to clear the upper bits */
2959 		if (!mode_is_signed(mode)) {
2961 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2962 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2964 		/* TODO if one is known to be zero extended, then || is sufficient */
	/* generic binary ops: both operands must be clean */
2969 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2970 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2972 	case iro_ia32_Const:
2973 	case iro_ia32_Immediate: {
2974 		const ia32_immediate_attr_t *attr =
2975 			get_ia32_immediate_attr_const(transformed_node);
		/* signed: all bits above (and including) the sign position must
		 * be all-zero or all-one; unsigned: they must be zero */
2976 		if (mode_is_signed(mode)) {
2977 			long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2978 			return shifted == 0 || shifted == -1;
2980 			unsigned long shifted = (unsigned long)attr->offset;
2981 			shifted >>= get_mode_size_bits(mode)-1;
2983 			return shifted == 0;
 * Generate code for a Cmp.
 * Floats go to Ucomi (SSE2) or Fucom (x87); integers become a Test
 * for the (x & y) ==/!= 0 pattern, or a Cmp otherwise.
2995 static ir_node *gen_Cmp(ir_node *node)
2997 	dbg_info *dbgi = get_irn_dbg_info(node);
2998 	ir_node *block = get_nodes_block(node);
2999 	ir_node *new_block = be_transform_node(block);
3000 	ir_node *left = get_Cmp_left(node);
3001 	ir_node *right = get_Cmp_right(node);
3002 	ir_mode *cmp_mode = get_irn_mode(left);
3004 	ia32_address_mode_t am;
3005 	ia32_address_t *addr = &am.addr;
3007 	if (mode_is_float(cmp_mode)) {
3008 		if (ia32_cg_config.use_sse2) {
3009 			return create_Ucomi(node);
3011 			return create_Fucom(node);
3015 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3017 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3018 	if (is_Const_0(right) &&
3020 		get_irn_n_edges(left) == 1) {
3021 		/* Test(and_left, and_right) */
3022 		ir_node *and_left = get_And_left(left);
3023 		ir_node *and_right = get_And_right(left);
3025 		/* matze: code here used mode instead of cmd_mode, I think it is always
3026 		 * the same as cmp_mode, but I leave this here to see if this is really
		 * true */
3029 		assert(get_irn_mode(and_left) == cmp_mode);
3031 		match_arguments(&am, block, and_left, and_right, NULL,
3033 		                match_am | match_8bit_am | match_16bit_am |
3034 		                match_am_and_immediates | match_immediate);
3036 		/* use 32bit compare mode if possible since the opcode is smaller */
3037 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3038 			upper_bits_clean(am.new_op2, cmp_mode)) {
3039 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3042 		if (get_mode_size_bits(cmp_mode) == 8) {
3043 			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3044 			                                addr->index, addr->mem,
3045 			                                am.new_op1, am.new_op2,
3048 			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3049 			                            addr->index, addr->mem, am.new_op1,
3050 			                            am.new_op2, am.ins_permuted);
3053 		/* Cmp(left, right) */
3054 		match_arguments(&am, block, left, right, NULL,
3055 		                match_commutative | match_am | match_8bit_am |
3056 		                match_16bit_am | match_am_and_immediates |
3058 		/* use 32bit compare mode if possible since the opcode is smaller */
3059 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3060 			upper_bits_clean(am.new_op2, cmp_mode)) {
3061 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3064 		if (get_mode_size_bits(cmp_mode) == 8) {
3065 			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3066 			                               addr->index, addr->mem, am.new_op1,
3067 			                               am.new_op2, am.ins_permuted);
3069 			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3070 			                           addr->mem, am.new_op1, am.new_op2,
3074 	set_am_attributes(new_node, &am);
3075 	set_ia32_ls_mode(new_node, cmp_mode);
3077 	SET_IA32_ORIG_NODE(new_node, node);
3079 	new_node = fix_mem_proj(new_node, &am);
/*
 * Create a CMovcc for a Mux whose values fit GP registers.
 * @param flags      the (untransformed) node producing the condition flags
 * @param new_flags  the transformed flags node consumed by the CMov
 * @param cc         condition code; negated if operand matching permuted
 *                   the true/false inputs
 */
3084 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3085 		ia32_condition_code_t cc)
3087 	dbg_info *dbgi = get_irn_dbg_info(node);
3088 	ir_node *block = get_nodes_block(node);
3089 	ir_node *new_block = be_transform_node(block);
3090 	ir_node *val_true = get_Mux_true(node);
3091 	ir_node *val_false = get_Mux_false(node);
3093 	ia32_address_mode_t am;
3094 	ia32_address_t *addr;
3096 	assert(ia32_cg_config.use_cmov);
3097 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3101 	match_arguments(&am, block, val_false, val_true, flags,
3102 	                match_commutative | match_am | match_16bit_am | match_mode_neutral);
3104 	if (am.ins_permuted)
3105 		cc = ia32_negate_condition_code(cc);
3107 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3108 	                              addr->mem, am.new_op1, am.new_op2, new_flags,
3110 	set_am_attributes(new_node, &am);
3112 	SET_IA32_ORIG_NODE(new_node, node);
3114 	new_node = fix_mem_proj(new_node, &am);
 * Creates a ia32 Setcc instruction and, if the result mode is wider
 * than 8 bit, zero-extends the byte result with a Conv_I2I8Bit.
3122 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3123 		ir_node *flags, ia32_condition_code_t cc,
3126 	ir_mode *mode = get_irn_mode(orig_node);
3129 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3130 	SET_IA32_ORIG_NODE(new_node, orig_node);
3132 	/* we might need to conv the result up */
3133 	if (get_mode_size_bits(mode) > 8) {
3134 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3135 		                                    nomem, new_node, mode_Bu);
3136 		SET_IA32_ORIG_NODE(new_node, orig_node);
 * Create instruction for an unsigned Difference or Zero:
 * doz(a, b) = (a - b) & ~(borrow-mask), built as Sub + Sbb0 + Not + And
 * so the result is a-b when a>=b and 0 otherwise.
3145 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3147 	ir_mode *mode = get_irn_mode(psi);
3157 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3158 	                     match_mode_neutral | match_am | match_immediate | match_two_users);
3160 	block = get_nodes_block(new_node);
3162 	if (is_Proj(new_node)) {
3163 		sub = get_Proj_pred(new_node);
	/* make the Sub multi-result so we can also grab its flags Proj */
3166 		set_irn_mode(sub, mode_T);
3167 		new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3169 	assert(is_ia32_Sub(sub));
3170 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3172 	dbgi = get_irn_dbg_info(psi);
	/* Sbb0 yields 0 or -1 depending on the carry from the Sub */
3173 	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3174 	set_ia32_ls_mode(sbb, mode_Iu);
3175 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3177 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3178 	set_ia32_ls_mode(new_node, mode_Iu);
3179 	set_ia32_commutative(new_node);
 * Create an const array of two float consts.
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT for the mode of the constants, if NULL
 *                  smallest possible mode will be used
 * @return the entity holding the two-element constant array
3191 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3194 	ir_mode *mode = *new_mode;
3196 	ir_initializer_t *initializer;
3197 	ir_tarval *tv0 = get_Const_tarval(c0);
3198 	ir_tarval *tv1 = get_Const_tarval(c1);
3201 	/* detect the best mode for the constants: try float, then double,
	 * as long as both values convert losslessly */
3202 	mode = get_tarval_mode(tv0);
3204 	if (mode != mode_F) {
3205 		if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3206 			tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3208 			tv0 = tarval_convert_to(tv0, mode);
3209 			tv1 = tarval_convert_to(tv1, mode);
3210 		} else if (mode != mode_D) {
3211 			if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3212 				tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3214 				tv0 = tarval_convert_to(tv0, mode);
3215 				tv1 = tarval_convert_to(tv1, mode);
3222 	tp = ia32_get_prim_type(mode);
3223 	tp = ia32_create_float_array(tp);
3225 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3227 	set_entity_ld_ident(ent, get_entity_ident(ent));
3228 	set_entity_visibility(ent, ir_visibility_private);
3229 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3231 	initializer = create_initializer_compound(2);
3233 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3234 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3236 	set_entity_initializer(ent, initializer);
 * Possible transformations for creating a Setcc: each enum value is one
 * instruction step (Set/Add/Lea/Shl/Neg/Not/And/...) in the sequence
 * computed by find_const_transform.
3245 enum setcc_transform_insn {
/* Describes a full Setcc-based sequence: the (possibly negated)
 * condition code plus the ordered list of transformation steps. */
3258 typedef struct setcc_transform {
3260 	ia32_condition_code_t cc;
3262 	enum setcc_transform_insn transform;
3266 } setcc_transform_t;
 * Setcc can only handle 0 and 1 result.
 * Find a transformation that creates 0 and 1 from
 * the given true (t) and false (f) constants, recording the steps
 * (and possibly a negated condition code) in @p res.
3273 static void find_const_transform(ia32_condition_code_t cc,
3274 		ir_tarval *t, ir_tarval *f,
3275 		setcc_transform_t *res)
	/* normalize so that t is the non-zero / bigger value; negate cc
	 * when the roles of t and f are swapped */
3281 	if (tarval_is_null(t)) {
3285 		cc = ia32_negate_condition_code(cc);
3286 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3287 		// now, t is the bigger one
3291 		cc = ia32_negate_condition_code(cc);
	/* non-zero f: shift both values down by f and fix up with an ADD */
3295 	if (! tarval_is_null(f)) {
3296 		ir_tarval *t_sub = tarval_sub(t, f, NULL);
3299 		res->steps[step].transform = SETCC_TR_ADD;
3301 		if (t == tarval_bad)
3302 			panic("constant subtract failed");
3303 		if (! tarval_is_long(f))
3304 			panic("tarval is not long");
3306 		res->steps[step].val = get_tarval_long(f);
3308 		f = tarval_sub(f, f, NULL);
3309 		assert(tarval_is_null(f));
	/* t == 1: a plain Setcc suffices */
3312 	if (tarval_is_one(t)) {
3313 		res->steps[step].transform = SETCC_TR_SET;
3314 		res->num_steps = ++step;
	/* t == -1: Setcc then negate */
3318 	if (tarval_is_minus_one(t)) {
3319 		res->steps[step].transform = SETCC_TR_NEG;
3321 		res->steps[step].transform = SETCC_TR_SET;
3322 		res->num_steps = ++step;
	/* small multiplier values: use Lea/Shl address-arithmetic tricks */
3325 	if (tarval_is_long(t)) {
3326 		long v = get_tarval_long(t);
3328 		res->steps[step].val = 0;
3331 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3333 		res->steps[step].transform = SETCC_TR_LEAxx;
3334 		res->steps[step].scale = 3; /* (a << 3) + a */
3337 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3339 		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3340 		res->steps[step].scale = 3; /* (a << 3) */
3343 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3345 		res->steps[step].transform = SETCC_TR_LEAxx;
3346 		res->steps[step].scale = 2; /* (a << 2) + a */
3349 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3351 		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3352 		res->steps[step].scale = 2; /* (a << 2) */
3355 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3357 		res->steps[step].transform = SETCC_TR_LEAxx;
3358 		res->steps[step].scale = 1; /* (a << 1) + a */
3361 		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3363 		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3364 		res->steps[step].scale = 1; /* (a << 1) */
3367 		res->num_steps = step;
	/* general case: mask (And) or shift into place, or negate */
3370 		if (! tarval_is_single_bit(t)) {
3371 			res->steps[step].transform = SETCC_TR_AND;
3372 			res->steps[step].val = v;
3374 			res->steps[step].transform = SETCC_TR_NEG;
3376 			int val = get_tarval_lowest_bit(t);
3379 			res->steps[step].transform = SETCC_TR_SHL;
3380 			res->steps[step].scale = val;
3384 		res->steps[step].transform = SETCC_TR_SET;
3385 		res->num_steps = ++step;
3388 	panic("tarval is not long");
 * Transforms a Mux node into some code sequence.
 * Handles: float abs, SSE min/max patterns, float const-pair loads via
 * a 2-element constant array indexed by a Setcc, unsigned doz, integer
 * const/const Muxes via find_const_transform, and CMov as fallback.
 * @return The transformed node.
3396 static ir_node *gen_Mux(ir_node *node)
3398 	dbg_info *dbgi = get_irn_dbg_info(node);
3399 	ir_node *block = get_nodes_block(node);
3400 	ir_node *new_block = be_transform_node(block);
3401 	ir_node *mux_true = get_Mux_true(node);
3402 	ir_node *mux_false = get_Mux_false(node);
3403 	ir_node *sel = get_Mux_sel(node);
3404 	ir_mode *mode = get_irn_mode(node);
3408 	ia32_condition_code_t cc;
3410 	assert(get_irn_mode(sel) == mode_b);
	/* abs(x) patterns: only handled for float modes here */
3412 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3414 		if (ia32_mode_needs_gp_reg(mode)) {
3415 			ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3418 			ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3419 			return create_float_abs(dbgi, block, op, is_abs < 0, node);
3423 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3424 	if (mode_is_float(mode)) {
3425 		ir_node *cmp_left = get_Cmp_left(sel);
3426 		ir_node *cmp_right = get_Cmp_right(sel);
3427 		ir_relation relation = get_Cmp_relation(sel);
3429 		if (ia32_cg_config.use_sse2) {
3430 			if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3431 				if (cmp_left == mux_true && cmp_right == mux_false) {
3432 					/* Mux(a <= b, a, b) => MIN */
3433 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3434 					                 match_commutative | match_am | match_two_users);
3435 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3436 					/* Mux(a <= b, b, a) => MAX */
3437 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3438 					                 match_commutative | match_am | match_two_users);
3440 			} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3441 				if (cmp_left == mux_true && cmp_right == mux_false) {
3442 					/* Mux(a >= b, a, b) => MAX */
3443 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3444 					                 match_commutative | match_am | match_two_users);
3445 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3446 					/* Mux(a >= b, b, a) => MIN */
3447 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3448 					                 match_commutative | match_am | match_two_users);
		/* Mux of two float constants: build a 2-element constant array
		 * and load entry 0 or 1 selected by the Setcc result */
3453 		if (is_Const(mux_true) && is_Const(mux_false)) {
3454 			ia32_address_mode_t am;
3459 			flags = get_flags_node(sel, &cc);
3460 			new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3462 			if (ia32_cg_config.use_sse2) {
3463 				/* cannot load from different mode on SSE */
3466 				/* x87 can load any mode */
3470 			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
			/* scale the 0/1 index by the element size */
3472 			if (new_mode == mode_F) {
3474 			} else if (new_mode == mode_D) {
3476 			} else if (new_mode == ia32_mode_E) {
3477 				/* arg, shift 16 NOT supported */
3479 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3481 				panic("Unsupported constant size");
3484 			am.ls_mode = new_mode;
3485 			am.addr.base = get_symconst_base();
3486 			am.addr.index = new_node;
3487 			am.addr.mem = nomem;
3489 			am.addr.scale = scale;
3490 			am.addr.use_frame = 0;
3491 			am.addr.tls_segment = false;
3492 			am.addr.frame_entity = NULL;
3493 			am.addr.symconst_sign = 0;
3494 			am.mem_proj = am.addr.mem;
3495 			am.op_type = ia32_AddrModeS;
3498 			am.pinned = op_pin_state_floats;
3500 			am.ins_permuted = false;
3502 			if (ia32_cg_config.use_sse2)
3503 				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3505 				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3506 			set_am_attributes(load, &am);
3508 			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3510 		panic("cannot transform floating point Mux");
3513 		assert(ia32_mode_needs_gp_reg(mode));
3516 		ir_node *cmp_left = get_Cmp_left(sel);
3517 		ir_node *cmp_right = get_Cmp_right(sel);
3518 		ir_relation relation = get_Cmp_relation(sel);
3519 		ir_node *val_true = mux_true;
3520 		ir_node *val_false = mux_false;
		/* normalize: put the zero constant on the false side */
3522 		if (is_Const(val_true) && is_Const_null(val_true)) {
3523 			ir_node *tmp = val_false;
3524 			val_false = val_true;
3526 			relation = get_negated_relation(relation);
		/* Mux(a cmp b, a - b, 0) => unsigned difference-or-zero */
3528 		if (is_Const_0(val_false) && is_Sub(val_true)) {
3529 			if ((relation & ir_relation_greater)
3530 					&& get_Sub_left(val_true) == cmp_left
3531 					&& get_Sub_right(val_true) == cmp_right) {
3532 				return create_doz(node, cmp_left, cmp_right);
3534 			if ((relation & ir_relation_less)
3535 					&& get_Sub_left(val_true) == cmp_right
3536 					&& get_Sub_right(val_true) == cmp_left) {
3537 				return create_doz(node, cmp_right, cmp_left);
3542 	flags = get_flags_node(sel, &cc);
3544 	if (is_Const(mux_true) && is_Const(mux_false)) {
3545 		/* both are const, good: build the Setcc + arithmetic sequence
		 * computed by find_const_transform (steps applied in reverse) */
3546 		ir_tarval *tv_true = get_Const_tarval(mux_true);
3547 		ir_tarval *tv_false = get_Const_tarval(mux_false);
3548 		setcc_transform_t res;
3551 		find_const_transform(cc, tv_true, tv_false, &res);
3553 		for (step = (int)res.num_steps - 1; step >= 0; --step) {
3556 			switch (res.steps[step].transform) {
3558 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3559 				add_ia32_am_offs_int(new_node, res.steps[step].val);
3561 			case SETCC_TR_ADDxx:
3562 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3565 				new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3566 				set_ia32_am_scale(new_node, res.steps[step].scale);
3567 				set_ia32_am_offs_int(new_node, res.steps[step].val);
3569 			case SETCC_TR_LEAxx:
3570 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3571 				set_ia32_am_scale(new_node, res.steps[step].scale);
3572 				set_ia32_am_offs_int(new_node, res.steps[step].val);
3575 				imm = ia32_immediate_from_long(res.steps[step].scale);
3576 				new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3579 				new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3582 				new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3585 				imm = ia32_immediate_from_long(res.steps[step].val);
3586 				new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3589 				new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3592 				new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3595 				panic("unknown setcc transform");
		/* general integer Mux: conditional move */
3599 		new_node = create_CMov(node, sel, flags, cc);
 * Create a conversion from x87 state register to general purpose:
 * fist the value to a frame slot, then Load it back as an integer.
 * For 32-bit unsigned values a 64-bit fist is used and only the low
 * 32 bits are loaded.
3608 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3610 	ir_node *block = be_transform_node(get_nodes_block(node));
3611 	ir_node *op = get_Conv_op(node);
3612 	ir_node *new_op = be_transform_node(op);
3613 	ir_graph *irg = current_ir_graph;
3614 	dbg_info *dbgi = get_irn_dbg_info(node);
3615 	ir_mode *mode = get_irn_mode(node);
3616 	ir_node *frame = get_irg_frame(irg);
3617 	ir_node *fist, *load, *mem;
3619 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3620 	set_irn_pinned(fist, op_pin_state_floats);
3621 	set_ia32_use_frame(fist);
3622 	set_ia32_op_type(fist, ia32_AddrModeD);
	/* fist and fisttp share the same memory Proj number */
3624 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3625 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3627 	assert(get_mode_size_bits(mode) <= 32);
3628 	/* exception we can only store signed 32 bit integers, so for unsigned
3629 		we store a 64bit (signed) integer and load the lower bits */
3630 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3631 		set_ia32_ls_mode(fist, mode_Ls);
3633 		set_ia32_ls_mode(fist, mode_Is);
3635 	SET_IA32_ORIG_NODE(fist, node);
	/* load the integer result back from the spill slot */
3638 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3640 	set_irn_pinned(load, op_pin_state_floats);
3641 	set_ia32_use_frame(load);
3642 	set_ia32_op_type(load, ia32_AddrModeS);
3643 	set_ia32_ls_mode(load, mode_Is);
	/* the frame entity must be sized for the 64-bit fist if one was used */
3644 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3645 		ia32_attr_t *attr = get_ia32_attr(load);
3646 		attr->data.need_64bit_stackent = 1;
3648 		ia32_attr_t *attr = get_ia32_attr(load);
3649 		attr->data.need_32bit_stackent = 1;
3651 	SET_IA32_ORIG_NODE(load, node);
3653 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
 * Creates a x87 strict Conv by placing a Store and a Load:
 * rounding the 80-bit register value through memory in @p tgt_mode.
3659 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3661 	ir_node *block = get_nodes_block(node);
3662 	ir_graph *irg = get_Block_irg(block);
3663 	dbg_info *dbgi = get_irn_dbg_info(node);
3664 	ir_node *frame = get_irg_frame(irg);
3666 	ir_node *store, *load;
3669 	store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3670 	set_ia32_use_frame(store);
3671 	set_ia32_op_type(store, ia32_AddrModeD);
3672 	SET_IA32_ORIG_NODE(store, node);
3674 	store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3676 	load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3677 	set_ia32_use_frame(load);
3678 	set_ia32_op_type(load, ia32_AddrModeS);
3679 	SET_IA32_ORIG_NODE(load, node);
3681 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/*
 * Create an integer-to-integer Conv, picking the 8-bit constructor
 * variant when the target mode is 8 bits wide.
 */
3685 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3686 		ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3688 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3690 	func = get_mode_size_bits(mode) == 8 ?
3691 		new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3692 	return func(dbgi, block, base, index, mem, val, mode);
/* Purpose: transform a GP-integer -> x87-float Conv. Fast path: let fild
 * read the operand directly via source address mode. Slow path: spill the
 * integer to the frame and fild it from there; unsigned 32bit values are
 * zero-extended to a 64bit slot because fild only reads signed integers. */
3696 * Create a conversion from general purpose to x87 register
3698 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3700 ir_node *src_block = get_nodes_block(node);
3701 ir_node *block = be_transform_node(src_block);
3702 ir_graph *irg = get_Block_irg(block);
3703 dbg_info *dbgi = get_irn_dbg_info(node);
3704 ir_node *op = get_Conv_op(node);
3705 ir_node *new_op = NULL;
3707 ir_mode *store_mode;
3713 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3714 if (possible_int_mode_for_fp(src_mode)) {
3715 ia32_address_mode_t am;
3717 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3718 if (am.op_type == ia32_AddrModeS) {
3719 ia32_address_t *addr = &am.addr;
3721 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3722 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3724 set_am_attributes(fild, &am);
3725 SET_IA32_ORIG_NODE(fild, node);
3727 fix_mem_proj(fild, &am);
3732 if (new_op == NULL) {
3733 new_op = be_transform_node(op);
3736 mode = get_irn_mode(op);
3738 /* first convert to 32 bit signed if necessary */
3739 if (get_mode_size_bits(src_mode) < 32) {
3740 if (!upper_bits_clean(new_op, src_mode)) {
3741 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3742 SET_IA32_ORIG_NODE(new_op, node);
3747 assert(get_mode_size_bits(mode) == 32);
/* spill the 32bit value to the frame so fild can read it */
3750 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3752 set_ia32_use_frame(store);
3753 set_ia32_op_type(store, ia32_AddrModeD);
3754 set_ia32_ls_mode(store, mode_Iu);
3756 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3758 /* exception for 32bit unsigned, do a 64bit spill+load */
3759 if (!mode_is_signed(mode)) {
3762 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store a zero into the upper 4 bytes -> value reads as signed 64bit */
3764 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3765 noreg_GP, nomem, zero_const);
3766 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3768 set_ia32_use_frame(zero_store);
3769 set_ia32_op_type(zero_store, ia32_AddrModeD);
3770 add_ia32_am_offs_int(zero_store, 4);
3771 set_ia32_ls_mode(zero_store, mode_Iu);
3773 in[0] = zero_store_mem;
3776 store_mem = new_rd_Sync(dbgi, block, 2, in);
3777 store_mode = mode_Ls;
3779 store_mode = mode_Is;
3783 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3785 set_ia32_use_frame(fild);
3786 set_ia32_op_type(fild, ia32_AddrModeS);
3787 set_ia32_ls_mode(fild, store_mode);
3789 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* Purpose: transform an integer->integer Conv. Picks the smaller of the two
 * modes (only the smaller one matters for a truncation/extension), skips the
 * conversion entirely when the operand's upper bits are already clean, and
 * otherwise emits a Conv_I2I that may fold a load via address mode. */
3795 * Create a conversion from one integer mode into another one
3797 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3798 dbg_info *dbgi, ir_node *block, ir_node *op,
3801 ir_node *new_block = be_transform_node(block);
3803 ir_mode *smaller_mode;
3804 ia32_address_mode_t am;
3805 ia32_address_t *addr = &am.addr;
3808 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3809 smaller_mode = src_mode;
3811 smaller_mode = tgt_mode;
3814 #ifdef DEBUG_libfirm
/* debug-only hint: a conv directly after a constant should have been folded */
3816 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3821 match_arguments(&am, block, NULL, op, NULL,
3822 match_am | match_8bit_am | match_16bit_am);
3824 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3825 /* unnecessary conv. in theory it shouldn't have been AM */
3826 assert(is_ia32_NoReg_GP(addr->base));
3827 assert(is_ia32_NoReg_GP(addr->index));
3828 assert(is_NoMem(addr->mem));
3829 assert(am.addr.offset == 0);
3830 assert(am.addr.symconst_ent == NULL);
3834 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3835 addr->mem, am.new_op2, smaller_mode);
3836 set_am_attributes(new_node, &am);
3837 /* match_arguments assume that out-mode = in-mode, this isn't true here
3839 set_ia32_ls_mode(new_node, smaller_mode);
3840 SET_IA32_ORIG_NODE(new_node, node);
3841 new_node = fix_mem_proj(new_node, &am);
/* Purpose: main dispatcher for Conv nodes. Handles the full matrix:
 * float->float (SSE Conv_FP2FP or x87 strict round-trip), float->int
 * (SSE Conv_FP2I or x87 fist path), int->float (SSE Conv_I2FP or x87 fild
 * path, with a strict conv when the float mantissa cannot hold the integer),
 * int->int (create_I2I_Conv), plus no-op and mode_b special cases. */
3846 * Transforms a Conv node.
3848 * @return The created ia32 Conv node
3850 static ir_node *gen_Conv(ir_node *node)
3852 ir_node *block = get_nodes_block(node);
3853 ir_node *new_block = be_transform_node(block);
3854 ir_node *op = get_Conv_op(node);
3855 ir_node *new_op = NULL;
3856 dbg_info *dbgi = get_irn_dbg_info(node);
3857 ir_mode *src_mode = get_irn_mode(op);
3858 ir_mode *tgt_mode = get_irn_mode(node);
3859 int src_bits = get_mode_size_bits(src_mode);
3860 int tgt_bits = get_mode_size_bits(tgt_mode);
3861 ir_node *res = NULL;
3863 assert(!mode_is_int(src_mode) || src_bits <= 32);
3864 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3866 /* modeB -> X should already be lowered by the lower_mode_b pass */
3867 if (src_mode == mode_b) {
3868 panic("ConvB not lowered %+F", node);
3871 if (src_mode == tgt_mode) {
3872 if (get_Conv_strict(node)) {
3873 if (ia32_cg_config.use_sse2) {
3874 /* when we are in SSE mode, we can kill all strict no-op conversion */
3875 return be_transform_node(op);
3878 /* this should be optimized already, but who knows... */
3879 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3880 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3881 return be_transform_node(op);
3885 if (mode_is_float(src_mode)) {
3886 new_op = be_transform_node(op);
3887 /* we convert from float ... */
3888 if (mode_is_float(tgt_mode)) {
3890 if (ia32_cg_config.use_sse2) {
3891 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3892 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3894 set_ia32_ls_mode(res, tgt_mode);
3896 if (get_Conv_strict(node)) {
3897 /* if fp_no_float_fold is not set then we assume that we
3898 * don't have any float operations in a non
3899 * mode_float_arithmetic mode and can skip strict upconvs */
3900 if (src_bits < tgt_bits) {
3901 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3904 res = gen_x87_strict_conv(tgt_mode, new_op);
3905 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3909 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3914 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3915 if (ia32_cg_config.use_sse2) {
3916 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3918 set_ia32_ls_mode(res, src_mode);
3920 return gen_x87_fp_to_gp(node);
3924 /* we convert from int ... */
3925 if (mode_is_float(tgt_mode)) {
3927 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3928 if (ia32_cg_config.use_sse2) {
3929 new_op = be_transform_node(op);
3930 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3932 set_ia32_ls_mode(res, tgt_mode);
3934 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3935 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3936 res = gen_x87_gp_to_fp(node, src_mode);
3938 /* we need a strict-Conv, if the int mode has more bits than the
3940 if (float_mantissa < int_mantissa) {
3941 res = gen_x87_strict_conv(tgt_mode, res);
3942 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3946 } else if (tgt_mode == mode_b) {
3947 /* mode_b lowering already took care that we only have 0/1 values */
3948 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3949 src_mode, tgt_mode));
3950 return be_transform_node(op);
3953 if (src_bits == tgt_bits) {
/* same width -> pure reinterpretation, no instruction needed */
3954 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3955 src_mode, tgt_mode));
3956 return be_transform_node(op);
3959 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Purpose: try to encode the node as an ia32 Immediate (subject to the given
 * constraint type); fall back to the regular transformation otherwise. */
3967 static ir_node *create_immediate_or_transform(ir_node *node,
3968 char immediate_constraint_type)
3970 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3971 if (new_node == NULL) {
3972 new_node = be_transform_node(node);
/* Purpose: transform a be_FrameAddr into a frame-relative address
 * computation. NOTE(review): the surviving header comment says "Add" but the
 * code visibly constructs an ia32 Lea with the frame entity attached. */
3978 * Transforms a FrameAddr into an ia32 Add.
3980 static ir_node *gen_be_FrameAddr(ir_node *node)
3982 ir_node *block = be_transform_node(get_nodes_block(node));
3983 ir_node *op = be_get_FrameAddr_frame(node);
3984 ir_node *new_op = be_transform_node(op);
3985 dbg_info *dbgi = get_irn_dbg_info(node);
3988 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3989 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3990 set_ia32_use_frame(new_node);
3992 SET_IA32_ORIG_NODE(new_node, node);
/* Purpose: transform a be_Return. Normally just duplicates the node; the
 * special case is SSE2 with a float return value, where the calling
 * convention returns in the x87 TOS: the xmm0 value is stored to the frame
 * and reloaded via vfld so the x87 register holds the result. */
3998 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
4000 static ir_node *gen_be_Return(ir_node *node)
4002 ir_graph *irg = current_ir_graph;
4003 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4004 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4005 ir_node *new_ret_val = be_transform_node(ret_val);
4006 ir_node *new_ret_mem = be_transform_node(ret_mem);
4007 ir_entity *ent = get_irg_entity(irg);
4008 ir_type *tp = get_entity_type(ent);
4009 dbg_info *dbgi = get_irn_dbg_info(node);
4010 ir_node *block = be_transform_node(get_nodes_block(node));
4024 assert(ret_val != NULL);
/* plain duplicate unless SSE2 + a primitive float result */
4025 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4026 return be_duplicate_node(node);
4029 res_type = get_method_res_type(tp, 0);
4031 if (! is_Primitive_type(res_type)) {
4032 return be_duplicate_node(node);
4035 mode = get_type_mode(res_type);
4036 if (! mode_is_float(mode)) {
4037 return be_duplicate_node(node);
4040 assert(get_method_n_ress(tp) == 1);
4042 frame = get_irg_frame(irg);
4044 /* store xmm0 onto stack */
4045 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4046 new_ret_mem, new_ret_val);
4047 set_ia32_ls_mode(sse_store, mode);
4048 set_ia32_op_type(sse_store, ia32_AddrModeD);
4049 set_ia32_use_frame(sse_store);
4050 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4052 /* load into x87 register */
4053 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4054 set_ia32_op_type(fld, ia32_AddrModeS);
4055 set_ia32_use_frame(fld);
4057 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4058 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4060 /* create a new return */
4061 arity = get_irn_arity(node);
4062 in = ALLOCAN(ir_node*, arity);
4063 pop = be_Return_get_pop(node);
/* rebuild the operand array, substituting the fld result / memory */
4064 for (i = 0; i < arity; ++i) {
4065 ir_node *op = get_irn_n(node, i);
4066 if (op == ret_val) {
4068 } else if (op == ret_mem) {
4071 in[i] = be_transform_node(op);
4074 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4075 copy_node_attr(irg, node, new_node);
/* Purpose: be_AddSP grows the stack frame; since the ia32 stack grows
 * downwards this becomes an ia32 SubSP, with its stack output pinned to ESP. */
4081 * Transform a be_AddSP into an ia32_SubSP.
4083 static ir_node *gen_be_AddSP(ir_node *node)
4085 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4086 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4088 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4089 match_am | match_immediate);
4090 assert(is_ia32_SubSP(new_node));
4091 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4092 &ia32_registers[REG_ESP]);
/* Purpose: mirror of gen_be_AddSP -- shrinking the frame becomes an ia32
 * AddSP, stack output pinned to ESP. */
4097 * Transform a be_SubSP into an ia32_AddSP
4099 static ir_node *gen_be_SubSP(ir_node *node)
4101 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4102 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4104 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4105 match_am | match_immediate);
4106 assert(is_ia32_AddSP(new_node));
4107 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4108 &ia32_registers[REG_ESP]);
/* Purpose: transform a Phi. The Phi itself is rebuilt with its OLD operands
 * (loops make the operands unavailable yet; they are fixed up later), and a
 * register requirement is attached according to the value's mode: gp for
 * integers/pointers, xmm or vfp for floats depending on SSE2. */
4113 * Change some phi modes
4115 static ir_node *gen_Phi(ir_node *node)
4117 const arch_register_req_t *req;
4118 ir_node *block = be_transform_node(get_nodes_block(node));
4119 ir_graph *irg = current_ir_graph;
4120 dbg_info *dbgi = get_irn_dbg_info(node);
4121 ir_mode *mode = get_irn_mode(node);
4124 if (ia32_mode_needs_gp_reg(mode)) {
4125 /* we shouldn't have any 64bit stuff around anymore */
4126 assert(get_mode_size_bits(mode) <= 32);
4127 /* all integer operations are on 32bit registers now */
4129 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4130 } else if (mode_is_float(mode)) {
4131 if (ia32_cg_config.use_sse2) {
4133 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4136 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4139 req = arch_no_register_req;
4142 /* phi nodes allow loops, so we use the old arguments for now
4143 * and fix this later */
4144 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4145 get_irn_in(node) + 1);
4146 copy_node_attr(irg, node, phi);
4147 be_duplicate_deps(node, phi);
4149 arch_set_irn_register_req_out(phi, 0, req);
/* predecessors still need transforming -- queue them */
4151 be_enqueue_preds(node);
/* Purpose: transform an unconditional Jmp 1:1 into an ia32 Jmp. */
4156 static ir_node *gen_Jmp(ir_node *node)
4158 ir_node *block = get_nodes_block(node);
4159 ir_node *new_block = be_transform_node(block);
4160 dbg_info *dbgi = get_irn_dbg_info(node);
4163 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4164 SET_IA32_ORIG_NODE(new_node, node);
/* Purpose: transform an indirect jump. The target may be folded into the
 * instruction as an address mode operand or an immediate. */
4172 static ir_node *gen_IJmp(ir_node *node)
4174 ir_node *block = get_nodes_block(node);
4175 ir_node *new_block = be_transform_node(block);
4176 dbg_info *dbgi = get_irn_dbg_info(node);
4177 ir_node *op = get_IJmp_target(node);
4179 ia32_address_mode_t am;
4180 ia32_address_t *addr = &am.addr;
/* jump targets are addresses */
4182 assert(get_irn_mode(op) == mode_P);
4184 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4186 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4187 addr->mem, am.new_op2);
4188 set_am_attributes(new_node, &am);
4189 SET_IA32_ORIG_NODE(new_node, node);
4191 new_node = fix_mem_proj(new_node, &am);
/* Purpose: transform the lowered 64bit-add helper node l_Add into a real
 * ia32 Add; forced into mode_T so callers can grab the flags/result Projs. */
4196 static ir_node *gen_ia32_l_Add(ir_node *node)
4198 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4199 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4200 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4201 match_commutative | match_am | match_immediate |
4202 match_mode_neutral);
/* gen_binop may already hand back a Proj of the Add */
4204 if (is_Proj(lowered)) {
4205 lowered = get_Proj_pred(lowered);
4207 assert(is_ia32_Add(lowered));
4208 set_irn_mode(lowered, mode_T);
/* Purpose: l_Adc (add with carry, upper half of a 64bit add) -> ia32 Adc. */
4214 static ir_node *gen_ia32_l_Adc(ir_node *node)
4216 return gen_binop_flags(node, new_bd_ia32_Adc,
4217 match_commutative | match_am | match_immediate |
4218 match_mode_neutral);
/* Purpose: l_Mul -> ia32 Mul (widening unsigned multiply). */
4222 * Transforms a l_MulS into a "real" MulS node.
4224 * @return the created ia32 Mul node
4226 static ir_node *gen_ia32_l_Mul(ir_node *node)
4228 ir_node *left = get_binop_left(node);
4229 ir_node *right = get_binop_right(node);
4231 return gen_binop(node, left, right, new_bd_ia32_Mul,
4232 match_commutative | match_am | match_mode_neutral);
/* Purpose: l_IMul -> ia32 IMul1OP (one-operand signed multiply form). */
4236 * Transforms a l_IMulS into a "real" IMul1OPS node.
4238 * @return the created ia32 IMul1OP node
4240 static ir_node *gen_ia32_l_IMul(ir_node *node)
4242 ir_node *left = get_binop_left(node);
4243 ir_node *right = get_binop_right(node);
4245 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4246 match_commutative | match_am | match_mode_neutral);
/* Purpose: l_Sub (lower half of a 64bit subtract) -> ia32 Sub, forced to
 * mode_T so the borrow flag can be projected out. */
4249 static ir_node *gen_ia32_l_Sub(ir_node *node)
4251 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4252 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4253 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4254 match_am | match_immediate | match_mode_neutral);
4256 if (is_Proj(lowered)) {
4257 lowered = get_Proj_pred(lowered);
4259 assert(is_ia32_Sub(lowered));
4260 set_irn_mode(lowered, mode_T);
/* Purpose: l_Sbb (subtract with borrow, upper half of 64bit sub) -> ia32 Sbb. */
4266 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4268 return gen_binop_flags(node, new_bd_ia32_Sbb,
4269 match_am | match_immediate | match_mode_neutral);
/* Purpose: convert a 64bit integer (given as low/high 32bit halves) to
 * float on x87: store both halves to a frame slot, fild the 64bit value.
 * For unsigned sources the value was loaded as signed, so when the sign bit
 * was set a bias constant (ULLBIAS) is added to correct the result; the
 * bias address is indexed by (high >> 31) so the add is a no-op for
 * non-negative values. */
4272 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4274 ir_node *src_block = get_nodes_block(node);
4275 ir_node *block = be_transform_node(src_block);
4276 ir_graph *irg = current_ir_graph;
4277 dbg_info *dbgi = get_irn_dbg_info(node);
4278 ir_node *frame = get_irg_frame(irg);
4279 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4280 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4281 ir_node *new_val_low = be_transform_node(val_low);
4282 ir_node *new_val_high = be_transform_node(val_high);
4284 ir_node *sync, *fild, *res;
4286 ir_node *store_high;
4290 if (ia32_cg_config.use_sse2) {
4291 panic("not implemented for SSE2");
4295 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4297 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4299 SET_IA32_ORIG_NODE(store_low, node);
4300 SET_IA32_ORIG_NODE(store_high, node);
4302 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4303 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4305 set_ia32_use_frame(store_low);
4306 set_ia32_use_frame(store_high);
4307 set_ia32_op_type(store_low, ia32_AddrModeD);
4308 set_ia32_op_type(store_high, ia32_AddrModeD);
4309 set_ia32_ls_mode(store_low, mode_Iu);
4310 set_ia32_ls_mode(store_high, mode_Is);
/* high half lives 4 bytes above the low half */
4311 add_ia32_am_offs_int(store_high, 4);
4315 sync = new_rd_Sync(dbgi, block, 2, in);
4318 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4320 set_ia32_use_frame(fild);
4321 set_ia32_op_type(fild, ia32_AddrModeS);
4322 set_ia32_ls_mode(fild, mode_Ls);
4324 SET_IA32_ORIG_NODE(fild, node);
4326 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4328 if (! mode_is_signed(get_irn_mode(val_high))) {
4329 ia32_address_mode_t am;
4331 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
/* address = base + (high >> 31) * scale + ULLBIAS symconst */
4334 am.addr.base = get_symconst_base();
4335 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4336 am.addr.mem = nomem;
4339 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4340 am.addr.tls_segment = false;
4341 am.addr.use_frame = 0;
4342 am.addr.frame_entity = NULL;
4343 am.addr.symconst_sign = 0;
4344 am.ls_mode = mode_F;
4345 am.mem_proj = nomem;
4346 am.op_type = ia32_AddrModeS;
4348 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4349 am.pinned = op_pin_state_floats;
4351 am.ins_permuted = false;
4353 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4354 am.new_op1, am.new_op2, get_fpcw());
4355 set_am_attributes(fadd, &am);
4357 set_irn_mode(fadd, mode_T);
4358 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Purpose: convert an x87 float to a 64bit integer by fist-ing it into a
 * 64bit frame slot; only the memory Proj is returned here, the two 32bit
 * halves are read back by gen_Proj_l_FloattoLL. */
4363 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4365 ir_node *src_block = get_nodes_block(node);
4366 ir_node *block = be_transform_node(src_block);
4367 ir_graph *irg = get_Block_irg(block);
4368 dbg_info *dbgi = get_irn_dbg_info(node);
4369 ir_node *frame = get_irg_frame(irg);
4370 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4371 ir_node *new_val = be_transform_node(val);
4374 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4375 SET_IA32_ORIG_NODE(fist, node);
4376 set_ia32_use_frame(fist);
4377 set_ia32_op_type(fist, ia32_AddrModeD);
4378 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may produce vfist or vfisttp; their M Proj numbers must agree */
4380 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4381 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Purpose: materialize one 32bit half of an l_FloattoLL result by loading
 * it from the 64bit frame slot the fist wrote (offset 4 for the high half). */
4384 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4386 ir_node *block = be_transform_node(get_nodes_block(node));
4387 ir_graph *irg = get_Block_irg(block);
4388 ir_node *pred = get_Proj_pred(node);
4389 ir_node *new_pred = be_transform_node(pred);
4390 ir_node *frame = get_irg_frame(irg);
4391 dbg_info *dbgi = get_irn_dbg_info(node);
4392 long pn = get_Proj_proj(node);
4397 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4398 SET_IA32_ORIG_NODE(load, node);
4399 set_ia32_use_frame(load);
4400 set_ia32_op_type(load, ia32_AddrModeS);
4401 set_ia32_ls_mode(load, mode_Iu);
4402 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4403 * 32 bit from it with this particular load */
4404 attr = get_ia32_attr(load);
4405 attr->data.need_64bit_stackent = 1;
4407 if (pn == pn_ia32_l_FloattoLL_res_high) {
4408 add_ia32_am_offs_int(load, 4);
4410 assert(pn == pn_ia32_l_FloattoLL_res_low);
4413 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/* Purpose: renumber Projs of a transformed be_AddSP to the corresponding
 * ia32 SubSP outputs (sp -> stack pinned to ESP, res -> addr, M -> M). */
4419 * Transform the Projs of an AddSP.
4421 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4423 ir_node *pred = get_Proj_pred(node);
4424 ir_node *new_pred = be_transform_node(pred);
4425 dbg_info *dbgi = get_irn_dbg_info(node);
4426 long proj = get_Proj_proj(node);
4428 if (proj == pn_be_AddSP_sp) {
4429 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4430 pn_ia32_SubSP_stack);
4431 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4433 } else if (proj == pn_be_AddSP_res) {
4434 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4435 pn_ia32_SubSP_addr);
4436 } else if (proj == pn_be_AddSP_M) {
4437 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4440 panic("No idea how to transform proj->AddSP");
/* Purpose: renumber Projs of a transformed be_SubSP to the corresponding
 * ia32 AddSP outputs (sp -> stack pinned to ESP, M -> M). */
4444 * Transform the Projs of a SubSP.
4446 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4448 ir_node *pred = get_Proj_pred(node);
4449 ir_node *new_pred = be_transform_node(pred);
4450 dbg_info *dbgi = get_irn_dbg_info(node);
4451 long proj = get_Proj_proj(node);
4453 if (proj == pn_be_SubSP_sp) {
4454 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4455 pn_ia32_AddSP_stack);
4456 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4458 } else if (proj == pn_be_SubSP_M) {
4459 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4462 panic("No idea how to transform proj->SubSP");
/* Purpose: renumber Projs of a transformed Load. Handles four possible
 * results of load transformation (ia32 Load, Conv_I2I with folded load,
 * SSE xLoad, x87 vfld) plus the case where the load got folded into a
 * source-address-mode user, in which case only a ProjM is legal. */
4466 * Transform and renumber the Projs from a Load.
4468 static ir_node *gen_Proj_Load(ir_node *node)
4471 ir_node *pred = get_Proj_pred(node);
4472 dbg_info *dbgi = get_irn_dbg_info(node);
4473 long proj = get_Proj_proj(node);
4475 /* loads might be part of source address mode matches, so we don't
4476 * transform the ProjMs yet (with the exception of loads whose result is
4479 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4482 /* this is needed, because sometimes we have loops that are only
4483 reachable through the ProjM */
4484 be_enqueue_preds(node);
4485 /* do it in 2 steps, to silence firm verifier */
4486 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4487 set_Proj_proj(res, pn_ia32_mem);
4491 /* renumber the proj */
4492 new_pred = be_transform_node(pred);
4493 if (is_ia32_Load(new_pred)) {
4494 switch ((pn_Load)proj) {
4496 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4498 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4499 case pn_Load_X_except:
4500 /* This Load might raise an exception. Mark it. */
4501 set_ia32_exc_label(new_pred, 1);
4502 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4503 case pn_Load_X_regular:
4504 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4506 } else if (is_ia32_Conv_I2I(new_pred) ||
4507 is_ia32_Conv_I2I8Bit(new_pred)) {
/* load was folded into a conv -- conv becomes multi-output */
4508 set_irn_mode(new_pred, mode_T);
4509 switch ((pn_Load)proj) {
4511 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4513 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4514 case pn_Load_X_except:
4515 /* This Load might raise an exception. Mark it. */
4516 set_ia32_exc_label(new_pred, 1);
4517 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4518 case pn_Load_X_regular:
4519 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4521 } else if (is_ia32_xLoad(new_pred)) {
4522 switch ((pn_Load)proj) {
4524 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4526 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4527 case pn_Load_X_except:
4528 /* This Load might raise an exception. Mark it. */
4529 set_ia32_exc_label(new_pred, 1);
4530 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4531 case pn_Load_X_regular:
4532 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4534 } else if (is_ia32_vfld(new_pred)) {
4535 switch ((pn_Load)proj) {
4537 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4539 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4540 case pn_Load_X_except:
4541 /* This Load might raise an exception. Mark it. */
4542 set_ia32_exc_label(new_pred, 1);
4543 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4544 case pn_Load_X_regular:
4545 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4548 /* can happen for ProJMs when source address mode happened for the
4551 /* however it should not be the result proj, as that would mean the
4552 load had multiple users and should not have been used for
4554 if (proj != pn_Load_M) {
4555 panic("internal error: transformed node not a Load");
4557 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4560 panic("No idea how to transform Proj(Load) %+F", node);
/* Purpose: renumber Projs of a transformed Store, for every node shape a
 * store may have become: Store/Store8Bit, vfist, vfisttp, vfst, xStore, a
 * Sync from gen_float_const_Store, or a destination-address-mode user. */
4563 static ir_node *gen_Proj_Store(ir_node *node)
4565 ir_node *pred = get_Proj_pred(node);
4566 ir_node *new_pred = be_transform_node(pred);
4567 dbg_info *dbgi = get_irn_dbg_info(node);
4568 long pn = get_Proj_proj(node);
4570 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4571 switch ((pn_Store)pn) {
4573 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4574 case pn_Store_X_except:
4575 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4576 case pn_Store_X_regular:
4577 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4579 } else if (is_ia32_vfist(new_pred)) {
4580 switch ((pn_Store)pn) {
4582 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4583 case pn_Store_X_except:
4584 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4585 case pn_Store_X_regular:
4586 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4588 } else if (is_ia32_vfisttp(new_pred)) {
4589 switch ((pn_Store)pn) {
4591 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4592 case pn_Store_X_except:
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4594 case pn_Store_X_regular:
4595 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4597 } else if (is_ia32_vfst(new_pred)) {
4598 switch ((pn_Store)pn) {
4600 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4601 case pn_Store_X_except:
4602 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4603 case pn_Store_X_regular:
4604 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4606 } else if (is_ia32_xStore(new_pred)) {
4607 switch ((pn_Store)pn) {
4609 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4610 case pn_Store_X_except:
4611 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4612 case pn_Store_X_regular:
4613 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4615 } else if (is_Sync(new_pred)) {
4616 /* hack for the case that gen_float_const_Store produced a Sync */
4617 if (pn == pn_Store_M) {
4620 panic("exception control flow not implemented yet");
4621 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4622 /* destination address mode */
4623 if (pn == pn_Store_M) {
4626 panic("exception control flow for destination AM not implemented yet");
4629 panic("No idea how to transform Proj(Store) %+F", node);
/* Purpose: renumber Projs of a transformed Div for the three possible
 * instruction forms: gp Div/IDiv, SSE xDiv, x87 vfdiv. Relies on Div and
 * IDiv sharing output numbers (asserted below). */
4633 * Transform and renumber the Projs from a Div or Mod instruction.
4635 static ir_node *gen_Proj_Div(ir_node *node)
4637 ir_node *pred = get_Proj_pred(node);
4638 ir_node *new_pred = be_transform_node(pred);
4639 dbg_info *dbgi = get_irn_dbg_info(node);
4640 long proj = get_Proj_proj(node);
4642 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4643 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4645 switch ((pn_Div)proj) {
4647 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4648 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4649 } else if (is_ia32_xDiv(new_pred)) {
4650 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4651 } else if (is_ia32_vfdiv(new_pred)) {
4652 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4654 panic("Div transformed to unexpected thing %+F", new_pred);
4657 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4658 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4659 } else if (is_ia32_xDiv(new_pred)) {
4660 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4661 } else if (is_ia32_vfdiv(new_pred)) {
4662 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4664 panic("Div transformed to unexpected thing %+F", new_pred);
4666 case pn_Div_X_except:
4667 set_ia32_exc_label(new_pred, 1);
4668 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4669 case pn_Div_X_regular:
4670 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4673 panic("No idea how to transform proj->Div");
/* Purpose: renumber Projs of a transformed Mod; only the integer Div/IDiv
 * forms can occur here, and the mod result comes out of the same
 * instruction's mod_res output. */
4677 * Transform and renumber the Projs from a Div or Mod instruction.
4679 static ir_node *gen_Proj_Mod(ir_node *node)
4681 ir_node *pred = get_Proj_pred(node);
4682 ir_node *new_pred = be_transform_node(pred);
4683 dbg_info *dbgi = get_irn_dbg_info(node);
4684 long proj = get_Proj_proj(node);
4686 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4687 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4688 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4690 switch ((pn_Mod)proj) {
4692 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4694 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4695 case pn_Mod_X_except:
4696 set_ia32_exc_label(new_pred, 1);
4697 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4698 case pn_Mod_X_regular:
4699 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4701 panic("No idea how to transform proj->Mod");
/* Purpose: renumber Projs of a transformed CopyB, distinguishing the
 * immediate-size form (CopyB_i) from the general form. */
4705 * Transform and renumber the Projs from a CopyB.
4707 static ir_node *gen_Proj_CopyB(ir_node *node)
4709 ir_node *pred = get_Proj_pred(node);
4710 ir_node *new_pred = be_transform_node(pred);
4711 dbg_info *dbgi = get_irn_dbg_info(node);
4712 long proj = get_Proj_proj(node);
4714 switch ((pn_CopyB)proj) {
4716 if (is_ia32_CopyB_i(new_pred)) {
4717 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4718 } else if (is_ia32_CopyB(new_pred)) {
4719 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4722 case pn_CopyB_X_regular:
4723 if (is_ia32_CopyB_i(new_pred)) {
4724 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4725 } else if (is_ia32_CopyB(new_pred)) {
4726 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4729 case pn_CopyB_X_except:
4730 if (is_ia32_CopyB_i(new_pred)) {
4731 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4732 } else if (is_ia32_CopyB(new_pred)) {
4733 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4738 panic("No idea how to transform proj->CopyB");
/* Purpose: transform a be_Call into an ia32 Call. The call target may be
 * folded via address mode or immediate; register parameters are sorted into
 * the eax/ecx/edx inputs by their limited-register requirement; float
 * returns flag the irg for the x87 simulator; SSE2 calls are recorded for
 * post-processing. ia32_no_pic_adjust is toggled around match_arguments for
 * PIC trampoline calls. */
4741 static ir_node *gen_be_Call(ir_node *node)
4743 dbg_info *const dbgi = get_irn_dbg_info(node);
4744 ir_node *const src_block = get_nodes_block(node);
4745 ir_node *const block = be_transform_node(src_block);
4746 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4747 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4748 ir_node *const sp = be_transform_node(src_sp);
4749 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4750 ia32_address_mode_t am;
4751 ia32_address_t *const addr = &am.addr;
4756 ir_node * eax = noreg_GP;
4757 ir_node * ecx = noreg_GP;
4758 ir_node * edx = noreg_GP;
4759 unsigned const pop = be_Call_get_pop(node);
4760 ir_type *const call_tp = be_Call_get_type(node);
4761 int old_no_pic_adjust;
4762 int throws_exception = ir_throws_exception(node);
4764 /* Run the x87 simulator if the call returns a float value */
4765 if (get_method_n_ress(call_tp) > 0) {
4766 ir_type *const res_type = get_method_res_type(call_tp, 0);
4767 ir_mode *const res_mode = get_type_mode(res_type);
4769 if (res_mode != NULL && mode_is_float(res_mode)) {
4770 ir_graph *irg = current_ir_graph;
4771 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4772 irg_data->do_x87_sim = 1;
4776 /* We do not want be_Call direct calls */
4777 assert(be_Call_get_entity(node) == NULL);
4779 /* special case for PIC trampoline calls */
4780 old_no_pic_adjust = ia32_no_pic_adjust;
4781 ia32_no_pic_adjust = be_options.pic;
4783 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4784 match_am | match_immediate);
4786 ia32_no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk the remaining args backwards */
4788 i = get_irn_arity(node) - 1;
4789 fpcw = be_transform_node(get_irn_n(node, i--));
4790 for (; i >= n_be_Call_first_arg; --i) {
4791 arch_register_req_t const *const req
4792 = arch_get_irn_register_req_in(node, i);
4793 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4795 assert(req->type == arch_register_req_type_limited);
4796 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4798 switch (*req->limited) {
4799 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4800 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4801 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4802 default: panic("Invalid GP register for register parameter");
4806 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4807 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4808 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4809 ir_set_throws_exception(call, throws_exception);
4810 set_am_attributes(call, &am);
4811 call = fix_mem_proj(call, &am);
4813 if (get_irn_pinned(node) == op_pin_state_pinned)
4814 set_irn_pinned(call, op_pin_state_pinned);
4816 SET_IA32_ORIG_NODE(call, node);
4818 if (ia32_cg_config.use_sse2) {
4819 /* remember this call for post-processing */
4820 ARR_APP1(ir_node *, call_list, call);
4821 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4828 * Transform Builtin trap
4830 static ir_node *gen_trap(ir_node *node)
4832 dbg_info *dbgi = get_irn_dbg_info(node);
4833 ir_node *block = be_transform_node(get_nodes_block(node));
4834 ir_node *mem = be_transform_node(get_Builtin_mem(node));
/* emit an ud2 node, threaded through the builtin's memory */
4836 return new_bd_ia32_UD2(dbgi, block, mem);
4840 * Transform Builtin debugbreak
4842 static ir_node *gen_debugbreak(ir_node *node)
4844 dbg_info *dbgi = get_irn_dbg_info(node);
4845 ir_node *block = be_transform_node(get_nodes_block(node));
4846 ir_node *mem = be_transform_node(get_Builtin_mem(node));
/* emit a breakpoint node, threaded through the builtin's memory */
4848 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4852 * Transform Builtin return_address
4854 static ir_node *gen_return_address(ir_node *node)
/* param 0 is the frame level (must be a Const), param 1 the frame pointer */
4856 ir_node *param = get_Builtin_param(node, 0);
4857 ir_node *frame = get_Builtin_param(node, 1);
4858 dbg_info *dbgi = get_irn_dbg_info(node);
4859 ir_tarval *tv = get_Const_tarval(param);
4860 ir_graph *irg = get_irn_irg(node);
4861 unsigned long value = get_tarval_long(tv);
4863 ir_node *block = be_transform_node(get_nodes_block(node));
4864 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' stack frames to reach the requested frame
 * (NOTE(review): the guard for value == 0 is presumably in the elided
 * lines above — confirm against the full source) */
4868 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4869 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4870 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4873 /* load the return address from this frame */
4874 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4876 set_irn_pinned(load, get_irn_pinned(node));
4877 set_ia32_op_type(load, ia32_AddrModeS);
4878 set_ia32_ls_mode(load, mode_Iu);
/* address = frame entity of the return address, offset 0 */
4880 set_ia32_am_offs_int(load, 0);
4881 set_ia32_use_frame(load);
4882 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4884 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all float/int load variants must share the same result proj number
 * for rematerialization to be safe */
4885 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4886 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4887 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4888 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4891 SET_IA32_ORIG_NODE(load, node);
4892 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4896 * Transform Builtin frame_address
4898 static ir_node *gen_frame_address(ir_node *node)
/* param 0 is the frame level (must be a Const), param 1 the frame pointer */
4900 ir_node *param = get_Builtin_param(node, 0);
4901 ir_node *frame = get_Builtin_param(node, 1);
4902 dbg_info *dbgi = get_irn_dbg_info(node);
4903 ir_tarval *tv = get_Const_tarval(param);
4904 ir_graph *irg = get_irn_irg(node);
4905 unsigned long value = get_tarval_long(tv);
4907 ir_node *block = be_transform_node(get_nodes_block(node));
4908 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' stack frames to reach the requested frame
 * (NOTE(review): the guard for value == 0 is presumably in the elided
 * lines above — confirm against the full source) */
4913 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4914 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4915 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4918 /* load the frame address from this frame */
4919 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4921 set_irn_pinned(load, get_irn_pinned(node));
4922 set_ia32_op_type(load, ia32_AddrModeS);
4923 set_ia32_ls_mode(load, mode_Iu);
4925 ent = ia32_get_frame_address_entity(irg);
4927 set_ia32_am_offs_int(load, 0);
4928 set_ia32_use_frame(load);
4929 set_ia32_frame_ent(load, ent);
4931 /* will fail anyway, but gcc does this: */
4932 set_ia32_am_offs_int(load, 0);
4935 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all float/int load variants must share the same result proj number
 * for rematerialization to be safe */
4936 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4937 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4938 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4939 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4942 SET_IA32_ORIG_NODE(load, node);
4943 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4947 * Transform Builtin prefetch
4949 static ir_node *gen_prefetch(ir_node *node)
/* Transform a prefetch builtin into an SSE or 3DNow! prefetch node.
 * param 0 = address, param 1 = read/write flag, param 2 = locality hint. */
4952 ir_node *ptr, *block, *mem, *base, *idx;
4953 ir_node *param, *new_node;
4956 ia32_address_t addr;
4958 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4959 /* no prefetch at all, route memory */
4960 return be_transform_node(get_Builtin_mem(node));
/* read/write flag: 1 means the data will be written */
4963 param = get_Builtin_param(node, 1);
4964 tv = get_Const_tarval(param);
4965 rw = get_tarval_long(tv);
4967 /* construct load address */
4968 memset(&addr, 0, sizeof(addr));
4969 ptr = get_Builtin_param(node, 0);
4970 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4977 base = be_transform_node(base);
4983 idx = be_transform_node(idx);
4986 dbgi = get_irn_dbg_info(node);
4987 block = be_transform_node(get_nodes_block(node));
4988 mem = be_transform_node(get_Builtin_mem(node));
4990 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4991 /* we have 3DNow!, this was already checked above */
4992 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4993 } else if (ia32_cg_config.use_sse_prefetch) {
4994 /* note: rw == 1 is IGNORED in that case */
4995 param = get_Builtin_param(node, 2);
4996 tv = get_Const_tarval(param);
4997 locality = get_tarval_long(tv);
4999 /* SSE style prefetch: choose the variant by locality hint
 * (case labels elided; lowest locality -> NTA, highest -> Prefetch0) */
5002 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5005 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5008 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5011 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5015 assert(ia32_cg_config.use_3dnow_prefetch);
5016 /* 3DNow! style prefetch */
5017 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5020 set_irn_pinned(new_node, get_irn_pinned(node));
5021 set_ia32_op_type(new_node, ia32_AddrModeS);
5022 set_ia32_ls_mode(new_node, mode_Bu);
5023 set_address(new_node, &addr);
5025 SET_IA32_ORIG_NODE(new_node, node);
/* only the memory result of the prefetch is visible to users */
5027 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5031 * Transform bsf like node
5033 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
/* Helper: transform a unary builtin (bsf/bsr/popcnt style) whose operand
 * may be folded into an address mode; 'func' constructs the ia32 node. */
5035 ir_node *param = get_Builtin_param(node, 0);
5036 dbg_info *dbgi = get_irn_dbg_info(node);
5038 ir_node *block = get_nodes_block(node);
5039 ir_node *new_block = be_transform_node(block);
5041 ia32_address_mode_t am;
5042 ia32_address_t *addr = &am.addr;
/* allow the operand to come directly from memory */
5045 match_arguments(&am, block, NULL, param, NULL, match_am);
5047 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5048 set_am_attributes(cnt, &am);
5049 set_ia32_ls_mode(cnt, get_irn_mode(param));
5051 SET_IA32_ORIG_NODE(cnt, node);
5052 return fix_mem_proj(cnt, &am);
5056 * Transform builtin ffs.
5058 static ir_node *gen_ffs(ir_node *node)
/* ffs(x) = bsf(x) + 1 for x != 0, and 0 for x == 0.
 * Implemented as (bsf(x) | -(x == 0)) + 1: the OR with -1 forces the
 * result to -1 in the zero case, the final +1 then yields 0. */
5060 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5061 ir_node *real = skip_Proj(bsf);
5062 dbg_info *dbgi = get_irn_dbg_info(real);
5063 ir_node *block = get_nodes_block(real);
5064 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make sure the bsf node is in mode_T so we can take its flags proj */
5067 if (get_irn_mode(real) != mode_T) {
5068 set_irn_mode(real, mode_T);
5069 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5072 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* ZF is set by bsf when the input was zero */
5075 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5076 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit setcc result to 32 bit */
5079 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5080 SET_IA32_ORIG_NODE(conv, node);
/* neg: 0 -> 0, 1 -> 0xFFFFFFFF */
5083 neg = new_bd_ia32_Neg(dbgi, block, conv);
5086 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5087 set_ia32_ls_mode(orn, mode_Iu);
5088 set_ia32_commutative(orn);
/* +1 via lea with displacement 1 */
5091 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5092 add_ia32_am_offs_int(add, 1);
5097 * Transform builtin clz.
5099 static ir_node *gen_clz(ir_node *node)
/* clz(x) = 31 - bsr(x); since bsr yields the index of the highest set
 * bit, xor-ing with 31 is equivalent to subtracting from 31. */
5101 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5102 ir_node *real = skip_Proj(bsr);
5103 dbg_info *dbgi = get_irn_dbg_info(real);
5104 ir_node *block = get_nodes_block(real);
5105 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5107 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5111 * Transform builtin ctz.
5113 static ir_node *gen_ctz(ir_node *node)
/* ctz is exactly what bsf computes (undefined for 0, same as the builtin) */
5115 return gen_unop_AM(node, new_bd_ia32_Bsf);
5119 * Transform builtin parity.
5121 static ir_node *gen_parity(ir_node *node)
5123 dbg_info *dbgi = get_irn_dbg_info(node);
5124 ir_node *block = get_nodes_block(node);
5125 ir_node *new_block = be_transform_node(block);
5126 ir_node *param = get_Builtin_param(node, 0);
5127 ir_node *new_param = be_transform_node(param);
5130 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5131 * so we have to do complicated xoring first.
5132 * (we should also better lower this before the backend so we still have a
5133 * chance for CSE, constant folding and other goodies for some of these
5136 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
/* fold the upper half into the lower half: x ^= x >> 16 */
5137 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5138 ir_node *xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
/* then fold the two remaining bytes into one via XorHighLow */
5140 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5143 set_ia32_ls_mode(xorn, mode_Iu);
5144 set_ia32_commutative(xorn);
/* we need the flags output of the final xor */
5146 set_irn_mode(xor2, mode_T);
5147 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5150 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5151 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit setcc result */
5154 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5155 nomem, new_node, mode_Bu);
5156 SET_IA32_ORIG_NODE(new_node, node);
5161 * Transform builtin popcount
5163 static ir_node *gen_popcount(ir_node *node)
5165 ir_node *param = get_Builtin_param(node, 0);
5166 dbg_info *dbgi = get_irn_dbg_info(node);
5168 ir_node *block = get_nodes_block(node);
5169 ir_node *new_block = be_transform_node(block);
5172 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5174 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5175 if (ia32_cg_config.use_popcnt) {
5176 ia32_address_mode_t am;
5177 ia32_address_t *addr = &am.addr;
/* popcnt also supports a 16-bit memory operand */
5180 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5182 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5183 set_am_attributes(cnt, &am);
5184 set_ia32_ls_mode(cnt, get_irn_mode(param));
5186 SET_IA32_ORIG_NODE(cnt, node);
5187 return fix_mem_proj(cnt, &am);
5190 new_param = be_transform_node(param);
5192 /* do the standard popcount algo */
5193 /* TODO: This is stupid, we should transform this before the backend,
5194 * to get CSE, localopts, etc. for the operations
5195 * TODO: This is also not the optimal algorithm (it is just the starting
5196 * example in hackers delight, they optimize it more on the following page)
5197 * But I'm too lazy to fix this now, as the code should get lowered before
5198 * the backend anyway.
/* tree of masked adds: sum bit pairs, then nibbles, bytes, halfwords
 * (Leas are used as adds throughout) */
5201 /* m1 = x & 0x55555555 */
5202 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5203 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5206 simm = ia32_create_Immediate(NULL, 0, 1);
5207 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5209 /* m2 = s1 & 0x55555555 */
5210 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (per-2-bit counts) */
5213 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5215 /* m4 = m3 & 0x33333333 */
5216 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5217 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5220 simm = ia32_create_Immediate(NULL, 0, 2);
5221 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5223 /* m5 = s2 & 0x33333333 */
5224 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (per-nibble counts) */
5227 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5229 /* m7 = m6 & 0x0F0F0F0F */
5230 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5231 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5234 simm = ia32_create_Immediate(NULL, 0, 4);
5235 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5237 /* m8 = s3 & 0x0F0F0F0F */
5238 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (per-byte counts) */
5241 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5243 /* m10 = m9 & 0x00FF00FF */
5244 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5245 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5248 simm = ia32_create_Immediate(NULL, 0, 8);
5249 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5251 /* m11 = s4 & 0x00FF00FF */
5252 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5254 /* m12 = m10 + m11 */
5255 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5257 /* m13 = m12 & 0x0000FFFF */
5258 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5259 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5261 /* s5 = m12 >> 16 */
5262 simm = ia32_create_Immediate(NULL, 0, 16);
5263 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5265 /* res = m13 + s5 */
5266 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5270 * Transform builtin byte swap.
5272 static ir_node *gen_bswap(ir_node *node)
5274 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5275 dbg_info *dbgi = get_irn_dbg_info(node);
5277 ir_node *block = get_nodes_block(node);
5278 ir_node *new_block = be_transform_node(block);
5279 ir_mode *mode = get_irn_mode(param);
5280 unsigned size = get_mode_size_bits(mode);
/* (the switch over 'size' is in elided lines; this branch handles 32 bit) */
5284 if (ia32_cg_config.use_bswap) {
5285 /* swap available */
5286 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* no bswap instruction: emulate a 32-bit byte swap with three rotates —
 * rotate low 16 bits by 8, whole register by 16, low 16 bits by 8 again */
5288 ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5289 ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5290 ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5291 ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5292 ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5293 set_ia32_ls_mode(rol1, mode_Hu);
5294 set_ia32_ls_mode(rol2, mode_Iu);
5295 set_ia32_ls_mode(rol3, mode_Hu);
5300 /* swap16 always available */
5301 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5304 panic("Invalid bswap size (%d)", size);
5309 * Transform builtin outport.
5311 static ir_node *gen_outport(ir_node *node)
/* param 0 = port number (immediate if possible), param 1 = value to write */
5313 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5314 ir_node *oldv = get_Builtin_param(node, 1);
5315 ir_mode *mode = get_irn_mode(oldv);
5316 ir_node *value = be_transform_node(oldv);
5317 ir_node *block = be_transform_node(get_nodes_block(node));
5318 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5319 dbg_info *dbgi = get_irn_dbg_info(node);
5321 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the operand size of the out instruction follows the value's mode */
5322 set_ia32_ls_mode(res, mode);
5327 * Transform builtin inport.
5329 static ir_node *gen_inport(ir_node *node)
/* the result mode comes from the builtin's method type */
5331 ir_type *tp = get_Builtin_type(node);
5332 ir_type *rstp = get_method_res_type(tp, 0);
5333 ir_mode *mode = get_type_mode(rstp);
5334 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5335 ir_node *block = be_transform_node(get_nodes_block(node));
5336 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5337 dbg_info *dbgi = get_irn_dbg_info(node);
5339 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5340 set_ia32_ls_mode(res, mode);
5342 /* check for missing Result Proj */
5347 * Transform a builtin inner trampoline
5349 static ir_node *gen_inner_trampoline(ir_node *node)
/* Materialize an inner trampoline at 'ptr': a short code sequence
 *   mov ecx, <env>   (opcode 0xB9 + 32-bit immediate)
 *   jmp rel <callee> (opcode 0xE9 + 32-bit relative displacement)
 * written byte by byte via a chain of stores. */
5351 ir_node *ptr = get_Builtin_param(node, 0);
5352 ir_node *callee = get_Builtin_param(node, 1);
5353 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5354 ir_node *mem = get_Builtin_mem(node);
5355 ir_node *block = get_nodes_block(node);
5356 ir_node *new_block = be_transform_node(block);
5360 ir_node *trampoline;
5362 dbg_info *dbgi = get_irn_dbg_info(node);
5363 ia32_address_t addr;
5365 /* construct store address */
5366 memset(&addr, 0, sizeof(addr));
5367 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5369 if (addr.base == NULL) {
5370 addr.base = noreg_GP;
5372 addr.base = be_transform_node(addr.base);
5375 if (addr.index == NULL) {
5376 addr.index = noreg_GP;
5378 addr.index = be_transform_node(addr.index);
5380 addr.mem = be_transform_node(mem);
5382 /* mov ecx, <env> */
5383 val = ia32_create_Immediate(NULL, 0, 0xB9);
5384 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5385 addr.index, addr.mem, val);
5386 set_irn_pinned(store, get_irn_pinned(node));
5387 set_ia32_op_type(store, ia32_AddrModeD);
5388 set_ia32_ls_mode(store, mode_Bu);
5389 set_address(store, &addr);
/* store the 32-bit environment pointer (the mov's immediate operand) */
5393 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5394 addr.index, addr.mem, env);
5395 set_irn_pinned(store, get_irn_pinned(node));
5396 set_ia32_op_type(store, ia32_AddrModeD);
5397 set_ia32_ls_mode(store, mode_Iu);
5398 set_address(store, &addr);
5402 /* jmp rel <callee> */
5403 val = ia32_create_Immediate(NULL, 0, 0xE9);
5404 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5405 addr.index, addr.mem, val);
5406 set_irn_pinned(store, get_irn_pinned(node));
5407 set_ia32_op_type(store, ia32_AddrModeD);
5408 set_ia32_ls_mode(store, mode_Bu);
5409 set_address(store, &addr);
5413 trampoline = be_transform_node(ptr);
5415 /* the callee is typically an immediate */
5416 if (is_SymConst(callee)) {
/* -10 accounts for the trampoline size so the jmp displacement is
 * relative to the end of the sequence */
5417 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5419 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5420 add_ia32_am_offs_int(rel, -10);
/* displacement = (callee - 10) - trampoline address */
5422 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5424 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5425 addr.index, addr.mem, rel);
5426 set_irn_pinned(store, get_irn_pinned(node));
5427 set_ia32_op_type(store, ia32_AddrModeD);
5428 set_ia32_ls_mode(store, mode_Iu);
5429 set_address(store, &addr);
/* tuple result: [0] = final memory, [1] = trampoline address */
5434 return new_r_Tuple(new_block, 2, in);
5438 * Transform Builtin node.
5440 static ir_node *gen_Builtin(ir_node *node)
/* Dispatch on the builtin kind to the specific transformer. */
5442 ir_builtin_kind kind = get_Builtin_kind(node);
5446 return gen_trap(node);
5447 case ir_bk_debugbreak:
5448 return gen_debugbreak(node);
5449 case ir_bk_return_address:
5450 return gen_return_address(node);
5451 case ir_bk_frame_address:
5452 return gen_frame_address(node);
5453 case ir_bk_prefetch:
5454 return gen_prefetch(node);
5456 return gen_ffs(node);
5458 return gen_clz(node);
5460 return gen_ctz(node);
5462 return gen_parity(node);
5463 case ir_bk_popcount:
5464 return gen_popcount(node);
5466 return gen_bswap(node);
5468 return gen_outport(node);
5470 return gen_inport(node);
5471 case ir_bk_inner_trampoline:
5472 return gen_inner_trampoline(node);
5474 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5478 * Transform Proj(Builtin) node.
5480 static ir_node *gen_Proj_Builtin(ir_node *proj)
/* Map the Projs of a Builtin onto the result(s) of the transformed node. */
5482 ir_node *node = get_Proj_pred(proj);
5483 ir_node *new_node = be_transform_node(node);
5484 ir_builtin_kind kind = get_Builtin_kind(node);
5487 case ir_bk_return_address:
5488 case ir_bk_frame_address:
5493 case ir_bk_popcount:
/* these builtins produce exactly one value result */
5495 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5498 case ir_bk_debugbreak:
5499 case ir_bk_prefetch:
/* these builtins only have a memory result */
5501 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport: distinguish value result from memory result */
5504 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5505 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5507 assert(get_Proj_proj(proj) == pn_Builtin_M);
5508 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5510 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returns a Tuple: [0] = memory, [1] = address */
5511 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5512 return get_Tuple_pred(new_node, 1);
5514 assert(get_Proj_proj(proj) == pn_Builtin_M);
5515 return get_Tuple_pred(new_node, 0);
5518 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5521 static ir_node *gen_be_IncSP(ir_node *node)
/* Duplicate the IncSP; on ia32 stack adjustment clobbers the flags. */
5523 ir_node *res = be_duplicate_node(node);
5524 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5530 * Transform the Projs from a be_Call.
5532 static ir_node *gen_Proj_be_Call(ir_node *node)
/* Map a Proj of a be_Call to the corresponding proj of the ia32_Call,
 * translating modes and proj numbers. */
5534 ir_node *call = get_Proj_pred(node);
5535 ir_node *new_call = be_transform_node(call);
5536 dbg_info *dbgi = get_irn_dbg_info(node);
5537 long proj = get_Proj_proj(node);
5538 ir_mode *mode = get_irn_mode(node);
5541 if (proj == pn_be_Call_M) {
/* NOTE(review): n_ia32_Call_mem looks like an input index used as a
 * proj number here — verify against gen_ia32_regalloc_if.h */
5542 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5544 /* transform call modes */
5545 if (mode_is_data(mode)) {
5546 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5550 /* Map from be_Call to ia32_Call proj number */
5551 if (proj == pn_be_Call_sp) {
5552 proj = pn_ia32_Call_stack;
5553 } else if (proj == pn_be_Call_M) {
5554 proj = pn_ia32_Call_M;
5555 } else if (proj == pn_be_Call_X_except) {
5556 proj = pn_ia32_Call_X_except;
5557 } else if (proj == pn_be_Call_X_regular) {
5558 proj = pn_ia32_Call_X_regular;
/* result proj: find the ia32_Call output with the same limited register
 * requirement as the be_Call result */
5560 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5561 int const n_outs = arch_get_irn_n_outs(new_call);
5564 assert(proj >= pn_be_Call_first_res);
5565 assert(req->type & arch_register_req_type_limited);
5567 for (i = 0; i < n_outs; ++i) {
5568 arch_register_req_t const *const new_req
5569 = arch_get_irn_register_req_out(new_call, i);
5571 if (!(new_req->type & arch_register_req_type_limited) ||
5572 new_req->cls != req->cls ||
5573 *new_req->limited != *req->limited)
5582 res = new_rd_Proj(dbgi, new_call, mode, proj);
5584 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5586 case pn_ia32_Call_stack:
5587 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5590 case pn_ia32_Call_fpcw:
5591 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5598 static ir_node *gen_Proj_ASM(ir_node *node)
/* Map a Proj of an ASM node; the memory output is always the last output
 * of the transformed ASM node. */
5600 ir_mode *mode = get_irn_mode(node);
5601 ir_node *pred = get_Proj_pred(node);
5602 ir_node *new_pred = be_transform_node(pred);
5603 long pos = get_Proj_proj(node);
5605 if (mode == mode_M) {
5606 pos = arch_get_irn_n_outs(new_pred)-1;
5607 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5609 } else if (mode_is_float(mode)) {
5612 panic("unexpected proj mode at ASM");
5615 return new_r_Proj(new_pred, mode, pos);
5619 * Transform and potentially renumber Proj nodes.
5621 static ir_node *gen_Proj(ir_node *node)
/* Dispatch on the Proj's predecessor opcode to the specific handler. */
5623 ir_node *pred = get_Proj_pred(node);
5626 switch (get_irn_opcode(pred)) {
5628 return gen_Proj_Load(node);
5630 return gen_Proj_Store(node);
5632 return gen_Proj_ASM(node);
5634 return gen_Proj_Builtin(node);
5636 return gen_Proj_Div(node);
5638 return gen_Proj_Mod(node);
5640 return gen_Proj_CopyB(node);
5642 return gen_Proj_be_SubSP(node);
5644 return gen_Proj_be_AddSP(node);
5646 return gen_Proj_be_Call(node);
5648 proj = get_Proj_proj(node);
5650 case pn_Start_X_initial_exec: {
5651 ir_node *block = get_nodes_block(pred);
5652 ir_node *new_block = be_transform_node(block);
5653 dbg_info *dbgi = get_irn_dbg_info(node);
5654 /* we exchange the ProjX with a jump */
5655 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5663 if (is_ia32_l_FloattoLL(pred)) {
5664 return gen_Proj_l_FloattoLL(node);
5666 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* GP-mode Projs of untransformed nodes get a fresh mode_Iu Proj that
 * keeps the original node number */
5670 ir_mode *mode = get_irn_mode(node);
5671 if (ia32_mode_needs_gp_reg(mode)) {
5672 ir_node *new_pred = be_transform_node(pred);
5673 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5674 get_Proj_proj(node));
5675 new_proj->node_nr = node->node_nr;
/* everything else is simply duplicated */
5680 return be_duplicate_node(node);
5684 * Enters all transform functions into the generic pointer
5686 static void register_transformers(void)
/* Register one transform callback per firm opcode; opcodes without an
 * entry keep the default set up by be_start_transform_setup(). */
5688 /* first clear the generic function pointer for all ops */
5689 be_start_transform_setup();
5691 be_set_transform_function(op_Add, gen_Add);
5692 be_set_transform_function(op_And, gen_And);
5693 be_set_transform_function(op_ASM, ia32_gen_ASM);
5694 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5695 be_set_transform_function(op_be_Call, gen_be_Call);
5696 be_set_transform_function(op_be_Copy, gen_be_Copy);
5697 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5698 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5699 be_set_transform_function(op_be_Return, gen_be_Return);
5700 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5701 be_set_transform_function(op_Builtin, gen_Builtin);
5702 be_set_transform_function(op_Cmp, gen_Cmp);
5703 be_set_transform_function(op_Cond, gen_Cond);
5704 be_set_transform_function(op_Const, gen_Const);
5705 be_set_transform_function(op_Conv, gen_Conv);
5706 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5707 be_set_transform_function(op_Div, gen_Div);
5708 be_set_transform_function(op_Eor, gen_Eor);
5709 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5710 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
/* ia32 nodes created during lowering are either expanded (l_*) or just
 * duplicated unchanged */
5711 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5712 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5713 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5714 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5715 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5716 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5717 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5718 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5719 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5720 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5721 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5722 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5723 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5724 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5725 be_set_transform_function(op_IJmp, gen_IJmp);
5726 be_set_transform_function(op_Jmp, gen_Jmp);
5727 be_set_transform_function(op_Load, gen_Load);
5728 be_set_transform_function(op_Minus, gen_Minus);
5729 be_set_transform_function(op_Mod, gen_Mod);
5730 be_set_transform_function(op_Mul, gen_Mul);
5731 be_set_transform_function(op_Mulh, gen_Mulh);
5732 be_set_transform_function(op_Mux, gen_Mux);
5733 be_set_transform_function(op_Not, gen_Not);
5734 be_set_transform_function(op_Or, gen_Or);
5735 be_set_transform_function(op_Phi, gen_Phi);
5736 be_set_transform_function(op_Proj, gen_Proj);
5737 be_set_transform_function(op_Rotl, gen_Rotl);
5738 be_set_transform_function(op_Shl, gen_Shl);
5739 be_set_transform_function(op_Shr, gen_Shr);
5740 be_set_transform_function(op_Shrs, gen_Shrs);
5741 be_set_transform_function(op_Store, gen_Store);
5742 be_set_transform_function(op_Sub, gen_Sub);
5743 be_set_transform_function(op_Switch, gen_Switch);
5744 be_set_transform_function(op_SymConst, gen_SymConst);
5745 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5749 * Pre-transform all unknown and noreg nodes.
5751 static void ia32_pretransform_node(void)
/* Pre-transform the per-irg helper nodes (noregs, GetEIP, fpu trunc
 * mode) and cache nomem/noreg_GP for the transformers. */
5753 ir_graph *irg = current_ir_graph;
5754 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5756 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5757 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5758 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5759 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5760 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5762 nomem = get_irg_no_mem(irg);
5763 noreg_GP = ia32_new_NoReg_gp(irg);
5767 * Post-process all calls if we are in SSE mode.
5768 * The ABI requires that the results are in st0, copy them
5769 * to a xmm register.
5771 static void postprocess_fp_call_results(void)
/* For every recorded call (SSE2 mode only), move each float result from
 * st0 to an xmm register: either patch an xStore user into a vfst, or
 * spill st0 to the frame and reload it with an xLoad. */
5775 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5776 ir_node *call = call_list[i];
5777 ir_type *mtp = call_types[i];
/* iterate the method results backwards */
5780 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5781 ir_type *res_tp = get_method_res_type(mtp, j);
5782 ir_node *res, *new_res;
5785 if (! is_atomic_type(res_tp)) {
5786 /* no floating point return */
5789 res_mode = get_type_mode(res_tp);
5790 if (! mode_is_float(res_mode)) {
5791 /* no floating point return */
/* the j-th float result lives in the vf0+j proj of the call */
5795 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5798 /* now patch the users */
5799 foreach_out_edge_safe(res, edge) {
5800 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no patching */
5803 if (be_is_Keep(succ))
5806 if (is_ia32_xStore(succ)) {
5807 /* an xStore can be patched into an vfst */
5808 dbg_info *db = get_irn_dbg_info(succ);
5809 ir_node *block = get_nodes_block(succ);
5810 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5811 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5812 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5813 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5814 ir_mode *mode = get_ia32_ls_mode(succ);
5816 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5817 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5818 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5819 if (is_ia32_use_frame(succ))
5820 set_ia32_use_frame(st);
5821 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5822 set_irn_pinned(st, get_irn_pinned(succ));
5823 set_ia32_op_type(st, ia32_AddrModeD);
/* the vfst proj numbers must line up with the xStore ones so the
 * old Projs stay valid after exchanging the node */
5825 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5826 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5827 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* generic user: create the store/load round-trip once, lazily */
5834 if (new_res == NULL) {
5835 dbg_info *db = get_irn_dbg_info(call);
5836 ir_node *block = get_nodes_block(call);
5837 ir_node *frame = get_irg_frame(current_ir_graph);
5838 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5839 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5840 ir_node *vfst, *xld, *new_mem;
5843 /* store st(0) on stack */
5844 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5846 set_ia32_op_type(vfst, ia32_AddrModeD);
5847 set_ia32_use_frame(vfst);
5849 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5851 /* load into SSE register */
5852 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5854 set_ia32_op_type(xld, ia32_AddrModeS);
5855 set_ia32_use_frame(xld);
5857 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5858 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute existing memory users over the new load's memory */
5860 if (old_mem != NULL) {
5861 edges_reroute(old_mem, new_mem);
/* redirect this user to the reloaded xmm value */
5865 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5871 /* do the transformation */
5872 void ia32_transform_graph(ir_graph *irg)
/* Entry point of the ia32 transform phase: set up transformers and
 * per-graph state, run the generic transform driver, then post-process
 * SSE float call results and tear the state down again. */
5876 register_transformers();
5877 initial_fpcw = NULL;
5878 ia32_no_pic_adjust = 0;
5880 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* heights are needed by the address-mode matcher */
5882 be_timer_push(T_HEIGHTS);
5883 ia32_heights = heights_new(irg);
5884 be_timer_pop(T_HEIGHTS);
5885 ia32_calculate_non_address_mode_nodes(irg);
5887 /* the transform phase is not safe for CSE (yet) because several nodes get
5888 * attributes set after their creation */
5889 cse_last = get_opt_cse();
5892 call_list = NEW_ARR_F(ir_node *, 0);
5893 call_types = NEW_ARR_F(ir_type *, 0);
5894 be_transform_graph(irg, ia32_pretransform_node);
5896 if (ia32_cg_config.use_sse2)
5897 postprocess_fp_call_results();
5898 DEL_ARR_F(call_types);
5899 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5901 set_opt_cse(cse_last);
5903 ia32_free_non_address_mode_nodes();
5904 heights_free(ia32_heights);
5905 ia32_heights = NULL;
5908 void ia32_init_transform(void)
5910 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");