2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* shorthand for the register-class modes of the x87 and SSE register files */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* pre-transform and transformed value of the initial x87 control word;
 * see get_fpcw() below */
static ir_node *old_initial_fpcw = NULL;
static ir_node *initial_fpcw = NULL;
int ia32_no_pic_adjust;

/* constructor-function types used by the generic gen_* helpers below;
 * NOTE(review): several parameter lists appear truncated in this extraction */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
ir_node *op1, ir_node *op2);
typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem);
typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

/* forward declarations for mutually-used helpers */
static ir_node *create_immediate_or_transform(ir_node *node,
char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
dbg_info *dbgi, ir_node *block,
ir_node *op, ir_node *orig_node);

/* it's enough to have those once per transformation run */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
168 if (mode == mode_D) {
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_options.pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a Const node into an ia32 constant.
 *
 * Float constants become either cheap synthesized values (xZero/vfldz/vfld1,
 * optionally shift tricks under CONSTRUCT_SSE_CONST) or loads from a
 * float-constant entity; integer constants become an ia32 Const immediate.
 * NOTE(review): declarations, braces and several return paths are missing
 * from this extraction — do not treat this body as compilable as-is.
 */
static ir_node *gen_Const(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
ir_tarval *tv = get_Const_tarval(node);
assert(is_Const(node));
if (mode_is_float(mode)) {
ir_graph *irg = get_irn_irg(node);
const arch_env_t *arch_env = be_get_irg_arch_env(irg);
ia32_isa_t *isa = (ia32_isa_t*) arch_env;
/* --- SSE2 path --- */
if (ia32_cg_config.use_sse2) {
if (tarval_is_null(tv)) {
/* 0.0: xorps reg,reg — no memory access needed */
load = new_bd_ia32_xZero(dbgi, block);
set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
} else if (tarval_is_one(tv)) {
/* 1.0: all-ones shifted left then right to build the exponent */
int cnst = mode == mode_F ? 26 : 55;
ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
ir_node *pslld, *psrld;
load = new_bd_ia32_xAllOnes(dbgi, block);
set_ia32_ls_mode(load, mode);
pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
set_ia32_ls_mode(pslld, mode);
psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
} else if (mode == mode_F) {
/* we can place any 32bit constant by using a movd gp, sse */
unsigned val = get_tarval_sub_bits(tv, 0) |
(get_tarval_sub_bits(tv, 1) << 8) |
(get_tarval_sub_bits(tv, 2) << 16) |
(get_tarval_sub_bits(tv, 3) << 24);
ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
load = new_bd_ia32_xMovd(dbgi, block, cnst);
set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
if (mode == mode_D) {
unsigned val = get_tarval_sub_bits(tv, 0) |
(get_tarval_sub_bits(tv, 1) << 8) |
(get_tarval_sub_bits(tv, 2) << 16) |
(get_tarval_sub_bits(tv, 3) << 24);
ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
ir_node *cnst, *psllq;
/* fine, lower 32bit are zero, produce 32bit value */
val = get_tarval_sub_bits(tv, 4) |
(get_tarval_sub_bits(tv, 5) << 8) |
(get_tarval_sub_bits(tv, 6) << 16) |
(get_tarval_sub_bits(tv, 7) << 24);
cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
load = new_bd_ia32_xMovd(dbgi, block, cnst);
set_ia32_ls_mode(load, mode);
psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
/* general case: load from a float-constant entity in the data section */
floatent = ia32_create_float_const_entity(isa, tv, NULL);
base = get_symconst_base();
load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_am_sc(load, floatent);
arch_add_irn_flags(load, arch_irn_flags_rematerializable);
res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path: fldz/fld1 for 0.0/1.0, entity load otherwise --- */
if (tarval_is_null(tv)) {
load = new_bd_ia32_vfldz(dbgi, block);
set_ia32_ls_mode(load, mode);
} else if (tarval_is_one(tv)) {
load = new_bd_ia32_vfld1(dbgi, block);
set_ia32_ls_mode(load, mode);
floatent = ia32_create_float_const_entity(isa, tv, NULL);
/* create_float_const_ent is smart and sometimes creates
ls_mode = get_type_mode(get_entity_type(floatent));
base = get_symconst_base();
load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_am_sc(load, floatent);
arch_add_irn_flags(load, arch_irn_flags_rematerializable);
res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
SET_IA32_ORIG_NODE(load, node);
} else { /* non-float mode */
/* normalize to 32bit unsigned before extracting the long value */
tv = tarval_convert_to(tv, mode_Iu);
if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
panic("couldn't convert constant tarval (%+F)", node);
val = get_tarval_long(tv);
cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
SET_IA32_ORIG_NODE(cnst, node);
/**
 * Transforms a SymConst node.
 *
 * Float-mode SymConsts become loads (SSE xLoad or x87 vfld); address
 * SymConsts become a LdTls+Lea pair for TLS entities or an ia32 Const
 * immediate carrying the entity otherwise.
 * NOTE(review): braces and some declarations/returns are missing from
 * this extraction.
 */
static ir_node *gen_SymConst(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2)
cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
set_ia32_am_sc(cnst, get_SymConst_entity(node));
set_ia32_use_frame(cnst);
/* only entity addresses are supported as SymConsts here */
if (get_SymConst_kind(node) != symconst_addr_ent) {
panic("backend only support symconst_addr_ent (at %+F)", node);
entity = get_SymConst_entity(node);
if (get_entity_owner(entity) == get_tls_type()) {
/* TLS entities are addressed relative to the thread-local base */
ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
set_ia32_am_sc(lea, entity);
cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
SET_IA32_ORIG_NODE(cnst, node);
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
/**
 * Generates an entity for a known FP const (used for FP Neg + Abs).
 * Entities are cached per constant kind; the ULLBIAS constant gets a
 * float[2] array initializer {0.0, bias}, the others a plain tarval entity.
 * NOTE(review): braces, declarations and early-return lines are missing
 * from this extraction.
 */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
{
static const struct {
const char *cnst_str;
} names [ia32_known_const_max] = {
{ "C_sfp_sign", "0x80000000", 0 },
{ "C_dfp_sign", "0x8000000000000000", 1 },
{ "C_sfp_abs", "0x7FFFFFFF", 0 },
{ "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
{ "C_ull_bias", "0x10000000000000000", 2 }
static ir_entity *ent_cache[ia32_known_const_max];
ir_entity *ent = ent_cache[kct];
ir_graph *irg = current_ir_graph;
const arch_env_t *arch_env = be_get_irg_arch_env(irg);
ia32_isa_t *isa = (ia32_isa_t*) arch_env;
const char *cnst_str = names[kct].cnst_str;
ident *name = new_id_from_str(names[kct].name);
/* pick the tarval mode matching the encoded constant width */
switch (names[kct].mode) {
case 0: mode = mode_Iu; break;
case 1: mode = mode_Lu; break;
case 2: mode = mode_F; break;
default: panic("internal compiler error");
tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
if (kct == ia32_ULLBIAS) {
/* ULLBIAS: private constant float[2] = { 0.0f, bias } */
ir_type *type = ia32_get_prim_type(mode_F);
ir_type *atype = ia32_create_float_array(type);
ir_initializer_t *initializer;
ent = new_entity(get_glob_type(), name, atype);
set_entity_ld_ident(ent, name);
set_entity_visibility(ent, ir_visibility_private);
add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
initializer = create_initializer_compound(2);
set_initializer_compound_value(initializer, 0,
create_initializer_tarval(get_mode_null(mode)));
set_initializer_compound_value(initializer, 1,
create_initializer_tarval(tv));
set_entity_initializer(ent, initializer);
ent = ia32_create_float_const_entity(isa, tv, name);
/* cache the entry */
ent_cache[kct] = ent;
return ent_cache[kct];
/**
 * Return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 * NOTE(review): braces and several return statements are missing from this
 * extraction.
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
ir_node *other, ir_node *other2,
/* float constants are always available */
if (is_Const(node)) {
mode = get_irn_mode(node);
if (mode_is_float(mode)) {
ir_tarval *tv = get_Const_tarval(node);
/* only constants exactly representable as double qualify */
if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
if (ia32_cg_config.use_sse2) {
if (is_simple_sse_Const(node))
if (is_simple_x87_Const(node))
if (get_irn_n_edges(node) > 1)
load = get_Proj_pred(node);
pn = get_Proj_proj(node);
if (!is_Load(load) || pn != pn_Load_res)
if (get_nodes_block(load) != block)
mode = get_irn_mode(node);
/* we can't fold mode_E AM */
if (mode == ia32_mode_E)
/* we only use address mode if we're the only user of the load */
if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
/* in some edge cases with address mode we might reach the load normally
 * and through some AM sequence, if it is already materialized then we
 * can't create an AM node from it */
if (be_is_transformed(node))
/* don't do AM if other node inputs depend on the load (via mem-proj) */
if (other != NULL && ia32_prevents_AM(block, load, other))
if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Matched address-mode description filled in by match_arguments():
 * operand kind, load/store mode and operand-order flags.
 * NOTE(review): several fields (addr, new_op1/new_op2, ls_mode, pinned,
 * mem_proj) are missing from this extraction. */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
ia32_op_type_t op_type;
unsigned commutative : 1;
unsigned ins_permuted : 1;
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/**
 * Fill @p am with a source address mode for @p node: either a float-constant
 * entity address (for Const) or the address of a Proj(Load).
 * NOTE(review): braces, declarations and an early return are missing from
 * this extraction.
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
ia32_create_am_flags_t flags)
ia32_address_t *addr = &am->addr;
/* floating point immediates */
if (is_Const(node)) {
ir_graph *irg = get_irn_irg(node);
const arch_env_t *arch_env = be_get_irg_arch_env(irg);
ia32_isa_t *isa = (ia32_isa_t*) arch_env;
ir_tarval *tv = get_Const_tarval(node);
ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
addr->base = get_symconst_base();
addr->index = noreg_GP;
addr->symconst_ent = entity;
addr->tls_segment = false;
am->ls_mode = get_type_mode(get_entity_type(entity));
/* constants may float freely — they are rematerializable */
am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); fold the load into the address mode */
load = get_Proj_pred(node);
ptr = get_Load_ptr(load);
mem = get_Load_mem(load);
new_mem = be_transform_node(mem);
am->pinned = get_irn_pinned(load);
am->ls_mode = get_Load_mode(load);
am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
/* construct load address */
ia32_create_address_mode(addr, ptr, flags);
addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
646 set_ia32_commutative(node);
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 /* we only want to skip the conv when we're the only user
666 * (because this test is used in the context of address-mode selection
667 * and we don't want to use address mode for multiple users) */
668 if (get_irn_n_edges(node) > 1)
671 src_mode = get_irn_mode(get_Conv_op(node));
672 dest_mode = get_irn_mode(node);
674 ia32_mode_needs_gp_reg(src_mode) &&
675 ia32_mode_needs_gp_reg(dest_mode) &&
676 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
679 /** Skip all Down-Conv's on a given node and return the resulting node. */
680 ir_node *ia32_skip_downconv(ir_node *node)
682 while (is_downconv(node))
683 node = get_Conv_op(node);
688 static bool is_sameconv(ir_node *node)
696 /* we only want to skip the conv when we're the only user
697 * (because this test is used in the context of address-mode selection
698 * and we don't want to use address mode for multiple users) */
699 if (get_irn_n_edges(node) > 1)
702 src_mode = get_irn_mode(get_Conv_op(node));
703 dest_mode = get_irn_mode(node);
705 ia32_mode_needs_gp_reg(src_mode) &&
706 ia32_mode_needs_gp_reg(dest_mode) &&
707 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
710 /** Skip all signedness convs */
711 static ir_node *ia32_skip_sameconv(ir_node *node)
713 while (is_sameconv(node))
714 node = get_Conv_op(node);
719 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
721 ir_mode *mode = get_irn_mode(node);
726 if (mode_is_signed(mode)) {
731 block = get_nodes_block(node);
732 dbgi = get_irn_dbg_info(node);
734 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 * NOTE(review): braces and several lines (16bit-AM fallthrough, try_am
 * handling, mem default) are missing from this extraction.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
ir_node *op1, ir_node *op2, ir_node *other_op,
ia32_address_t *addr = &am->addr;
ir_mode *mode = get_irn_mode(op2);
int mode_bits = get_mode_size_bits(mode);
ir_node *new_op1, *new_op2;
unsigned commutative;
int use_am_and_immediates;
memset(am, 0, sizeof(am[0]));
/* decode the supported matching modes from the flags */
commutative = (flags & match_commutative) != 0;
use_am_and_immediates = (flags & match_am_and_immediates) != 0;
use_am = (flags & match_am) != 0;
use_immediate = (flags & match_immediate) != 0;
assert(!use_am_and_immediates || use_immediate);
assert(!commutative || op1 != NULL);
assert(use_am || !(flags & match_8bit_am));
assert(use_am || !(flags & match_16bit_am));
if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
(mode_bits == 16 && !(flags & match_16bit_am))) {
/* we can simply skip downconvs for mode neutral nodes: the upper bits
 * can be random for these operations */
if (flags & match_mode_neutral) {
op2 = ia32_skip_downconv(op2);
op1 = ia32_skip_downconv(op1);
/* signedness-only convs never change the bits */
op2 = ia32_skip_sameconv(op2);
op1 = ia32_skip_sameconv(op1);
/* match immediates. firm nodes are normalized: constants are always on the
 * right side of a binop */
if (!(flags & match_try_am) && use_immediate) {
new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source address mode on op2, then (if commutative) on op1 */
if (new_op2 == NULL &&
use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
build_address(am, op2, ia32_create_am_normal);
new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
if (mode_is_float(mode)) {
new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
am->op_type = ia32_AddrModeS;
} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
build_address(am, op1, ia32_create_am_normal);
if (mode_is_float(mode)) {
noreg = ia32_new_NoReg_vfp(current_ir_graph);
if (new_op2 != NULL) {
new_op1 = be_transform_node(op2);
/* operands were swapped: record so flag-users can compensate */
am->ins_permuted = true;
am->op_type = ia32_AddrModeS;
am->op_type = ia32_Normal;
if (flags & match_try_am) {
mode = get_irn_mode(op2);
/* upconv sub-32bit operands when the operation requires 32 bit */
if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
new_op2 = create_upconv(op2, NULL);
am->ls_mode = mode_Iu;
new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
new_op2 = be_transform_node(op2);
am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* default missing address parts */
if (addr->base == NULL)
addr->base = noreg_GP;
if (addr->index == NULL)
addr->index = noreg_GP;
if (addr->mem == NULL)
am->new_op1 = new_op1;
am->new_op2 = new_op2;
am->commutative = commutative;
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
879 if (am->mem_proj == NULL)
882 /* we have to create a mode_T so the old MemProj can attach to us */
883 mode = get_irn_mode(node);
884 load = get_Proj_pred(am->mem_proj);
886 be_set_transformed_node(load, node);
888 if (mode != mode_T) {
889 set_irn_mode(node, mode_T);
890 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 construct_binop_func *func, match_flags_t flags)
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 block = get_nodes_block(node);
914 match_arguments(&am, block, op1, op2, NULL, flags);
916 dbgi = get_irn_dbg_info(node);
917 new_block = be_transform_node(block);
918 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 am.new_op1, am.new_op2);
920 set_am_attributes(new_node, &am);
921 /* we can't use source address mode anymore when using immediates */
922 if (!(flags & match_am_and_immediates) &&
923 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 set_ia32_am_support(new_node, ia32_am_none);
925 SET_IA32_ORIG_NODE(new_node, node);
927 new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op; the asserts below
 * verify they line up with the generated Adc/Sbb input indices so
 * gen_binop_flags() can address any of them uniformly.
 * NOTE(review): the enum's opening line is missing from this extraction.
 */
n_ia32_l_binop_left, /**< ia32 left input */
n_ia32_l_binop_right, /**< ia32 right input */
n_ia32_l_binop_eflags /**< ia32 eflags input */
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructor ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 ir_node *src_block = get_nodes_block(node);
959 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 ir_node *block, *new_node, *new_eflags;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
967 match_arguments(&am, src_block, op1, op2, eflags, flags);
969 dbgi = get_irn_dbg_info(node);
970 block = be_transform_node(src_block);
971 new_eflags = be_transform_node(eflags);
972 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2, new_eflags);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
986 static ir_node *get_fpcw(void)
988 if (initial_fpcw != NULL)
991 initial_fpcw = be_transform_node(old_initial_fpcw);
995 static ir_node *skip_float_upconv(ir_node *node)
997 ir_mode *mode = get_irn_mode(node);
998 assert(mode_is_float(mode));
1000 while (is_Conv(node)) {
1001 ir_node *pred = get_Conv_op(node);
1002 ir_mode *pred_mode = get_irn_mode(pred);
1005 * suboptimal, but without this check the address mode matcher
1006 * can incorrectly think that something has only 1 user
1008 if (get_irn_n_edges(node) > 1)
1011 if (!mode_is_float(pred_mode)
1012 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1020 static void check_x87_floatmode(ir_mode *mode)
1022 if (mode != ia32_mode_E) {
1023 panic("ia32: x87 only supports x86 extended float mode");
1028 * Construct a standard binary operation, set AM and immediate if required.
1030 * @param op1 The first operand
1031 * @param op2 The second operand
1032 * @param func The node constructor function
1033 * @return The constructed ia32 node.
1035 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1036 construct_binop_float_func *func)
1042 ia32_address_mode_t am;
1043 ia32_address_t *addr = &am.addr;
1044 ia32_x87_attr_t *attr;
1045 /* All operations are considered commutative, because there are reverse
1047 match_flags_t flags = match_commutative | match_am;
1049 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1050 check_x87_floatmode(mode);
1052 op1 = skip_float_upconv(op1);
1053 op2 = skip_float_upconv(op2);
1055 block = get_nodes_block(node);
1056 match_arguments(&am, block, op1, op2, NULL, flags);
1058 dbgi = get_irn_dbg_info(node);
1059 new_block = be_transform_node(block);
1060 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1061 am.new_op1, am.new_op2, get_fpcw());
1062 set_am_attributes(new_node, &am);
1064 attr = get_ia32_x87_attr(new_node);
1065 attr->attr.data.ins_permuted = am.ins_permuted;
1067 SET_IA32_ORIG_NODE(new_node, node);
1069 new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if required.
 *
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): declarations, the conv-skip loop body and the final return
 * are missing from this extraction.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
construct_shift_func *func,
match_flags_t flags)
ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
ir_mode *mode = get_irn_mode(node);
assert(! mode_is_float(mode));
assert(flags & match_immediate);
assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
if (get_mode_modulo_shift(mode) != 32)
panic("modulo shift!=32 not supported by ia32 backend");
/* widen/skip convs on the value being shifted */
if (flags & match_mode_neutral) {
op1 = ia32_skip_downconv(op1);
new_op1 = be_transform_node(op1);
} else if (get_mode_size_bits(mode) != 32) {
new_op1 = create_upconv(op1, node);
new_op1 = be_transform_node(op1);
/* the shift amount can be any mode that is bigger than 5 bits, since all
 * other bits are ignored anyway */
while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
ir_node *const op = get_Conv_op(op2);
if (mode_is_float(get_irn_mode(op)))
assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
new_op2 = create_immediate_or_transform(op2, 0);
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, new_op1, new_op2);
SET_IA32_ORIG_NODE(new_node, node);
/* lowered shift instruction may have a dependency operand, handle it here */
if (get_irn_arity(node) == 3) {
/* we have a dependency */
ir_node* dep = get_irn_n(node, 2);
if (get_irn_n_edges(dep) > 1) {
/* ... which has at least one user other than 'node' */
ir_node *new_dep = be_transform_node(dep);
add_irn_dep(new_node, new_dep);
1139 * Construct a standard unary operation, set AM and immediate if required.
1141 * @param op The operand
1142 * @param func The node constructor function
1143 * @return The constructed ia32 node.
1145 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1146 match_flags_t flags)
1149 ir_node *block, *new_block, *new_op, *new_node;
1151 assert(flags == 0 || flags == match_mode_neutral);
1152 if (flags & match_mode_neutral) {
1153 op = ia32_skip_downconv(op);
1156 new_op = be_transform_node(op);
1157 dbgi = get_irn_dbg_info(node);
1158 block = get_nodes_block(node);
1159 new_block = be_transform_node(block);
1160 new_node = func(dbgi, new_block, new_op);
1162 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Build an ia32 Lea node from a matched address. Because Lea ignores
 * segment overrides, TLS addresses are handled by materializing the TLS
 * base with LdTls and folding it in via an extra Lea.
 * NOTE(review): declarations, null-checks around base/idx transformation
 * and the final return are missing from this extraction.
 */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
ia32_address_t *addr)
base = be_transform_node(base);
idx = be_transform_node(idx);
/* segment overrides are ineffective for Leas :-( so we have to patch
 * the address computation instead */
if (addr->tls_segment) {
ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
assert(addr->symconst_ent != NULL);
if (base == noreg_GP)
base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
addr->tls_segment = false;
res = new_bd_ia32_Lea(dbgi, block, base, idx);
set_address(res, addr);
1207 * Returns non-zero if a given address mode has a symbolic or
1208 * numerical offset != 0.
1210 static int am_has_immediates(const ia32_address_t *addr)
1212 return addr->offset != 0 || addr->symconst_ent != NULL
1213 || addr->frame_entity || addr->use_frame;
/* constructor type for double-wide shifts (ShlD/ShrD);
 * NOTE(review): the trailing parameter line is missing from this extraction */
typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
ir_node *high, ir_node *low,

/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * Only op3 can be an immediate.
 * NOTE(review): declarations and the final return are missing from this
 * extraction.
 */
static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
ir_node *high, ir_node *low, ir_node *count,
new_shiftd_func func)
ir_node *new_block = be_transform_node(block);
ir_node *new_high = be_transform_node(high);
ir_node *new_low = be_transform_node(low);
/* the shift amount can be any mode that is bigger than 5 bits, since all
 * other bits are ignored anyway */
while (is_Conv(count) &&
get_irn_n_edges(count) == 1 &&
mode_is_int(get_irn_mode(count))) {
assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
count = get_Conv_op(count);
new_count = create_immediate_or_transform(count, 0);
new_node = func(dbgi, new_block, new_high, new_low, new_count);
1252 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1255 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1257 if (is_Const(value1) && is_Const(value2)) {
1258 ir_tarval *tv1 = get_Const_tarval(value1);
1259 ir_tarval *tv2 = get_Const_tarval(value2);
1260 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1261 long v1 = get_tarval_long(tv1);
1262 long v2 = get_tarval_long(tv2);
/* accept the pair (v1, 32-v1); v1 <= v2 rejects negative/oversized counts */
1263 return v1 <= v2 && v2 == 32-v1;
/* Try to recognize the Or/Add combination produced by 64bit-lowering of
 * double-word shifts and turn it into an ia32 ShlD/ShrD.
 * NOTE(review): the tail of this function (fallthrough return) is not
 * visible here; presumably it returns NULL when no pattern matches. */
1269 static ir_node *match_64bit_shift(ir_node *node)
1271 ir_node *op1 = get_binop_left(node);
1272 ir_node *op2 = get_binop_right(node);
1273 assert(is_Or(node) || is_Add(node));
1281 /* match ShlD operation */
1282 if (is_Shl(op1) && is_Shr(op2)) {
1283 ir_node *shl_right = get_Shl_right(op1);
1284 ir_node *shl_left = get_Shl_left(op1);
1285 ir_node *shr_right = get_Shr_right(op2);
1286 ir_node *shr_left = get_Shr_left(op2);
1287 /* constant ShlD operation */
1288 if (is_complementary_shifts(shl_right, shr_right)) {
1289 dbg_info *dbgi = get_irn_dbg_info(node);
1290 ir_node *block = get_nodes_block(node);
1291 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1294 /* constant ShrD operation */
1295 if (is_complementary_shifts(shr_right, shl_right)) {
1296 dbg_info *dbgi = get_irn_dbg_info(node);
1297 ir_node *block = get_nodes_block(node);
1298 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1301 /* lower_dw produces the following for ShlD:
1302 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1303 if (is_Shr(shr_left) && is_Not(shr_right)
1304 && is_Const_1(get_Shr_right(shr_left))
1305 && get_Not_op(shr_right) == shl_right) {
1306 dbg_info *dbgi = get_irn_dbg_info(node);
1307 ir_node *block = get_nodes_block(node);
1308 ir_node *val_h = get_Shr_left(shr_left);
1309 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1312 /* lower_dw produces the following for ShrD:
1313 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1314 if (is_Shl(shl_left) && is_Not(shl_right)
1315 && is_Const_1(get_Shl_right(shl_left))
1316 && get_Not_op(shl_right) == shr_right) {
1317 dbg_info *dbgi = get_irn_dbg_info(node);
1318 ir_node *block = get_nodes_block(node);
1319 ir_node *val_h = get_Shl_left(shl_left);
1320 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1329 * Creates an ia32 Add.
1331 * @return the created ia32 Add node
1333 static ir_node *gen_Add(ir_node *node)
1335 ir_mode *mode = get_irn_mode(node);
1336 ir_node *op1 = get_Add_left(node);
1337 ir_node *op2 = get_Add_right(node);
1339 ir_node *block, *new_block, *new_node, *add_immediate_op;
1340 ia32_address_t addr;
1341 ia32_address_mode_t am;
/* an Add may be the top of a lowered 64bit shift pattern */
1343 new_node = match_64bit_shift(node);
1344 if (new_node != NULL)
1347 if (mode_is_float(mode)) {
1348 if (ia32_cg_config.use_sse2)
1349 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1350 match_commutative | match_am);
1352 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1355 ia32_mark_non_am(node);
/* integer Add strategy (in order of preference):
1359 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1360 * 1. Add with immediate -> Lea
1361 * 2. Add with possible source address mode -> Add
1362 * 3. Otherwise -> Lea
/* force the Add into an address-mode description first */
1364 memset(&addr, 0, sizeof(addr));
1365 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1366 add_immediate_op = NULL;
1368 dbgi = get_irn_dbg_info(node);
1369 block = get_nodes_block(node);
1370 new_block = be_transform_node(block);
/* case 0: the whole Add folded into immediates -> emit a Const */
1373 if (addr.base == NULL && addr.index == NULL) {
1374 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1375 addr.symconst_sign, 0, addr.offset);
1376 SET_IA32_ORIG_NODE(new_node, node);
1379 /* add with immediate? */
1380 if (addr.index == NULL) {
1381 add_immediate_op = addr.base;
1382 } else if (addr.base == NULL && addr.scale == 0) {
1383 add_immediate_op = addr.index;
1386 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself — warn, this should have been
 * optimized away earlier */
1387 if (!am_has_immediates(&addr)) {
1388 #ifdef DEBUG_libfirm
1389 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1392 return be_transform_node(add_immediate_op);
/* case 1: operand + immediate -> Lea */
1395 new_node = create_lea_from_address(dbgi, new_block, &addr);
1396 SET_IA32_ORIG_NODE(new_node, node);
1400 /* test if we can use source address mode */
1401 match_arguments(&am, block, op1, op2, NULL, match_commutative
1402 | match_mode_neutral | match_am | match_immediate | match_try_am);
1404 /* construct an Add with source address mode */
1405 if (am.op_type == ia32_AddrModeS) {
1406 ia32_address_t *am_addr = &am.addr;
1407 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1408 am_addr->index, am_addr->mem, am.new_op1,
1410 set_am_attributes(new_node, &am);
1411 SET_IA32_ORIG_NODE(new_node, node);
1413 new_node = fix_mem_proj(new_node, &am);
1418 /* otherwise construct a lea */
1419 new_node = create_lea_from_address(dbgi, new_block, &addr);
1420 SET_IA32_ORIG_NODE(new_node, node);
1425 * Creates an ia32 Mul.
1427 * @return the created ia32 Mul node
1429 static ir_node *gen_Mul(ir_node *node)
1431 ir_node *op1 = get_Mul_left(node);
1432 ir_node *op2 = get_Mul_right(node);
1433 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul when available, x87 vfmul otherwise */
1435 if (mode_is_float(mode)) {
1436 if (ia32_cg_config.use_sse2)
1437 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1438 match_commutative | match_am);
1440 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul, allowing address mode and immediates */
1442 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1443 match_commutative | match_am | match_mode_neutral |
1444 match_immediate | match_am_and_immediates);
1448 * Creates an ia32 Mulh.
1449 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1450 * this result while Mul returns the lower 32 bit.
1452 * @return the created ia32 Mulh node
1454 static ir_node *gen_Mulh(ir_node *node)
1456 dbg_info *dbgi = get_irn_dbg_info(node);
1457 ir_node *op1 = get_Mulh_left(node);
1458 ir_node *op2 = get_Mulh_right(node);
1459 ir_mode *mode = get_irn_mode(node);
1461 ir_node *proj_res_high;
/* only 32bit Mulh is implemented for this backend */
1463 if (get_mode_size_bits(mode) != 32) {
1464 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; in both cases the upper
 * half of the 64bit product is extracted via a Proj */
1467 if (mode_is_signed(mode)) {
1468 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1469 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1471 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1472 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1474 return proj_res_high;
1478 * Creates an ia32 And.
1480 * @return The created ia32 And node
1482 static ir_node *gen_And(ir_node *node)
1484 ir_node *op1 = get_And_left(node);
1485 ir_node *op2 = get_And_right(node);
1486 assert(! mode_is_float(get_irn_mode(node)));
1488 /* is it a zero extension? */
1489 if (is_Const(op2)) {
1490 ir_tarval *tv = get_Const_tarval(op2);
1491 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero extension from 8/16 bit — emit an
 * I2I conversion instead of an And */
1493 if (v == 0xFF || v == 0xFFFF) {
1494 dbg_info *dbgi = get_irn_dbg_info(node);
1495 ir_node *block = get_nodes_block(node);
1502 assert(v == 0xFFFF);
1505 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with address mode/immediate support */
1510 return gen_binop(node, op1, op2, new_bd_ia32_And,
1511 match_commutative | match_mode_neutral | match_am | match_immediate);
1515 * Creates an ia32 Or.
1517 * @return The created ia32 Or node
1519 static ir_node *gen_Or(ir_node *node)
1521 ir_node *op1 = get_Or_left(node);
1522 ir_node *op2 = get_Or_right(node);
/* an Or may be the top of a lowered 64bit shift pattern (ShlD/ShrD) */
1525 res = match_64bit_shift(node);
1529 assert (! mode_is_float(get_irn_mode(node)));
1530 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1531 | match_mode_neutral | match_am | match_immediate);
1537 * Creates an ia32 Eor.
1539 * @return The created ia32 Eor node
1541 static ir_node *gen_Eor(ir_node *node)
1543 ir_node *op1 = get_Eor_left(node);
1544 ir_node *op2 = get_Eor_right(node);
/* exclusive-or maps directly onto ia32 Xor */
1546 assert(! mode_is_float(get_irn_mode(node)));
1547 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1548 | match_mode_neutral | match_am | match_immediate);
1553 * Creates an ia32 Sub.
1555 * @return The created ia32 Sub node
1557 static ir_node *gen_Sub(ir_node *node)
1559 ir_node *op1 = get_Sub_left(node);
1560 ir_node *op2 = get_Sub_right(node);
1561 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE2 xSub when available, x87 vfsub otherwise */
1563 if (mode_is_float(mode)) {
1564 if (ia32_cg_config.use_sse2)
1565 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1567 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalized to Add earlier */
1570 if (is_Const(op2)) {
1571 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1575 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1576 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address
 * mode: combine the node's original memory (src_mem) with the memory of
 * the folded load (am_mem) while avoiding self-referencing memory loops. */
1579 static ir_node *transform_AM_mem(ir_node *const block,
1580 ir_node *const src_val,
1581 ir_node *const src_mem,
1582 ir_node *const am_mem)
1584 if (is_NoMem(am_mem)) {
1585 return be_transform_node(src_mem);
1586 } else if (is_Proj(src_val) &&
1588 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1589 /* avoid memory loop */
1591 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync, dropping predecessors that come from the
 * consumed load itself */
1592 ir_node *const ptr_pred = get_Proj_pred(src_val);
1593 int const arity = get_Sync_n_preds(src_mem);
1598 NEW_ARR_A(ir_node*, ins, arity + 1);
1600 /* NOTE: This sometimes produces dead-code because the old sync in
1601 * src_mem might not be used anymore, we should detect this case
1602 * and kill the sync... */
1603 for (i = arity - 1; i >= 0; --i) {
1604 ir_node *const pred = get_Sync_pred(src_mem, i);
1606 /* avoid memory loop */
1607 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1610 ins[n++] = be_transform_node(pred);
1613 if (n==1 && ins[0] == am_mem) {
1615 /* creating a new Sync and relying on CSE may fail,
1616 * if am_mem is a ProjM, which does not yet verify. */
1620 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1624 ins[0] = be_transform_node(src_mem);
1626 return new_r_Sync(block, 2, ins);
1631 * Create a 32bit to 64bit signed extension.
1633 * @param dbgi debug info
1634 * @param block the block where node nodes should be placed
1635 * @param val the value to extend
1636 * @param orig the original node
1638 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1639 ir_node *val, const ir_node *orig)
/* two strategies: the short cltd encoding (needs eax/edx) or an
 * arithmetic right shift by 31 to replicate the sign bit */
1644 if (ia32_cg_config.use_short_sex_eax) {
1645 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1646 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1648 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1649 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1651 SET_IA32_ORIG_NODE(res, orig);
1656 * Generates an ia32 Div with additional infrastructure for the
1657 * register allocator if needed.
/* shared implementation for firm Div and Mod nodes (both map to the
 * ia32 Div/IDiv instruction, differing only in which Proj is used) */
1659 static ir_node *create_Div(ir_node *node)
1661 dbg_info *dbgi = get_irn_dbg_info(node);
1662 ir_node *block = get_nodes_block(node);
1663 ir_node *new_block = be_transform_node(block);
1664 int throws_exception = ir_throws_exception(node);
1671 ir_node *sign_extension;
1672 ia32_address_mode_t am;
1673 ia32_address_t *addr = &am.addr;
1675 /* the upper bits have random contents for smaller modes */
1676 switch (get_irn_opcode(node)) {
1678 op1 = get_Div_left(node);
1679 op2 = get_Div_right(node);
1680 mem = get_Div_mem(node);
1681 mode = get_Div_resmode(node);
1684 op1 = get_Mod_left(node);
1685 op2 = get_Mod_right(node);
1686 mem = get_Mod_mem(node);
1687 mode = get_Mod_resmode(node);
1690 panic("invalid divmod node %+F", node);
1693 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1695 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1696 is the memory of the consumed address. We can have only the second op as address
1697 in Div nodes, so check only op2. */
1698 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs the dividend sign-extended into edx (IDiv);
 * unsigned division zeroes the high half via a Const 0 (Div) */
1700 if (mode_is_signed(mode)) {
1701 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1702 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1703 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1705 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1707 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1708 addr->index, new_mem, am.new_op2,
1709 am.new_op1, sign_extension);
1711 ir_set_throws_exception(new_node, throws_exception);
1713 set_irn_pinned(new_node, get_irn_pinned(node));
1715 set_am_attributes(new_node, &am);
1716 SET_IA32_ORIG_NODE(new_node, node);
1718 new_node = fix_mem_proj(new_node, &am);
1724 * Generates an ia32 Mod.
/* Mod shares its implementation with Div; only the result Proj differs. */
1726 static ir_node *gen_Mod(ir_node *node)
1728 return create_Div(node);
1732 * Generates an ia32 Div.
1734 static ir_node *gen_Div(ir_node *node)
1736 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv when available, x87 vfdiv otherwise */
1737 if (mode_is_float(mode)) {
1738 ir_node *op1 = get_Div_left(node);
1739 ir_node *op2 = get_Div_right(node);
1741 if (ia32_cg_config.use_sse2) {
1742 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1744 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division uses the shared Div/Mod construction */
1748 return create_Div(node);
1752 * Creates an ia32 Shl.
1754 * @return The created ia32 Shl node
1756 static ir_node *gen_Shl(ir_node *node)
1758 ir_node *left = get_Shl_left(node);
1759 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: the low bits do not depend on upconv */
1761 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1762 match_mode_neutral | match_immediate);
1766 * Creates an ia32 Shr.
1768 * @return The created ia32 Shr node
1770 static ir_node *gen_Shr(ir_node *node)
1772 ir_node *left = get_Shr_left(node);
1773 ir_node *right = get_Shr_right(node);
/* logical right shift is NOT mode-neutral (upper bits matter) */
1775 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1781 * Creates an ia32 Sar.
1783 * @return The created ia32 Sar node
1785 static ir_node *gen_Shrs(ir_node *node)
1787 ir_node *left = get_Shrs_left(node);
1788 ir_node *right = get_Shrs_right(node);
/* Shrs by 31 of a 32bit value is a plain sign extension — use the
 * cheaper cltd/Sar-by-31 construction */
1790 if (is_Const(right)) {
1791 ir_tarval *tv = get_Const_tarval(right);
1792 long val = get_tarval_long(tv);
1794 /* this is a sign extension */
1795 dbg_info *dbgi = get_irn_dbg_info(node);
1796 ir_node *block = be_transform_node(get_nodes_block(node));
1797 ir_node *new_op = be_transform_node(left);
1799 return create_sex_32_64(dbgi, block, new_op, node);
1803 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, c), c) with c == 16 or 24 sign-extends the low 16/8 bits;
 * emit an I2I conversion instead of the shift pair */
1804 if (is_Const(right) && is_Shl(left)) {
1805 ir_node *shl_left = get_Shl_left(left);
1806 ir_node *shl_right = get_Shl_right(left);
1807 if (is_Const(shl_right)) {
1808 ir_tarval *tv1 = get_Const_tarval(right);
1809 ir_tarval *tv2 = get_Const_tarval(shl_right);
1810 if (tv1 == tv2 && tarval_is_long(tv1)) {
1811 long val = get_tarval_long(tv1);
1812 if (val == 16 || val == 24) {
1813 dbg_info *dbgi = get_irn_dbg_info(node);
1814 ir_node *block = get_nodes_block(node);
1824 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic right shift (Sar), not mode-neutral */
1833 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1839 * Creates an ia32 Rol.
1841 * @param op1 The first operator
1842 * @param op2 The second operator
1843 * @return The created ia32 RotL node
1845 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1847 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1853 * Creates an ia32 Ror.
1854 * NOTE: There is no RotR with immediate because this would always be a RotL
1855 * "imm-mode_size_bits" which can be pre-calculated.
1857 * @param op1 The first operator
1858 * @param op2 The second operator
1859 * @return The created ia32 RotR node
1861 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1863 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1869 * Creates an ia32 RotR or RotL (depending on the found pattern).
1871 * @return The created ia32 RotL or RotR node
1873 static ir_node *gen_Rotl(ir_node *node)
1875 ir_node *op1 = get_Rotl_left(node);
1876 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is Rotr(x, n): strip the Minus and emit a RotR */
1878 if (is_Minus(op2)) {
1879 return gen_Ror(node, op1, get_Minus_op(op2));
1882 return gen_Rol(node, op1, op2);
1888 * Transforms a Minus node.
1890 * @return The created ia32 Minus node
1892 static ir_node *gen_Minus(ir_node *node)
1894 ir_node *op = get_Minus_op(node);
1895 ir_node *block = be_transform_node(get_nodes_block(node));
1896 dbg_info *dbgi = get_irn_dbg_info(node);
1897 ir_mode *mode = get_irn_mode(node);
1902 if (mode_is_float(mode)) {
1903 ir_node *new_op = be_transform_node(op);
1904 if (ia32_cg_config.use_sse2) {
1905 /* TODO: non-optimal... if we have many xXors, then we should
1906 * rather create a load for the const and use that instead of
1907 * several AM nodes... */
/* SSE2: negate by xoring with a sign-bit constant loaded from a
 * known-constant entity via address mode */
1908 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1910 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1911 noreg_GP, nomem, new_op, noreg_xmm);
1913 size = get_mode_size_bits(mode);
1914 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1916 set_ia32_am_sc(new_node, ent);
1917 set_ia32_op_type(new_node, ia32_AddrModeS);
1918 set_ia32_ls_mode(new_node, mode);
/* x87: change-sign instruction */
1920 check_x87_floatmode(mode);
1921 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer: plain Neg */
1924 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1927 SET_IA32_ORIG_NODE(new_node, node);
1933 * Transforms a Not node.
1935 * @return The created ia32 Not node
1937 static ir_node *gen_Not(ir_node *node)
1939 ir_node *op = get_Not_op(node);
/* boolean and float Not must have been lowered before this pass */
1941 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1942 assert (! mode_is_float(get_irn_mode(node)));
1944 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build a float absolute-value (optionally negated, x87 only) computation.
 * SSE2 masks off the sign bit with an And against a known constant; x87
 * uses fabs (and fchs for the negate variant). */
1947 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1948 bool negate, ir_node *node)
1950 ir_node *new_block = be_transform_node(block);
1951 ir_mode *mode = get_irn_mode(op);
1952 ir_node *new_op = be_transform_node(op);
1957 assert(mode_is_float(mode));
1959 if (ia32_cg_config.use_sse2) {
1960 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1961 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1962 noreg_GP, nomem, new_op, noreg_fp);
1964 size = get_mode_size_bits(mode);
1965 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1967 set_ia32_am_sc(new_node, ent);
1969 SET_IA32_ORIG_NODE(new_node, node);
1971 set_ia32_op_type(new_node, ia32_AddrModeS);
1972 set_ia32_ls_mode(new_node, mode);
1974 /* TODO, implement -Abs case */
1977 check_x87_floatmode(mode);
1978 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1979 SET_IA32_ORIG_NODE(new_node, node);
/* negate variant: append a change-sign after the fabs */
1981 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1982 SET_IA32_ORIG_NODE(new_node, node);
1990 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1992 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1994 dbg_info *dbgi = get_irn_dbg_info(cmp);
1995 ir_node *block = get_nodes_block(cmp);
1996 ir_node *new_block = be_transform_node(block);
1997 ir_node *op1 = be_transform_node(x);
1998 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of x into the carry flag */
2000 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation to an ia32 condition code.  Three cases:
 * float (parity-aware codes), signed integer, unsigned integer.
 * For signed < and >= without possible overflow, the cheaper sign-flag
 * tests are used instead of the full less/greater-equal codes. */
2003 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2005 bool overflow_possible)
2007 if (mode_is_float(mode)) {
2009 case ir_relation_equal: return ia32_cc_float_equal;
2010 case ir_relation_less: return ia32_cc_float_below;
2011 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2012 case ir_relation_greater: return ia32_cc_float_above;
2013 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2014 case ir_relation_less_greater: return ia32_cc_not_equal;
2015 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2016 case ir_relation_unordered: return ia32_cc_parity;
2017 case ir_relation_unordered_equal: return ia32_cc_equal;
2018 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2019 case ir_relation_unordered_less_equal:
2020 return ia32_cc_float_unordered_below_equal;
2021 case ir_relation_unordered_greater:
2022 return ia32_cc_float_unordered_above;
2023 case ir_relation_unordered_greater_equal:
2024 return ia32_cc_float_unordered_above_equal;
2025 case ir_relation_unordered_less_greater:
2026 return ia32_cc_float_not_equal;
2027 case ir_relation_false:
2028 case ir_relation_true:
2029 /* should we introduce a jump always/jump never? */
2032 panic("Unexpected float pnc");
2033 } else if (mode_is_signed(mode)) {
2035 case ir_relation_unordered_equal:
2036 case ir_relation_equal: return ia32_cc_equal;
2037 case ir_relation_unordered_less:
2038 case ir_relation_less:
2039 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2040 case ir_relation_unordered_less_equal:
2041 case ir_relation_less_equal: return ia32_cc_less_equal;
2042 case ir_relation_unordered_greater:
2043 case ir_relation_greater: return ia32_cc_greater;
2044 case ir_relation_unordered_greater_equal:
2045 case ir_relation_greater_equal:
2046 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2047 case ir_relation_unordered_less_greater:
2048 case ir_relation_less_greater: return ia32_cc_not_equal;
2049 case ir_relation_less_equal_greater:
2050 case ir_relation_unordered:
2051 case ir_relation_false:
2052 case ir_relation_true:
2053 /* introduce jump always/jump never? */
2056 panic("Unexpected pnc");
/* unsigned integer comparisons use the below/above family */
2059 case ir_relation_unordered_equal:
2060 case ir_relation_equal: return ia32_cc_equal;
2061 case ir_relation_unordered_less:
2062 case ir_relation_less: return ia32_cc_below;
2063 case ir_relation_unordered_less_equal:
2064 case ir_relation_less_equal: return ia32_cc_below_equal;
2065 case ir_relation_unordered_greater:
2066 case ir_relation_greater: return ia32_cc_above;
2067 case ir_relation_unordered_greater_equal:
2068 case ir_relation_greater_equal: return ia32_cc_above_equal;
2069 case ir_relation_unordered_less_greater:
2070 case ir_relation_less_greater: return ia32_cc_not_equal;
2071 case ir_relation_less_equal_greater:
2072 case ir_relation_unordered:
2073 case ir_relation_false:
2074 case ir_relation_true:
2075 /* introduce jump always/jump never? */
2078 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and compute the ia32
 * condition code the consumer must test (returned via cc_out). */
2082 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2084 /* must have a Cmp as input */
2085 ir_relation relation = get_Cmp_relation(cmp);
2086 ir_node *l = get_Cmp_left(cmp);
2087 ir_node *r = get_Cmp_right(cmp);
2088 ir_mode *mode = get_irn_mode(l);
2089 bool overflow_possible;
2092 /* check for bit-test */
2093 if (ia32_cg_config.use_bt
2094 && (relation == ir_relation_equal
2095 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2096 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2098 ir_node *la = get_And_left(l);
2099 ir_node *ra = get_And_right(l);
2106 ir_node *c = get_Shl_left(la);
2107 if (is_Const_1(c) && is_Const_0(r)) {
2108 /* (1 << n) & ra) */
2109 ir_node *n = get_Shl_right(la);
2110 flags = gen_bt(cmp, ra, n);
2111 /* the bit is copied into the CF flag */
2112 if (relation & ir_relation_equal)
2113 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2115 *cc_out = ia32_cc_below; /* test for CF=1 */
2121 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2122 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2123 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2124 * a predecessor node). So add the < bit.
2125 * (Note that we do not want to produce <=> (which can happen for
2126 * unoptimized code), because no x86 flag can represent that */
2127 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2128 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparing against constant 0 cannot overflow, allowing cheaper
 * sign-flag condition codes */
2130 overflow_possible = true;
2131 if (is_Const(r) && is_Const_null(r))
2132 overflow_possible = false;
2134 /* just do a normal transformation of the Cmp */
2135 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2136 flags = be_transform_node(cmp);
2141 * Transforms a Load.
2143 * @return the created ia32 Load node
2145 static ir_node *gen_Load(ir_node *node)
2147 ir_node *old_block = get_nodes_block(node);
2148 ir_node *block = be_transform_node(old_block);
2149 ir_node *ptr = get_Load_ptr(node);
2150 ir_node *mem = get_Load_mem(node);
2151 ir_node *new_mem = be_transform_node(mem);
2152 dbg_info *dbgi = get_irn_dbg_info(node);
2153 ir_mode *mode = get_Load_mode(node);
2154 int throws_exception = ir_throws_exception(node);
2158 ia32_address_t addr;
2160 /* construct load address */
2161 memset(&addr, 0, sizeof(addr));
2162 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2169 base = be_transform_node(base);
2175 idx = be_transform_node(idx);
/* pick the load flavor: SSE2 xLoad / x87 vfld for floats, otherwise a
 * GP Load (with folded zero/sign extension for sub-32bit modes) */
2178 if (mode_is_float(mode)) {
2179 if (ia32_cg_config.use_sse2) {
2180 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2183 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2187 assert(mode != mode_b);
2189 /* create a conv node with address mode for smaller modes */
2190 if (get_mode_size_bits(mode) < 32) {
2191 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2192 new_mem, noreg_GP, mode);
2194 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2197 ir_set_throws_exception(new_node, throws_exception);
2199 set_irn_pinned(new_node, get_irn_pinned(node));
2200 set_ia32_op_type(new_node, ia32_AddrModeS);
2201 set_ia32_ls_mode(new_node, mode);
2202 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the register
 * allocator instead of spilled */
2204 if (get_irn_pinned(node) == op_pin_state_floats) {
2205 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2206 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2207 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2208 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2211 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a value (a Proj of a Load) may be folded into a
 * destination-address-mode operation writing back through ptr. */
2216 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2217 ir_node *ptr, ir_node *other)
2224 /* we only use address mode if we're the only user of the load */
2225 if (get_irn_n_edges(node) > 1)
2228 load = get_Proj_pred(node);
/* load and store must live in the same block */
2231 if (get_nodes_block(load) != block)
2234 /* store should have the same pointer as the load */
2235 if (get_Load_ptr(load) != ptr)
2238 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2239 if (other != NULL &&
2240 get_nodes_block(other) == block &&
2241 heights_reachable_in_block(ia32_heights, other, load)) {
2245 if (ia32_prevents_AM(block, load, mem))
2247 /* Store should be attached to the load via mem */
2248 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binop (e.g. AddMem): the operation
 * reads, modifies and writes memory directly, replacing a
 * Load-op-Store triple.  Returns NULL if destination AM is not usable. */
2253 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2254 ir_node *mem, ir_node *ptr, ir_mode *mode,
2255 construct_binop_dest_func *func,
2256 construct_binop_dest_func *func8bit,
2257 match_flags_t flags)
2259 ir_node *src_block = get_nodes_block(node);
2267 ia32_address_mode_t am;
2268 ia32_address_t *addr = &am.addr;
2269 memset(&am, 0, sizeof(am));
2271 assert(flags & match_immediate); /* there is no destam node without... */
2272 commutative = (flags & match_commutative) != 0;
/* either operand may be the in-memory one (for commutative ops) */
2274 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2275 build_address(&am, op1, ia32_create_am_double_use);
2276 new_op = create_immediate_or_transform(op2, 0);
2277 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2278 build_address(&am, op2, ia32_create_am_double_use);
2279 new_op = create_immediate_or_transform(op1, 0);
2284 if (addr->base == NULL)
2285 addr->base = noreg_GP;
2286 if (addr->index == NULL)
2287 addr->index = noreg_GP;
2288 if (addr->mem == NULL)
2291 dbgi = get_irn_dbg_info(node);
2292 block = be_transform_node(src_block);
2293 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations need the dedicated 8bit node constructors */
2295 if (get_mode_size_bits(mode) == 8) {
2296 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2298 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2300 set_address(new_node, addr);
2301 set_ia32_op_type(new_node, ia32_AddrModeD);
2302 set_ia32_ls_mode(new_node, mode);
2303 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the old load's memory Proj(s) to the new combined node */
2305 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2306 mem_proj = be_transform_node(am.mem_proj);
2307 be_set_transformed_node(am.mem_proj, new_node);
2308 be_set_transformed_node(mem_proj, new_node);
/* Build a destination-address-mode unop (e.g. NotMem, NegMem): the
 * operation modifies memory in place.  Returns NULL when destination AM
 * is not usable for this value. */
2313 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2314 ir_node *ptr, ir_mode *mode,
2315 construct_unop_dest_func *func)
2317 ir_node *src_block = get_nodes_block(node);
2323 ia32_address_mode_t am;
2324 ia32_address_t *addr = &am.addr;
2326 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2329 memset(&am, 0, sizeof(am));
2330 build_address(&am, op, ia32_create_am_double_use);
2332 dbgi = get_irn_dbg_info(node);
2333 block = be_transform_node(src_block);
2334 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2335 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2336 set_address(new_node, addr);
2337 set_ia32_op_type(new_node, ia32_AddrModeD);
2338 set_ia32_ls_mode(new_node, mode);
2339 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the old load's memory Proj(s) to the new combined node */
2341 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2342 mem_proj = be_transform_node(am.mem_proj);
2343 be_set_transformed_node(am.mem_proj, new_node);
2344 be_set_transformed_node(mem_proj, new_node);
/* Try to turn a Store of an 8bit Mux(sel, 1, 0) / Mux(sel, 0, 1) into a
 * SetccMem that writes the condition result directly to memory.
 * Returns NULL when the pattern does not apply. */
2349 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2351 ir_mode *mode = get_irn_mode(node);
2352 ir_node *mux_true = get_Mux_true(node);
2353 ir_node *mux_false = get_Mux_false(node);
2361 ia32_condition_code_t cc;
2362 ia32_address_t addr;
/* Setcc writes a single byte, so only 8bit stores qualify */
2364 if (get_mode_size_bits(mode) != 8)
2367 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2369 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2375 cond = get_Mux_sel(node);
2376 flags = get_flags_node(cond, &cc);
2377 /* we can't handle the float special cases with SetM */
2378 if (cc & ia32_cc_additional_float_cases)
/* Mux(sel, 0, 1) needs the negated condition */
2381 cc = ia32_negate_condition_code(cc);
2383 build_address_ptr(&addr, ptr, mem);
2385 dbgi = get_irn_dbg_info(node);
2386 block = get_nodes_block(node);
2387 new_block = be_transform_node(block);
2388 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2389 addr.index, addr.mem, flags, cc);
2390 set_address(new_node, &addr);
2391 set_ia32_op_type(new_node, ia32_AddrModeD);
2392 set_ia32_ls_mode(new_node, mode);
2393 SET_IA32_ORIG_NODE(new_node, node);
/* Try to fold a Store of a computed value into a destination-address-mode
 * operation (AddMem, SubMem, ...), dispatching on the stored value's
 * opcode.  Returns NULL when no destination-AM form applies. */
2398 static ir_node *try_create_dest_am(ir_node *node)
2400 ir_node *val = get_Store_value(node);
2401 ir_node *mem = get_Store_mem(node);
2402 ir_node *ptr = get_Store_ptr(node);
2403 ir_mode *mode = get_irn_mode(val);
2404 unsigned bits = get_mode_size_bits(mode);
2409 /* handle only GP modes for now... */
2410 if (!ia32_mode_needs_gp_reg(mode))
2414 /* store must be the only user of the val node */
2415 if (get_irn_n_edges(val) > 1)
2417 /* skip pointless convs */
2419 ir_node *conv_op = get_Conv_op(val);
2420 ir_mode *pred_mode = get_irn_mode(conv_op);
2421 if (!ia32_mode_needs_gp_reg(pred_mode))
2423 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2431 /* value must be in the same block */
2432 if (get_nodes_block(node) != get_nodes_block(val))
2435 switch (get_irn_opcode(val)) {
2437 op1 = get_Add_left(val);
2438 op2 = get_Add_right(val);
/* Add +1/-1 can use the shorter Inc/Dec memory forms */
2439 if (ia32_cg_config.use_incdec) {
2440 if (is_Const_1(op2)) {
2441 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2443 } else if (is_Const_Minus_1(op2)) {
2444 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2448 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2449 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2450 match_commutative | match_immediate);
2453 op1 = get_Sub_left(val);
2454 op2 = get_Sub_right(val);
2455 if (is_Const(op2)) {
2456 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2458 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2459 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2463 op1 = get_And_left(val);
2464 op2 = get_And_right(val);
2465 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2466 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2467 match_commutative | match_immediate);
2470 op1 = get_Or_left(val);
2471 op2 = get_Or_right(val);
2472 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2473 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2474 match_commutative | match_immediate);
2477 op1 = get_Eor_left(val);
2478 op2 = get_Eor_right(val);
2479 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2480 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2481 match_commutative | match_immediate);
2484 op1 = get_Shl_left(val);
2485 op2 = get_Shl_right(val);
/* shift-by-memory ops have no separate 8bit constructor */
2486 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2487 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2491 op1 = get_Shr_left(val);
2492 op2 = get_Shr_right(val);
2493 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2494 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2498 op1 = get_Shrs_left(val);
2499 op2 = get_Shrs_right(val);
2500 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2501 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2505 op1 = get_Rotl_left(val);
2506 op2 = get_Rotl_right(val);
2507 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2508 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2511 /* TODO: match ROR patterns... */
2513 new_node = try_create_SetMem(val, ptr, mem);
2517 op1 = get_Minus_op(val);
2518 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2521 /* should be lowered already */
2522 assert(mode != mode_b);
2523 op1 = get_Not_op(val);
2524 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate the pinned state of the original Store */
2530 if (new_node != NULL) {
2531 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2532 get_irn_pinned(node) == op_pin_state_pinned) {
2533 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Returns whether @p mode is an integer mode that the x87 integer
 * load/store instructions can address directly: the visible checks
 * require a signed mode of 16 or 32 bits.
 * NOTE(review): the return statements are elided in this listing.
 */
2540 static bool possible_int_mode_for_fp(ir_mode *mode)
2544 if (!mode_is_signed(mode))
2546 size = get_mode_size_bits(mode);
2547 if (size != 16 && size != 32)
/**
 * Checks whether @p node is a Conv from a float mode to an integer mode
 * that possible_int_mode_for_fp() accepts (i.e. one that a fist-style
 * store could perform directly).
 * NOTE(review): the is_Conv() test and returns are elided in this listing.
 */
2552 static int is_float_to_int_conv(const ir_node *node)
2554 ir_mode *mode = get_irn_mode(node);
2558 if (!possible_int_mode_for_fp(mode))
2563 conv_op = get_Conv_op(node);
2564 conv_mode = get_irn_mode(conv_op);
2566 if (!mode_is_float(conv_mode))
/**
2573 * Transform a Store(floatConst) into a sequence of
 * integer stores (one or two 32-bit/16-bit chunks per iteration),
 * so no FPU/SSE register is needed for the constant.
 *
 * @param node  the original Store node
 * @param cns   the float Const being stored
2576 * @return the created ia32 Store node
 *         (a Sync of the partial stores when several were needed,
 *         otherwise the single store's predecessor — see the tail below)
 */
2578 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2580 ir_mode *mode = get_irn_mode(cns);
2581 unsigned size = get_mode_size_bytes(mode);
2582 ir_tarval *tv = get_Const_tarval(cns);
2583 ir_node *block = get_nodes_block(node);
2584 ir_node *new_block = be_transform_node(block);
2585 ir_node *ptr = get_Store_ptr(node);
2586 ir_node *mem = get_Store_mem(node);
2587 dbg_info *dbgi = get_irn_dbg_info(node);
2590 int throws_exception = ir_throws_exception(node);
2592 ia32_address_t addr;
2594 build_address_ptr(&addr, ptr, mem);
     /* assemble the next chunk little-endian from the tarval's bytes;
      * the surrounding loop/size checks are partly elided in this listing */
2601 val= get_tarval_sub_bits(tv, ofs) |
2602 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2603 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2604 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2607 } else if (size >= 2) {
2608 val= get_tarval_sub_bits(tv, ofs) |
2609 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2613 panic("invalid size of Store float to mem (%+F)", node);
     /* store the chunk as an immediate through a plain integer Store */
2615 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2617 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2618 addr.index, addr.mem, imm);
2619 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2621 ir_set_throws_exception(new_node, throws_exception);
2622 set_irn_pinned(new_node, get_irn_pinned(node));
2623 set_ia32_op_type(new_node, ia32_AddrModeD);
2624 set_ia32_ls_mode(new_node, mode);
2625 set_address(new_node, &addr);
2626 SET_IA32_ORIG_NODE(new_node, node);
     /* advance to the next chunk until the whole constant is written */
2633 addr.offset += delta;
2634 } while (size != 0);
     /* several partial stores: combine their memory Projs in a Sync */
2637 return new_rd_Sync(dbgi, new_block, i, ins);
     /* single store: return the store node itself (Proj's predecessor) */
2639 return get_Proj_pred(ins[0]);
/**
2644 * Generate a vfist or vfisttp instruction.
 *
 * fisttp (SSE3) truncates regardless of the FPU rounding mode; the
 * classic fist needs the control word temporarily set to truncation.
 */
2646 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2647 ir_node *index, ir_node *mem, ir_node *val)
2649 if (ia32_cg_config.use_fisttp) {
2650 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2651 if other users exists */
2652 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2653 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
     /* keep the res Proj alive so the register allocator sees the popped
      * value and can insert a copy for other users */
2654 be_new_Keep(block, 1, &value);
     /* non-fisttp path: feed an explicit truncating FPU control word */
2658 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2661 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/**
2667 * Transforms a general (no special case) Store.
 *
 * Tries destination address mode first, then builds an explicit store
 * address and picks the matching store flavour (SSE xStore, x87 vfst,
 * fist for float->int conversions, or plain integer Store/Store8Bit).
 *
2669 * @return the created ia32 Store node
 */
2671 static ir_node *gen_general_Store(ir_node *node)
2673 ir_node *val = get_Store_value(node);
2674 ir_mode *mode = get_irn_mode(val);
2675 ir_node *block = get_nodes_block(node);
2676 ir_node *new_block = be_transform_node(block);
2677 ir_node *ptr = get_Store_ptr(node);
2678 ir_node *mem = get_Store_mem(node);
2679 dbg_info *dbgi = get_irn_dbg_info(node);
2680 int throws_exception = ir_throws_exception(node);
2683 ia32_address_t addr;
2685 /* check for destination address mode */
2686 new_node = try_create_dest_am(node);
2687 if (new_node != NULL)
2690 /* construct store address */
2691 memset(&addr, 0, sizeof(addr));
2692 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
     /* replace missing base/index by the no-register placeholder,
      * otherwise transform the operands into the new graph */
2694 if (addr.base == NULL) {
2695 addr.base = noreg_GP;
2697 addr.base = be_transform_node(addr.base);
2700 if (addr.index == NULL) {
2701 addr.index = noreg_GP;
2703 addr.index = be_transform_node(addr.index);
2705 addr.mem = be_transform_node(mem);
2707 if (mode_is_float(mode)) {
2708 /* Convs before stores are unnecessary if the mode is the same. */
2709 while (is_Conv(val) && mode == get_irn_mode(val)) {
2710 ir_node *op = get_Conv_op(val);
2711 if (!mode_is_float(get_irn_mode(op)))
2715 new_val = be_transform_node(val);
2716 if (ia32_cg_config.use_sse2) {
2717 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2718 addr.index, addr.mem, new_val);
2720 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2721 addr.index, addr.mem, new_val, mode);
     /* float->int Conv feeding the store: emit a fist instead of
      * Conv + integer store (x87 only) */
2723 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2724 val = get_Conv_op(val);
2726 /* TODO: is this optimisation still necessary at all (middleend)? */
2727 /* We can skip ALL float->float up-Convs before stores. */
2728 while (is_Conv(val)) {
2729 ir_node *op = get_Conv_op(val);
2730 if (!mode_is_float(get_irn_mode(op)))
2732 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2736 new_val = be_transform_node(val);
2737 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
     /* integer store; the value may become an immediate operand */
2739 new_val = create_immediate_or_transform(val, 0);
2740 assert(mode != mode_b);
2742 if (get_mode_size_bits(mode) == 8) {
2743 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2744 addr.index, addr.mem, new_val);
2746 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2747 addr.index, addr.mem, new_val);
     /* common attribute setup for all store flavours */
2750 ir_set_throws_exception(new_node, throws_exception);
2752 set_irn_pinned(new_node, get_irn_pinned(node));
2753 set_ia32_op_type(new_node, ia32_AddrModeD);
2754 set_ia32_ls_mode(new_node, mode);
2756 set_address(new_node, &addr);
2757 SET_IA32_ORIG_NODE(new_node, node);
/**
2763 * Transforms a Store.
 *
 * Entry point that dispatches to the float-constant special case or to
 * the general store transformation.
 *
2765 * @return the created ia32 Store node
 */
2767 static ir_node *gen_Store(ir_node *node)
2769 ir_node *val = get_Store_value(node);
2770 ir_mode *mode = get_irn_mode(val);
2772 if (mode_is_float(mode) && is_Const(val)) {
2773 /* We can transform every floating const store
2774 into a sequence of integer stores.
2775 If the constant is already in a register,
2776 it would be better to use it, but we don't
2777 have this information here. */
2778 return gen_float_const_Store(node, val);
2780 return gen_general_Store(node);
/**
2784 * Transforms a Switch.
 *
 * Builds a jump-table entity and a SwitchJmp that indexes it with the
 * (possibly up-converted) selector.
 *
2786 * @return the created ia32 SwitchJmp node
 */
2788 static ir_node *gen_Switch(ir_node *node)
2790 dbg_info *dbgi = get_irn_dbg_info(node);
2791 ir_graph *irg = get_irn_irg(node);
2792 ir_node *block = be_transform_node(get_nodes_block(node));
2793 ir_node *sel = get_Switch_selector(node);
2794 ir_node *new_sel = be_transform_node(sel);
2795 ir_mode *sel_mode = get_irn_mode(sel);
2796 const ir_switch_table *table = get_Switch_table(node);
2797 unsigned n_outs = get_Switch_n_outs(node);
     /* selector must fit in 32 bit; widen smaller selectors */
2801 assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2802 if (get_mode_size_bits(sel_mode) != 32)
2803 new_sel = create_upconv(new_sel, sel);
     /* private, constant entity holding the jump table */
2805 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2806 set_entity_visibility(entity, ir_visibility_private);
2807 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2809 table = ir_switch_table_duplicate(irg, table);
2811 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
     /* scale 2 => stride 4 bytes: table of 32-bit entries */
2812 set_ia32_am_scale(new_node, 2);
2813 set_ia32_am_sc(new_node, entity);
2814 set_ia32_op_type(new_node, ia32_AddrModeS);
2815 set_ia32_ls_mode(new_node, mode_Iu);
2816 SET_IA32_ORIG_NODE(new_node, node);
2817 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2818 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
/**
2824 * Transform a Cond node.
 *
 * Obtains a flags-producing node plus condition code for the selector
 * and emits a conditional jump (Jcc) consuming those flags.
 */
2826 static ir_node *gen_Cond(ir_node *node)
2828 ir_node *block = get_nodes_block(node);
2829 ir_node *new_block = be_transform_node(block);
2830 dbg_info *dbgi = get_irn_dbg_info(node);
2831 ir_node *sel = get_Cond_selector(node);
2832 ir_node *flags = NULL;
2834 ia32_condition_code_t cc;
2836 /* we get flags from a Cmp */
2837 flags = get_flags_node(sel, &cc);
2839 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2840 SET_IA32_ORIG_NODE(new_node, node);
/**
2846 * Transform a be_Copy.
 *
 * Duplicates the node; GP-register values are normalized to mode_Iu so
 * all integer copies share one mode.
 */
2848 static ir_node *gen_be_Copy(ir_node *node)
2850 ir_node *new_node = be_duplicate_node(node);
2851 ir_mode *mode = get_irn_mode(new_node);
2853 if (ia32_mode_needs_gp_reg(mode)) {
2854 set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 float compare for a Cmp node.
 *
 * With fucomi support the flags are produced directly; otherwise a
 * Fucom/Ftst + Fnstsw sequence is used and the status word is moved
 * into the flags register via Sahf.
 */
2860 static ir_node *create_Fucom(ir_node *node)
2862 dbg_info *dbgi = get_irn_dbg_info(node);
2863 ir_node *block = get_nodes_block(node);
2864 ir_node *new_block = be_transform_node(block);
2865 ir_node *left = get_Cmp_left(node);
2866 ir_node *new_left = be_transform_node(left);
2867 ir_node *right = get_Cmp_right(node);
2868 ir_mode *cmp_mode = get_irn_mode(left);
2871 check_x87_floatmode(cmp_mode);
2873 if (ia32_cg_config.use_fucomi) {
2874 new_right = be_transform_node(right);
2875 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2877 set_ia32_commutative(new_node);
2878 SET_IA32_ORIG_NODE(new_node, node);
     /* no fucomi: compare against 0 can use the cheaper ftst */
2880 if (is_Const_0(right)) {
2881 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2883 new_right = be_transform_node(right);
2884 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2885 set_ia32_commutative(new_node);
2888 SET_IA32_ORIG_NODE(new_node, node);
     /* transfer FPU status word (AH) into EFLAGS */
2890 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2891 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi compare for a Cmp node.
 *
 * Uses the address-mode matcher so one operand may come directly from
 * memory; a possible memory Proj is fixed up afterwards.
 */
2897 static ir_node *create_Ucomi(ir_node *node)
2899 dbg_info *dbgi = get_irn_dbg_info(node);
2900 ir_node *src_block = get_nodes_block(node);
2901 ir_node *new_block = be_transform_node(src_block);
2902 ir_node *left = get_Cmp_left(node);
2903 ir_node *right = get_Cmp_right(node);
2905 ia32_address_mode_t am;
2906 ia32_address_t *addr = &am.addr;
2908 match_arguments(&am, src_block, left, right, NULL,
2909 match_commutative | match_am);
2911 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2912 addr->mem, am.new_op1, am.new_op2,
2914 set_am_attributes(new_node, &am);
2916 SET_IA32_ORIG_NODE(new_node, node);
2918 new_node = fix_mem_proj(new_node, &am);
/**
2924 * returns true if it is assured, that the upper bits of a node are "clean"
2925 * which means for a 16 or 8 bit value, that the upper bits in the register
2926 * are 0 for unsigned and a copy of the last significant bit for signed
 * (i.e. the value is already correctly zero-/sign-extended to 32 bit).
 *
 * NOTE(review): opcode case labels are elided in this listing; each group
 * of statements below belongs to one iro_ia32_* case.
 */
2929 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2931 assert(ia32_mode_needs_gp_reg(mode));
     /* full-width values trivially have no "upper" bits to worry about */
2932 if (get_mode_size_bits(mode) >= 32)
     /* look through Projs at the producing node */
2935 if (is_Proj(transformed_node))
2936 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2938 switch (get_ia32_irn_opcode(transformed_node)) {
2939 case iro_ia32_Conv_I2I:
2940 case iro_ia32_Conv_I2I8Bit: {
2941 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
     /* the conversion must extend with the same signedness and from a
      * mode no wider than the one we care about */
2942 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2944 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
     /* Shr case: a large enough constant shift count guarantees zeros */
2951 if (mode_is_signed(mode)) {
2952 return false; /* TODO handle signed modes */
2954 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2955 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2956 const ia32_immediate_attr_t *attr
2957 = get_ia32_immediate_attr_const(right);
2958 if (attr->symconst == 0 &&
2959 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2963 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
     /* Sar case: sign-extension is preserved by arithmetic shifts */
2967 /* TODO too conservative if shift amount is constant */
2968 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
     /* And case: for unsigned modes one clean operand suffices */
2971 if (!mode_is_signed(mode)) {
2973 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2974 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2976 /* TODO if one is known to be zero extended, then || is sufficient */
     /* generic binary case: both operands must be clean */
2981 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2982 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2984 case iro_ia32_Const:
2985 case iro_ia32_Immediate: {
2986 const ia32_immediate_attr_t *attr =
2987 get_ia32_immediate_attr_const(transformed_node);
     /* constants are clean when the shifted-out part is all zeros
      * (unsigned) or all sign bits (signed) */
2988 if (mode_is_signed(mode)) {
2989 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2990 return shifted == 0 || shifted == -1;
2992 unsigned long shifted = (unsigned long)attr->offset;
2993 shifted >>= get_mode_size_bits(mode)-1;
2995 return shifted == 0;
/**
3005 * Generate code for a Cmp.
 *
 * Floats dispatch to Ucomi (SSE) or Fucom (x87). Integer compares
 * prefer Test for the (x & y) ==/!= 0 pattern, otherwise emit Cmp;
 * both widen to a 32-bit compare when the upper bits are known clean,
 * since the 32-bit encoding is smaller.
 */
3007 static ir_node *gen_Cmp(ir_node *node)
3009 dbg_info *dbgi = get_irn_dbg_info(node);
3010 ir_node *block = get_nodes_block(node);
3011 ir_node *new_block = be_transform_node(block);
3012 ir_node *left = get_Cmp_left(node);
3013 ir_node *right = get_Cmp_right(node);
3014 ir_mode *cmp_mode = get_irn_mode(left);
3016 ia32_address_mode_t am;
3017 ia32_address_t *addr = &am.addr;
3019 if (mode_is_float(cmp_mode)) {
3020 if (ia32_cg_config.use_sse2) {
3021 return create_Ucomi(node);
3023 return create_Fucom(node);
3027 assert(ia32_mode_needs_gp_reg(cmp_mode));
3029 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3030 if (is_Const_0(right) &&
3032 get_irn_n_edges(left) == 1) {
3033 /* Test(and_left, and_right) */
3034 ir_node *and_left = get_And_left(left);
3035 ir_node *and_right = get_And_right(left);
3037 /* matze: code here used mode instead of cmd_mode, I think it is always
3038 * the same as cmp_mode, but I leave this here to see if this is really
3041 assert(get_irn_mode(and_left) == cmp_mode);
3043 match_arguments(&am, block, and_left, and_right, NULL,
3045 match_am | match_8bit_am | match_16bit_am |
3046 match_am_and_immediates | match_immediate);
3048 /* use 32bit compare mode if possible since the opcode is smaller */
3049 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3050 upper_bits_clean(am.new_op2, cmp_mode)) {
3051 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3054 if (get_mode_size_bits(cmp_mode) == 8) {
3055 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3056 addr->index, addr->mem,
3057 am.new_op1, am.new_op2,
3060 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3061 addr->index, addr->mem, am.new_op1,
3062 am.new_op2, am.ins_permuted);
3065 /* Cmp(left, right) */
3066 match_arguments(&am, block, left, right, NULL,
3067 match_commutative | match_am | match_8bit_am |
3068 match_16bit_am | match_am_and_immediates |
3070 /* use 32bit compare mode if possible since the opcode is smaller */
3071 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3072 upper_bits_clean(am.new_op2, cmp_mode)) {
3073 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3076 if (get_mode_size_bits(cmp_mode) == 8) {
3077 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3078 addr->index, addr->mem, am.new_op1,
3079 am.new_op2, am.ins_permuted);
3081 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3082 addr->mem, am.new_op1, am.new_op2,
     /* common attribute setup and memory-Proj fixup */
3086 set_am_attributes(new_node, &am);
3087 set_ia32_ls_mode(new_node, cmp_mode);
3089 SET_IA32_ORIG_NODE(new_node, node);
3091 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a CMovcc for a Mux node using already-created flags.
 *
 * @param node      the original Mux node
 * @param flags     the untransformed flags-producing node (for matching)
 * @param new_flags the transformed flags node consumed by the CMov
 * @param cc        the condition code selecting val_true
 */
3096 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3097 ia32_condition_code_t cc)
3099 dbg_info *dbgi = get_irn_dbg_info(node);
3100 ir_node *block = get_nodes_block(node);
3101 ir_node *new_block = be_transform_node(block);
3102 ir_node *val_true = get_Mux_true(node);
3103 ir_node *val_false = get_Mux_false(node);
3105 ia32_address_mode_t am;
3106 ia32_address_t *addr;
3108 assert(ia32_cg_config.use_cmov);
3109 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3113 match_arguments(&am, block, val_false, val_true, flags,
3114 match_commutative | match_am | match_16bit_am | match_mode_neutral);
     /* matcher may have swapped the operands; compensate in the cc */
3116 if (am.ins_permuted)
3117 cc = ia32_negate_condition_code(cc);
3119 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3120 addr->mem, am.new_op1, am.new_op2, new_flags,
3122 set_am_attributes(new_node, &am);
3124 SET_IA32_ORIG_NODE(new_node, node);
3126 new_node = fix_mem_proj(new_node, &am);
/**
3132 * Creates a ia32 Setcc instruction.
 *
 * Setcc only writes an 8-bit register, so the result is zero-extended
 * with a Conv when the requested mode is wider than 8 bit.
 */
3134 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3135 ir_node *flags, ia32_condition_code_t cc,
3138 ir_mode *mode = get_irn_mode(orig_node);
3141 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3142 SET_IA32_ORIG_NODE(new_node, orig_node);
3144 /* we might need to conv the result up */
3145 if (get_mode_size_bits(mode) > 8) {
3146 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3147 nomem, new_node, mode_Bu);
3148 SET_IA32_ORIG_NODE(new_node, orig_node);
/**
3155 * Create instruction for an unsigned Difference or Zero.
 *
 * Computes Mux(a >= b, a - b, 0) branch-free: the Sub's carry flag is
 * materialized via Sbb0 (0 or -1), inverted with Not, and ANDed with
 * the subtraction result.
 */
3157 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3159 ir_mode *mode = get_irn_mode(psi);
3169 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3170 match_mode_neutral | match_am | match_immediate | match_two_users);
3172 block = get_nodes_block(new_node);
     /* gen_binop may hand back a Proj; recover the Sub itself so we can
      * also take its flags output */
3174 if (is_Proj(new_node)) {
3175 sub = get_Proj_pred(new_node);
3178 set_irn_mode(sub, mode_T);
3179 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3181 assert(is_ia32_Sub(sub));
3182 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3184 dbgi = get_irn_dbg_info(psi);
     /* sbb 0: result is 0 when no borrow occurred, -1 otherwise */
3185 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3186 set_ia32_ls_mode(sbb, mode_Iu);
3187 notn = new_bd_ia32_Not(dbgi, block, sbb);
     /* mask the difference: keep it on no-borrow, zero it on borrow */
3189 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3190 set_ia32_ls_mode(new_node, mode_Iu);
3191 set_ia32_commutative(new_node);
/**
3196 * Create an const array of two float consts.
 *
3198 * @param c0 the first constant
3199 * @param c1 the second constant
3200 * @param new_mode IN/OUT for the mode of the constants, if NULL
3201 * smallest possible mode will be used
 *
 * @return a private, constant global entity holding { c0, c1 }
 */
3203 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3206 ir_mode *mode = *new_mode;
3208 ir_initializer_t *initializer;
3209 ir_tarval *tv0 = get_Const_tarval(c0);
3210 ir_tarval *tv1 = get_Const_tarval(c1);
3213 /* detect the best mode for the constants */
3214 mode = get_tarval_mode(tv0);
     /* try to shrink both constants to float, then to double, as long as
      * the conversion is lossless (smaller table entries) */
3216 if (mode != mode_F) {
3217 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3218 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3220 tv0 = tarval_convert_to(tv0, mode);
3221 tv1 = tarval_convert_to(tv1, mode);
3222 } else if (mode != mode_D) {
3223 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3224 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3226 tv0 = tarval_convert_to(tv0, mode);
3227 tv1 = tarval_convert_to(tv1, mode);
3234 tp = ia32_get_prim_type(mode);
3235 tp = ia32_create_float_array(tp);
3237 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3239 set_entity_ld_ident(ent, get_entity_ident(ent));
3240 set_entity_visibility(ent, ir_visibility_private);
3241 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3243 initializer = create_initializer_compound(2);
3245 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3246 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3248 set_entity_initializer(ent, initializer);
/**
3255 * Possible transformations for creating a Setcc.
 * (enumerator names are elided in this listing; the SETCC_TR_* values
 * are referenced from find_const_transform/gen_Mux below)
 */
3257 enum setcc_transform_insn {
/**
 * A recipe for materializing two constants from a Setcc result:
 * condition code plus a short sequence of transform steps.
 */
3270 typedef struct setcc_transform {
3272 ia32_condition_code_t cc;
3274 enum setcc_transform_insn transform;
3278 } setcc_transform_t;
/**
3281 * Setcc can only handle 0 and 1 result.
3282 * Find a transformation that creates 0 and 1 from
 * the given true/false tarvals: normalize so the "true" value is the
 * larger one and the "false" value is 0, then encode the remaining
 * multiplication/shift/negation as a step list in @p res.
 *
 * NOTE(review): several case labels and step increments are elided in
 * this listing; comments describe only the visible code.
 */
3285 static void find_const_transform(ia32_condition_code_t cc,
3286 ir_tarval *t, ir_tarval *f,
3287 setcc_transform_t *res)
     /* normalize: make t the non-zero / bigger value, flipping cc */
3293 if (tarval_is_null(t)) {
3297 cc = ia32_negate_condition_code(cc);
3298 } else if (tarval_cmp(t, f) == ir_relation_less) {
3299 // now, t is the bigger one
3303 cc = ia32_negate_condition_code(cc);
     /* non-zero false value: emit a final ADD f and reduce to (t-f, 0) */
3307 if (! tarval_is_null(f)) {
3308 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3311 res->steps[step].transform = SETCC_TR_ADD;
3313 if (t == tarval_bad)
3314 panic("constant subtract failed");
3315 if (! tarval_is_long(f))
3316 panic("tarval is not long");
3318 res->steps[step].val = get_tarval_long(f);
3320 f = tarval_sub(f, f, NULL);
3321 assert(tarval_is_null(f));
     /* (1, 0) is what Setcc produces natively */
3324 if (tarval_is_one(t)) {
3325 res->steps[step].transform = SETCC_TR_SET;
3326 res->num_steps = ++step;
     /* (-1, 0): Setcc then negate */
3330 if (tarval_is_minus_one(t)) {
3331 res->steps[step].transform = SETCC_TR_NEG;
3333 res->steps[step].transform = SETCC_TR_SET;
3334 res->num_steps = ++step;
3337 if (tarval_is_long(t)) {
3338 long v = get_tarval_long(t);
3340 res->steps[step].val = 0;
     /* small multipliers map to LEA/SHL combinations; an LEAxx step
      * cannot be merged with a preceding ADD, hence the checks */
3343 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3345 res->steps[step].transform = SETCC_TR_LEAxx;
3346 res->steps[step].scale = 3; /* (a << 3) + a */
3349 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3351 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3352 res->steps[step].scale = 3; /* (a << 3) */
3355 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3357 res->steps[step].transform = SETCC_TR_LEAxx;
3358 res->steps[step].scale = 2; /* (a << 2) + a */
3361 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3363 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3364 res->steps[step].scale = 2; /* (a << 2) */
3367 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3369 res->steps[step].transform = SETCC_TR_LEAxx;
3370 res->steps[step].scale = 1; /* (a << 1) + a */
3373 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3375 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3376 res->steps[step].scale = 1; /* (a << 1) */
3379 res->num_steps = step;
     /* general constant: either AND with the value (multi-bit) or NEG,
      * or SHL by the bit position for single-bit constants */
3382 if (! tarval_is_single_bit(t)) {
3383 res->steps[step].transform = SETCC_TR_AND;
3384 res->steps[step].val = v;
3386 res->steps[step].transform = SETCC_TR_NEG;
3388 int val = get_tarval_lowest_bit(t);
3391 res->steps[step].transform = SETCC_TR_SHL;
3392 res->steps[step].scale = val;
3396 res->steps[step].transform = SETCC_TR_SET;
3397 res->num_steps = ++step;
3400 panic("tarval is not long");
/**
3404 * Transforms a Mux node into some code sequence.
 *
 * Handles, in order: float abs patterns, SSE min/max patterns, float
 * Mux of two constants (Setcc-indexed load from a constant table),
 * unsigned doz (difference-or-zero), integer Mux of two constants via
 * a Setcc transform recipe, and finally a generic CMov.
 *
3406 * @return The transformed node.
 */
3408 static ir_node *gen_Mux(ir_node *node)
3410 dbg_info *dbgi = get_irn_dbg_info(node);
3411 ir_node *block = get_nodes_block(node);
3412 ir_node *new_block = be_transform_node(block);
3413 ir_node *mux_true = get_Mux_true(node);
3414 ir_node *mux_false = get_Mux_false(node);
3415 ir_node *sel = get_Mux_sel(node);
3416 ir_mode *mode = get_irn_mode(node);
3420 ia32_condition_code_t cc;
3422 assert(get_irn_mode(sel) == mode_b);
     /* abs(x) pattern: only handled for float modes */
3424 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3426 if (ia32_mode_needs_gp_reg(mode)) {
3427 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3430 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3431 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3435 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3436 if (mode_is_float(mode)) {
3437 ir_node *cmp_left = get_Cmp_left(sel);
3438 ir_node *cmp_right = get_Cmp_right(sel);
3439 ir_relation relation = get_Cmp_relation(sel);
3441 if (ia32_cg_config.use_sse2) {
3442 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3443 if (cmp_left == mux_true && cmp_right == mux_false) {
3444 /* Mux(a <= b, a, b) => MIN */
3445 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3446 match_commutative | match_am | match_two_users);
3447 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3448 /* Mux(a <= b, b, a) => MAX */
3449 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3450 match_commutative | match_am | match_two_users);
3452 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3453 if (cmp_left == mux_true && cmp_right == mux_false) {
3454 /* Mux(a >= b, a, b) => MAX */
3455 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3456 match_commutative | match_am | match_two_users);
3457 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3458 /* Mux(a >= b, b, a) => MIN */
3459 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3460 match_commutative | match_am | match_two_users);
     /* float Mux of two constants: build a 2-element constant table and
      * load entry [setcc-result] from it */
3465 if (is_Const(mux_true) && is_Const(mux_false)) {
3466 ia32_address_mode_t am;
3471 flags = get_flags_node(sel, &cc);
3472 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3474 if (ia32_cg_config.use_sse2) {
3475 /* cannot load from different mode on SSE */
3478 /* x87 can load any mode */
3482 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
     /* scale the Setcc result by the element size (elided assignments
      * presumably set 'scale' per mode — confirm against full source) */
3484 if (new_mode == mode_F) {
3486 } else if (new_mode == mode_D) {
3488 } else if (new_mode == ia32_mode_E) {
3489 /* arg, shift 16 NOT supported */
3491 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3493 panic("Unsupported constant size");
3496 am.ls_mode = new_mode;
3497 am.addr.base = get_symconst_base();
3498 am.addr.index = new_node;
3499 am.addr.mem = nomem;
3501 am.addr.scale = scale;
3502 am.addr.use_frame = 0;
3503 am.addr.tls_segment = false;
3504 am.addr.frame_entity = NULL;
3505 am.addr.symconst_sign = 0;
3506 am.mem_proj = am.addr.mem;
3507 am.op_type = ia32_AddrModeS;
3510 am.pinned = op_pin_state_floats;
3512 am.ins_permuted = false;
3514 if (ia32_cg_config.use_sse2)
3515 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3517 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3518 set_am_attributes(load, &am);
3520 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3522 panic("cannot transform floating point Mux");
     /* ---- integer Mux from here on ---- */
3525 assert(ia32_mode_needs_gp_reg(mode));
3528 ir_node *cmp_left = get_Cmp_left(sel);
3529 ir_node *cmp_right = get_Cmp_right(sel);
3530 ir_relation relation = get_Cmp_relation(sel);
3531 ir_node *val_true = mux_true;
3532 ir_node *val_false = mux_false;
     /* normalize so the zero constant (if any) is the false value */
3534 if (is_Const(val_true) && is_Const_null(val_true)) {
3535 ir_node *tmp = val_false;
3536 val_false = val_true;
3538 relation = get_negated_relation(relation);
     /* Mux(a >=u b, a - b, 0) and the mirrored form => doz */
3540 if (is_Const_0(val_false) && is_Sub(val_true)) {
3541 if ((relation & ir_relation_greater)
3542 && get_Sub_left(val_true) == cmp_left
3543 && get_Sub_right(val_true) == cmp_right) {
3544 return create_doz(node, cmp_left, cmp_right);
3546 if ((relation & ir_relation_less)
3547 && get_Sub_left(val_true) == cmp_right
3548 && get_Sub_right(val_true) == cmp_left) {
3549 return create_doz(node, cmp_right, cmp_left);
3554 flags = get_flags_node(sel, &cc);
3556 if (is_Const(mux_true) && is_Const(mux_false)) {
3557 /* both are const, good */
3558 ir_tarval *tv_true = get_Const_tarval(mux_true);
3559 ir_tarval *tv_false = get_Const_tarval(mux_false);
3560 setcc_transform_t res;
3563 find_const_transform(cc, tv_true, tv_false, &res);
     /* replay the recipe back-to-front; case labels for the SETCC_TR_*
      * steps are elided in this listing */
3565 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3568 switch (res.steps[step].transform) {
3570 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3571 add_ia32_am_offs_int(new_node, res.steps[step].val);
3573 case SETCC_TR_ADDxx:
3574 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3577 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3578 set_ia32_am_scale(new_node, res.steps[step].scale);
3579 set_ia32_am_offs_int(new_node, res.steps[step].val);
3581 case SETCC_TR_LEAxx:
3582 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3583 set_ia32_am_scale(new_node, res.steps[step].scale);
3584 set_ia32_am_offs_int(new_node, res.steps[step].val);
3587 imm = ia32_immediate_from_long(res.steps[step].scale);
3588 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3591 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3594 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3597 imm = ia32_immediate_from_long(res.steps[step].val);
3598 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3601 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3604 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3607 panic("unknown setcc transform");
     /* general case: conditional move */
3611 new_node = create_CMov(node, sel, flags, cc);
/**
3618 * Create a conversion from x87 state register to general purpose.
 *
 * Spills the value with fist(tp) to a frame slot and reloads it with an
 * integer Load; returns the Load's result Proj.
 */
3620 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3622 ir_node *block = be_transform_node(get_nodes_block(node));
3623 ir_node *op = get_Conv_op(node);
3624 ir_node *new_op = be_transform_node(op);
3625 ir_graph *irg = current_ir_graph;
3626 dbg_info *dbgi = get_irn_dbg_info(node);
3627 ir_mode *mode = get_irn_mode(node);
3628 ir_node *frame = get_irg_frame(irg);
3629 ir_node *fist, *load, *mem;
3631 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3632 set_irn_pinned(fist, op_pin_state_floats);
3633 set_ia32_use_frame(fist);
3634 set_ia32_op_type(fist, ia32_AddrModeD);
     /* vfist and vfisttp must agree on the memory Proj number, since
      * gen_vfist may have produced either */
3636 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3637 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3639 assert(get_mode_size_bits(mode) <= 32);
3640 /* exception we can only store signed 32 bit integers, so for unsigned
3641 we store a 64bit (signed) integer and load the lower bits */
3642 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3643 set_ia32_ls_mode(fist, mode_Ls);
3645 set_ia32_ls_mode(fist, mode_Is);
3647 SET_IA32_ORIG_NODE(fist, node);
     /* reload the (lower 32 bits of the) stored integer */
3650 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3652 set_irn_pinned(load, op_pin_state_floats);
3653 set_ia32_use_frame(load);
3654 set_ia32_op_type(load, ia32_AddrModeS);
3655 set_ia32_ls_mode(load, mode_Is);
     /* the frame entity must be large enough for the 64-bit spill case */
3656 if (get_ia32_ls_mode(fist) == mode_Ls) {
3657 ia32_attr_t *attr = get_ia32_attr(load);
3658 attr->data.need_64bit_stackent = 1;
3660 ia32_attr_t *attr = get_ia32_attr(load);
3661 attr->data.need_32bit_stackent = 1;
3663 SET_IA32_ORIG_NODE(load, node);
3665 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
3669 * Creates a x87 Conv by placing a Store and a Load
 *
 * Rounding to @p tgt_mode happens implicitly through the memory
 * round-trip: store in the target mode, reload into a register.
 */
3671 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3673 ir_node *block = get_nodes_block(node);
3674 ir_graph *irg = get_Block_irg(block);
3675 dbg_info *dbgi = get_irn_dbg_info(node);
3676 ir_node *frame = get_irg_frame(irg);
3678 ir_node *store, *load;
3681 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3682 set_ia32_use_frame(store);
3683 set_ia32_op_type(store, ia32_AddrModeD);
3684 SET_IA32_ORIG_NODE(store, node);
3686 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3688 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3689 set_ia32_use_frame(load);
3690 set_ia32_op_type(load, ia32_AddrModeS);
3691 SET_IA32_ORIG_NODE(load, node);
3693 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
3697 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3698 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3700 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3702 func = get_mode_size_bits(mode) == 8 ?
3703 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3704 return func(dbgi, block, base, index, mem, val, mode);
3708 * Create a conversion from general purpose to x87 register
3710 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3712 ir_node *src_block = get_nodes_block(node);
3713 ir_node *block = be_transform_node(src_block);
3714 ir_graph *irg = get_Block_irg(block);
3715 dbg_info *dbgi = get_irn_dbg_info(node);
3716 ir_node *op = get_Conv_op(node);
3717 ir_node *new_op = NULL;
3719 ir_mode *store_mode;
3725 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3726 if (possible_int_mode_for_fp(src_mode)) {
3727 ia32_address_mode_t am;
3729 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3730 if (am.op_type == ia32_AddrModeS) {
3731 ia32_address_t *addr = &am.addr;
3733 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3734 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3736 set_am_attributes(fild, &am);
3737 SET_IA32_ORIG_NODE(fild, node);
3739 fix_mem_proj(fild, &am);
3744 if (new_op == NULL) {
3745 new_op = be_transform_node(op);
3748 mode = get_irn_mode(op);
3750 /* first convert to 32 bit signed if necessary */
3751 if (get_mode_size_bits(src_mode) < 32) {
3752 if (!upper_bits_clean(new_op, src_mode)) {
3753 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3754 SET_IA32_ORIG_NODE(new_op, node);
3759 assert(get_mode_size_bits(mode) == 32);
3762 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3764 set_ia32_use_frame(store);
3765 set_ia32_op_type(store, ia32_AddrModeD);
3766 set_ia32_ls_mode(store, mode_Iu);
3768 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3770 /* exception for 32bit unsigned, do a 64bit spill+load */
3771 if (!mode_is_signed(mode)) {
3774 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3776 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3777 noreg_GP, nomem, zero_const);
3778 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3780 set_ia32_use_frame(zero_store);
3781 set_ia32_op_type(zero_store, ia32_AddrModeD);
3782 add_ia32_am_offs_int(zero_store, 4);
3783 set_ia32_ls_mode(zero_store, mode_Iu);
3785 in[0] = zero_store_mem;
3788 store_mem = new_rd_Sync(dbgi, block, 2, in);
3789 store_mode = mode_Ls;
3791 store_mode = mode_Is;
3795 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3797 set_ia32_use_frame(fild);
3798 set_ia32_op_type(fild, ia32_AddrModeS);
3799 set_ia32_ls_mode(fild, store_mode);
3801 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3807 * Create a conversion from one integer mode into another one
3809 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3810 dbg_info *dbgi, ir_node *block, ir_node *op,
3813 ir_node *new_block = be_transform_node(block);
3815 ir_mode *smaller_mode;
3816 ia32_address_mode_t am;
3817 ia32_address_t *addr = &am.addr;
3820 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3821 smaller_mode = src_mode;
3823 smaller_mode = tgt_mode;
3826 #ifdef DEBUG_libfirm
3828 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3833 match_arguments(&am, block, NULL, op, NULL,
3834 match_am | match_8bit_am | match_16bit_am);
3836 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3837 /* unnecessary conv. in theory it shouldn't have been AM */
3838 assert(is_ia32_NoReg_GP(addr->base));
3839 assert(is_ia32_NoReg_GP(addr->index));
3840 assert(is_NoMem(addr->mem));
3841 assert(am.addr.offset == 0);
3842 assert(am.addr.symconst_ent == NULL);
3846 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3847 addr->mem, am.new_op2, smaller_mode);
3848 set_am_attributes(new_node, &am);
3849 /* match_arguments assume that out-mode = in-mode, this isn't true here
3851 set_ia32_ls_mode(new_node, smaller_mode);
3852 SET_IA32_ORIG_NODE(new_node, node);
3853 new_node = fix_mem_proj(new_node, &am);
3858 * Transforms a Conv node.
3860 * @return The created ia32 Conv node
3862 static ir_node *gen_Conv(ir_node *node)
3864 ir_node *block = get_nodes_block(node);
3865 ir_node *new_block = be_transform_node(block);
3866 ir_node *op = get_Conv_op(node);
3867 ir_node *new_op = NULL;
3868 dbg_info *dbgi = get_irn_dbg_info(node);
3869 ir_mode *src_mode = get_irn_mode(op);
3870 ir_mode *tgt_mode = get_irn_mode(node);
3871 int src_bits = get_mode_size_bits(src_mode);
3872 int tgt_bits = get_mode_size_bits(tgt_mode);
3873 ir_node *res = NULL;
3875 assert(!mode_is_int(src_mode) || src_bits <= 32);
3876 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3878 /* modeB -> X should already be lowered by the lower_mode_b pass */
3879 if (src_mode == mode_b) {
3880 panic("ConvB not lowered %+F", node);
3883 if (src_mode == tgt_mode) {
3884 /* this should be optimized already, but who knows... */
3885 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3886 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3887 return be_transform_node(op);
3890 if (mode_is_float(src_mode)) {
3891 new_op = be_transform_node(op);
3892 /* we convert from float ... */
3893 if (mode_is_float(tgt_mode)) {
3895 if (ia32_cg_config.use_sse2) {
3896 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3897 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3899 set_ia32_ls_mode(res, tgt_mode);
3901 if (src_bits < tgt_bits) {
3902 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3905 res = gen_x87_conv(tgt_mode, new_op);
3906 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3912 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3913 if (ia32_cg_config.use_sse2) {
3914 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3916 set_ia32_ls_mode(res, src_mode);
3918 return gen_x87_fp_to_gp(node);
3922 /* we convert from int ... */
3923 if (mode_is_float(tgt_mode)) {
3925 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3926 if (ia32_cg_config.use_sse2) {
3927 new_op = be_transform_node(op);
3928 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3930 set_ia32_ls_mode(res, tgt_mode);
3932 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3933 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3934 res = gen_x87_gp_to_fp(node, src_mode);
3936 /* we need a float-conv, if the int mode has more bits than the
3938 if (float_mantissa < int_mantissa) {
3939 res = gen_x87_conv(tgt_mode, res);
3940 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3944 } else if (tgt_mode == mode_b) {
3945 /* mode_b lowering already took care that we only have 0/1 values */
3946 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3947 src_mode, tgt_mode));
3948 return be_transform_node(op);
3951 if (src_bits == tgt_bits) {
3952 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3953 src_mode, tgt_mode));
3954 return be_transform_node(op);
3957 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3965 static ir_node *create_immediate_or_transform(ir_node *node,
3966 char immediate_constraint_type)
3968 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3969 if (new_node == NULL) {
3970 new_node = be_transform_node(node);
3976 * Transforms a FrameAddr into an ia32 Add.
3978 static ir_node *gen_be_FrameAddr(ir_node *node)
3980 ir_node *block = be_transform_node(get_nodes_block(node));
3981 ir_node *op = be_get_FrameAddr_frame(node);
3982 ir_node *new_op = be_transform_node(op);
3983 dbg_info *dbgi = get_irn_dbg_info(node);
3986 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3987 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3988 set_ia32_use_frame(new_node);
3990 SET_IA32_ORIG_NODE(new_node, node);
3996 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3998 static ir_node *gen_be_Return(ir_node *node)
4000 ir_graph *irg = current_ir_graph;
4001 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4002 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4003 ir_node *new_ret_val = be_transform_node(ret_val);
4004 ir_node *new_ret_mem = be_transform_node(ret_mem);
4005 ir_entity *ent = get_irg_entity(irg);
4006 ir_type *tp = get_entity_type(ent);
4007 dbg_info *dbgi = get_irn_dbg_info(node);
4008 ir_node *block = be_transform_node(get_nodes_block(node));
4022 assert(ret_val != NULL);
4023 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4024 return be_duplicate_node(node);
4027 res_type = get_method_res_type(tp, 0);
4029 if (! is_Primitive_type(res_type)) {
4030 return be_duplicate_node(node);
4033 mode = get_type_mode(res_type);
4034 if (! mode_is_float(mode)) {
4035 return be_duplicate_node(node);
4038 assert(get_method_n_ress(tp) == 1);
4040 frame = get_irg_frame(irg);
4042 /* store xmm0 onto stack */
4043 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4044 new_ret_mem, new_ret_val);
4045 set_ia32_ls_mode(sse_store, mode);
4046 set_ia32_op_type(sse_store, ia32_AddrModeD);
4047 set_ia32_use_frame(sse_store);
4048 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4050 /* load into x87 register */
4051 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4052 set_ia32_op_type(fld, ia32_AddrModeS);
4053 set_ia32_use_frame(fld);
4055 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4056 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4058 /* create a new return */
4059 arity = get_irn_arity(node);
4060 in = ALLOCAN(ir_node*, arity);
4061 pop = be_Return_get_pop(node);
4062 for (i = 0; i < arity; ++i) {
4063 ir_node *op = get_irn_n(node, i);
4064 if (op == ret_val) {
4066 } else if (op == ret_mem) {
4069 in[i] = be_transform_node(op);
4072 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4073 copy_node_attr(irg, node, new_node);
4079 * Transform a be_AddSP into an ia32_SubSP.
4081 static ir_node *gen_be_AddSP(ir_node *node)
4083 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4084 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4086 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4087 match_am | match_immediate);
4088 assert(is_ia32_SubSP(new_node));
4089 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4090 &ia32_registers[REG_ESP]);
4095 * Transform a be_SubSP into an ia32_AddSP
4097 static ir_node *gen_be_SubSP(ir_node *node)
4099 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4100 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4102 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4103 match_am | match_immediate);
4104 assert(is_ia32_AddSP(new_node));
4105 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4106 &ia32_registers[REG_ESP]);
4111 * Change some phi modes
4113 static ir_node *gen_Phi(ir_node *node)
4115 const arch_register_req_t *req;
4116 ir_node *block = be_transform_node(get_nodes_block(node));
4117 ir_graph *irg = current_ir_graph;
4118 dbg_info *dbgi = get_irn_dbg_info(node);
4119 ir_mode *mode = get_irn_mode(node);
4122 if (ia32_mode_needs_gp_reg(mode)) {
4123 /* we shouldn't have any 64bit stuff around anymore */
4124 assert(get_mode_size_bits(mode) <= 32);
4125 /* all integer operations are on 32bit registers now */
4127 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4128 } else if (mode_is_float(mode)) {
4129 if (ia32_cg_config.use_sse2) {
4131 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4134 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4137 req = arch_no_register_req;
4140 /* phi nodes allow loops, so we use the old arguments for now
4141 * and fix this later */
4142 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4143 get_irn_in(node) + 1);
4144 copy_node_attr(irg, node, phi);
4145 be_duplicate_deps(node, phi);
4147 arch_set_irn_register_req_out(phi, 0, req);
4149 be_enqueue_preds(node);
4154 static ir_node *gen_Jmp(ir_node *node)
4156 ir_node *block = get_nodes_block(node);
4157 ir_node *new_block = be_transform_node(block);
4158 dbg_info *dbgi = get_irn_dbg_info(node);
4161 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4162 SET_IA32_ORIG_NODE(new_node, node);
4170 static ir_node *gen_IJmp(ir_node *node)
4172 ir_node *block = get_nodes_block(node);
4173 ir_node *new_block = be_transform_node(block);
4174 dbg_info *dbgi = get_irn_dbg_info(node);
4175 ir_node *op = get_IJmp_target(node);
4177 ia32_address_mode_t am;
4178 ia32_address_t *addr = &am.addr;
4180 assert(get_irn_mode(op) == mode_P);
4182 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4184 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4185 addr->mem, am.new_op2);
4186 set_am_attributes(new_node, &am);
4187 SET_IA32_ORIG_NODE(new_node, node);
4189 new_node = fix_mem_proj(new_node, &am);
4194 static ir_node *gen_ia32_l_Add(ir_node *node)
4196 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4197 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4198 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4199 match_commutative | match_am | match_immediate |
4200 match_mode_neutral);
4202 if (is_Proj(lowered)) {
4203 lowered = get_Proj_pred(lowered);
4205 assert(is_ia32_Add(lowered));
4206 set_irn_mode(lowered, mode_T);
4212 static ir_node *gen_ia32_l_Adc(ir_node *node)
4214 return gen_binop_flags(node, new_bd_ia32_Adc,
4215 match_commutative | match_am | match_immediate |
4216 match_mode_neutral);
4220 * Transforms a l_MulS into a "real" MulS node.
4222 * @return the created ia32 Mul node
4224 static ir_node *gen_ia32_l_Mul(ir_node *node)
4226 ir_node *left = get_binop_left(node);
4227 ir_node *right = get_binop_right(node);
4229 return gen_binop(node, left, right, new_bd_ia32_Mul,
4230 match_commutative | match_am | match_mode_neutral);
4234 * Transforms a l_IMulS into a "real" IMul1OPS node.
4236 * @return the created ia32 IMul1OP node
4238 static ir_node *gen_ia32_l_IMul(ir_node *node)
4240 ir_node *left = get_binop_left(node);
4241 ir_node *right = get_binop_right(node);
4243 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4244 match_commutative | match_am | match_mode_neutral);
4247 static ir_node *gen_ia32_l_Sub(ir_node *node)
4249 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4250 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4251 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4252 match_am | match_immediate | match_mode_neutral);
4254 if (is_Proj(lowered)) {
4255 lowered = get_Proj_pred(lowered);
4257 assert(is_ia32_Sub(lowered));
4258 set_irn_mode(lowered, mode_T);
4264 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4266 return gen_binop_flags(node, new_bd_ia32_Sbb,
4267 match_am | match_immediate | match_mode_neutral);
4270 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4272 ir_node *src_block = get_nodes_block(node);
4273 ir_node *block = be_transform_node(src_block);
4274 ir_graph *irg = current_ir_graph;
4275 dbg_info *dbgi = get_irn_dbg_info(node);
4276 ir_node *frame = get_irg_frame(irg);
4277 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4278 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4279 ir_node *new_val_low = be_transform_node(val_low);
4280 ir_node *new_val_high = be_transform_node(val_high);
4282 ir_node *sync, *fild, *res;
4284 ir_node *store_high;
4288 if (ia32_cg_config.use_sse2) {
4289 panic("not implemented for SSE2");
4293 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4295 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4297 SET_IA32_ORIG_NODE(store_low, node);
4298 SET_IA32_ORIG_NODE(store_high, node);
4300 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4301 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4303 set_ia32_use_frame(store_low);
4304 set_ia32_use_frame(store_high);
4305 set_ia32_op_type(store_low, ia32_AddrModeD);
4306 set_ia32_op_type(store_high, ia32_AddrModeD);
4307 set_ia32_ls_mode(store_low, mode_Iu);
4308 set_ia32_ls_mode(store_high, mode_Is);
4309 add_ia32_am_offs_int(store_high, 4);
4313 sync = new_rd_Sync(dbgi, block, 2, in);
4316 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4318 set_ia32_use_frame(fild);
4319 set_ia32_op_type(fild, ia32_AddrModeS);
4320 set_ia32_ls_mode(fild, mode_Ls);
4322 SET_IA32_ORIG_NODE(fild, node);
4324 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4326 if (! mode_is_signed(get_irn_mode(val_high))) {
4327 ia32_address_mode_t am;
4329 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4332 am.addr.base = get_symconst_base();
4333 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4334 am.addr.mem = nomem;
4337 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4338 am.addr.tls_segment = false;
4339 am.addr.use_frame = 0;
4340 am.addr.frame_entity = NULL;
4341 am.addr.symconst_sign = 0;
4342 am.ls_mode = mode_F;
4343 am.mem_proj = nomem;
4344 am.op_type = ia32_AddrModeS;
4346 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4347 am.pinned = op_pin_state_floats;
4349 am.ins_permuted = false;
4351 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4352 am.new_op1, am.new_op2, get_fpcw());
4353 set_am_attributes(fadd, &am);
4355 set_irn_mode(fadd, mode_T);
4356 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4361 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4363 ir_node *src_block = get_nodes_block(node);
4364 ir_node *block = be_transform_node(src_block);
4365 ir_graph *irg = get_Block_irg(block);
4366 dbg_info *dbgi = get_irn_dbg_info(node);
4367 ir_node *frame = get_irg_frame(irg);
4368 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4369 ir_node *new_val = be_transform_node(val);
4372 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4373 SET_IA32_ORIG_NODE(fist, node);
4374 set_ia32_use_frame(fist);
4375 set_ia32_op_type(fist, ia32_AddrModeD);
4376 set_ia32_ls_mode(fist, mode_Ls);
4378 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4379 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
4382 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4384 ir_node *block = be_transform_node(get_nodes_block(node));
4385 ir_graph *irg = get_Block_irg(block);
4386 ir_node *pred = get_Proj_pred(node);
4387 ir_node *new_pred = be_transform_node(pred);
4388 ir_node *frame = get_irg_frame(irg);
4389 dbg_info *dbgi = get_irn_dbg_info(node);
4390 long pn = get_Proj_proj(node);
4395 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4396 SET_IA32_ORIG_NODE(load, node);
4397 set_ia32_use_frame(load);
4398 set_ia32_op_type(load, ia32_AddrModeS);
4399 set_ia32_ls_mode(load, mode_Iu);
4400 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4401 * 32 bit from it with this particular load */
4402 attr = get_ia32_attr(load);
4403 attr->data.need_64bit_stackent = 1;
4405 if (pn == pn_ia32_l_FloattoLL_res_high) {
4406 add_ia32_am_offs_int(load, 4);
4408 assert(pn == pn_ia32_l_FloattoLL_res_low);
4411 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4417 * Transform the Projs of an AddSP.
4419 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4421 ir_node *pred = get_Proj_pred(node);
4422 ir_node *new_pred = be_transform_node(pred);
4423 dbg_info *dbgi = get_irn_dbg_info(node);
4424 long proj = get_Proj_proj(node);
4426 if (proj == pn_be_AddSP_sp) {
4427 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4428 pn_ia32_SubSP_stack);
4429 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4431 } else if (proj == pn_be_AddSP_res) {
4432 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4433 pn_ia32_SubSP_addr);
4434 } else if (proj == pn_be_AddSP_M) {
4435 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4438 panic("No idea how to transform proj->AddSP");
4442 * Transform the Projs of a SubSP.
4444 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4446 ir_node *pred = get_Proj_pred(node);
4447 ir_node *new_pred = be_transform_node(pred);
4448 dbg_info *dbgi = get_irn_dbg_info(node);
4449 long proj = get_Proj_proj(node);
4451 if (proj == pn_be_SubSP_sp) {
4452 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4453 pn_ia32_AddSP_stack);
4454 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4456 } else if (proj == pn_be_SubSP_M) {
4457 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4460 panic("No idea how to transform proj->SubSP");
4464 * Transform and renumber the Projs from a Load.
4466 static ir_node *gen_Proj_Load(ir_node *node)
4469 ir_node *pred = get_Proj_pred(node);
4470 dbg_info *dbgi = get_irn_dbg_info(node);
4471 long proj = get_Proj_proj(node);
4473 /* loads might be part of source address mode matches, so we don't
4474 * transform the ProjMs yet (with the exception of loads whose result is
4477 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4480 /* this is needed, because sometimes we have loops that are only
4481 reachable through the ProjM */
4482 be_enqueue_preds(node);
4483 /* do it in 2 steps, to silence firm verifier */
4484 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4485 set_Proj_proj(res, pn_ia32_mem);
4489 /* renumber the proj */
4490 new_pred = be_transform_node(pred);
4491 if (is_ia32_Load(new_pred)) {
4492 switch ((pn_Load)proj) {
4494 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4496 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4497 case pn_Load_X_except:
4498 /* This Load might raise an exception. Mark it. */
4499 set_ia32_exc_label(new_pred, 1);
4500 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4501 case pn_Load_X_regular:
4502 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4504 } else if (is_ia32_Conv_I2I(new_pred) ||
4505 is_ia32_Conv_I2I8Bit(new_pred)) {
4506 set_irn_mode(new_pred, mode_T);
4507 switch ((pn_Load)proj) {
4509 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4511 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4512 case pn_Load_X_except:
4513 /* This Load might raise an exception. Mark it. */
4514 set_ia32_exc_label(new_pred, 1);
4515 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4516 case pn_Load_X_regular:
4517 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4519 } else if (is_ia32_xLoad(new_pred)) {
4520 switch ((pn_Load)proj) {
4522 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4524 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4525 case pn_Load_X_except:
4526 /* This Load might raise an exception. Mark it. */
4527 set_ia32_exc_label(new_pred, 1);
4528 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4529 case pn_Load_X_regular:
4530 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4532 } else if (is_ia32_vfld(new_pred)) {
4533 switch ((pn_Load)proj) {
4535 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4537 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4538 case pn_Load_X_except:
4539 /* This Load might raise an exception. Mark it. */
4540 set_ia32_exc_label(new_pred, 1);
4541 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4542 case pn_Load_X_regular:
4543 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4546 /* can happen for ProJMs when source address mode happened for the
4549 /* however it should not be the result proj, as that would mean the
4550 load had multiple users and should not have been used for
4552 if (proj != pn_Load_M) {
4553 panic("internal error: transformed node not a Load");
4555 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4558 panic("No idea how to transform Proj(Load) %+F", node);
4561 static ir_node *gen_Proj_Store(ir_node *node)
4563 ir_node *pred = get_Proj_pred(node);
4564 ir_node *new_pred = be_transform_node(pred);
4565 dbg_info *dbgi = get_irn_dbg_info(node);
4566 long pn = get_Proj_proj(node);
4568 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4569 switch ((pn_Store)pn) {
4571 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4572 case pn_Store_X_except:
4573 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4574 case pn_Store_X_regular:
4575 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4577 } else if (is_ia32_vfist(new_pred)) {
4578 switch ((pn_Store)pn) {
4580 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4581 case pn_Store_X_except:
4582 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4583 case pn_Store_X_regular:
4584 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4586 } else if (is_ia32_vfisttp(new_pred)) {
4587 switch ((pn_Store)pn) {
4589 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4590 case pn_Store_X_except:
4591 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4592 case pn_Store_X_regular:
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4595 } else if (is_ia32_vfst(new_pred)) {
4596 switch ((pn_Store)pn) {
4598 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4599 case pn_Store_X_except:
4600 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4601 case pn_Store_X_regular:
4602 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4604 } else if (is_ia32_xStore(new_pred)) {
4605 switch ((pn_Store)pn) {
4607 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4608 case pn_Store_X_except:
4609 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4610 case pn_Store_X_regular:
4611 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4613 } else if (is_Sync(new_pred)) {
4614 /* hack for the case that gen_float_const_Store produced a Sync */
4615 if (pn == pn_Store_M) {
4618 panic("exception control flow not implemented yet");
4619 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4620 /* destination address mode */
4621 if (pn == pn_Store_M) {
4624 panic("exception control flow for destination AM not implemented yet");
4627 panic("No idea how to transform Proj(Store) %+F", node);
4631 * Transform and renumber the Projs from a Div or Mod instruction.
4633 static ir_node *gen_Proj_Div(ir_node *node)
4635 ir_node *pred = get_Proj_pred(node);
4636 ir_node *new_pred = be_transform_node(pred);
4637 dbg_info *dbgi = get_irn_dbg_info(node);
4638 long proj = get_Proj_proj(node);
4640 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4641 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4643 switch ((pn_Div)proj) {
4645 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4646 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4647 } else if (is_ia32_xDiv(new_pred)) {
4648 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4649 } else if (is_ia32_vfdiv(new_pred)) {
4650 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4652 panic("Div transformed to unexpected thing %+F", new_pred);
4655 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4656 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4657 } else if (is_ia32_xDiv(new_pred)) {
4658 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4659 } else if (is_ia32_vfdiv(new_pred)) {
4660 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4662 panic("Div transformed to unexpected thing %+F", new_pred);
4664 case pn_Div_X_except:
4665 set_ia32_exc_label(new_pred, 1);
4666 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4667 case pn_Div_X_regular:
4668 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4671 panic("No idea how to transform proj->Div");
4675 * Transform and renumber the Projs from a Div or Mod instruction.
4677 static ir_node *gen_Proj_Mod(ir_node *node)
4679 ir_node *pred = get_Proj_pred(node);
4680 ir_node *new_pred = be_transform_node(pred);
4681 dbg_info *dbgi = get_irn_dbg_info(node);
4682 long proj = get_Proj_proj(node);
4684 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4685 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4686 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4688 switch ((pn_Mod)proj) {
4690 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4692 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4693 case pn_Mod_X_except:
4694 set_ia32_exc_label(new_pred, 1);
4695 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4696 case pn_Mod_X_regular:
4697 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4699 panic("No idea how to transform proj->Mod");
4703 * Transform and renumber the Projs from a CopyB.
4705 static ir_node *gen_Proj_CopyB(ir_node *node)
4707 ir_node *pred = get_Proj_pred(node);
4708 ir_node *new_pred = be_transform_node(pred);
4709 dbg_info *dbgi = get_irn_dbg_info(node);
4710 long proj = get_Proj_proj(node);
4712 switch ((pn_CopyB)proj) {
4714 if (is_ia32_CopyB_i(new_pred)) {
4715 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4716 } else if (is_ia32_CopyB(new_pred)) {
4717 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4720 case pn_CopyB_X_regular:
4721 if (is_ia32_CopyB_i(new_pred)) {
4722 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4723 } else if (is_ia32_CopyB(new_pred)) {
4724 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4727 case pn_CopyB_X_except:
4728 if (is_ia32_CopyB_i(new_pred)) {
4729 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4730 } else if (is_ia32_CopyB(new_pred)) {
4731 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4736 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call into an ia32 Call node.
 *
 * Matches an address mode / immediate for the call target, collects the
 * fixed GP register parameters (eax/ecx/edx) and the fpcw input, and
 * remembers SSE2 calls so their x87 float results can be patched later
 * (see postprocess_fp_call_results).
 */
4739 static ir_node *gen_be_Call(ir_node *node)
4741 dbg_info *const dbgi = get_irn_dbg_info(node);
4742 ir_node *const src_block = get_nodes_block(node);
4743 ir_node *const block = be_transform_node(src_block);
4744 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4745 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4746 ir_node *const sp = be_transform_node(src_sp);
4747 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4748 ia32_address_mode_t am;
4749 ia32_address_t *const addr = &am.addr;
/* register parameters start out as "not used" (noreg) */
4754 ir_node * eax = noreg_GP;
4755 ir_node * ecx = noreg_GP;
4756 ir_node * edx = noreg_GP;
4757 unsigned const pop = be_Call_get_pop(node);
4758 ir_type *const call_tp = be_Call_get_type(node);
4759 int old_no_pic_adjust;
4760 int throws_exception = ir_throws_exception(node);
4762 /* Run the x87 simulator if the call returns a float value */
4763 if (get_method_n_ress(call_tp) > 0) {
4764 ir_type *const res_type = get_method_res_type(call_tp, 0);
4765 ir_mode *const res_mode = get_type_mode(res_type);
4767 if (res_mode != NULL && mode_is_float(res_mode)) {
4768 ir_graph *irg = current_ir_graph;
4769 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4770 irg_data->do_x87_sim = 1;
4774 /* We do not want be_Call direct calls */
4775 assert(be_Call_get_entity(node) == NULL);
4777 /* special case for PIC trampoline calls */
4778 old_no_pic_adjust = ia32_no_pic_adjust;
4779 ia32_no_pic_adjust = be_options.pic;
4781 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4782 match_am | match_immediate);
4784 ia32_no_pic_adjust = old_no_pic_adjust;
/* walk the inputs back-to-front: the last input is the fpcw,
 * the remaining ones are limited GP register parameters */
4786 i = get_irn_arity(node) - 1;
4787 fpcw = be_transform_node(get_irn_n(node, i--));
4788 for (; i >= n_be_Call_first_arg; --i) {
4789 arch_register_req_t const *const req
4790 = arch_get_irn_register_req_in(node, i);
4791 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4793 assert(req->type == arch_register_req_type_limited);
4794 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* each of eax/ecx/edx may be assigned at most once */
4796 switch (*req->limited) {
4797 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4798 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4799 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4800 default: panic("Invalid GP register for register parameter");
/* build the ia32 Call and carry over AM attributes and pinned state */
4804 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4805 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4806 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4807 ir_set_throws_exception(call, throws_exception);
4808 set_am_attributes(call, &am);
4809 call = fix_mem_proj(call, &am);
4811 if (get_irn_pinned(node) == op_pin_state_pinned)
4812 set_irn_pinned(call, op_pin_state_pinned);
4814 SET_IA32_ORIG_NODE(call, node);
4816 if (ia32_cg_config.use_sse2) {
4817 /* remember this call for post-processing */
4818 ARR_APP1(ir_node *, call_list, call);
4819 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4826 * Transform Builtin trap
4828 static ir_node *gen_trap(ir_node *node)
4830 dbg_info *dbgi = get_irn_dbg_info(node);
4831 ir_node *block = be_transform_node(get_nodes_block(node));
4832 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4834 return new_bd_ia32_UD2(dbgi, block, mem);
4838 * Transform Builtin debugbreak
4840 static ir_node *gen_debugbreak(ir_node *node)
4842 dbg_info *dbgi = get_irn_dbg_info(node);
4843 ir_node *block = be_transform_node(get_nodes_block(node));
4844 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4846 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4850 * Transform Builtin return_address
/**
 * Transform Builtin return_address: load the return address of the
 * requested stack frame.
 *
 * param 0 is a Const giving the frame level, param 1 the frame pointer.
 */
4852 static ir_node *gen_return_address(ir_node *node)
4854 ir_node *param = get_Builtin_param(node, 0);
4855 ir_node *frame = get_Builtin_param(node, 1);
4856 dbg_info *dbgi = get_irn_dbg_info(node);
4857 ir_tarval *tv = get_Const_tarval(param);
4858 ir_graph *irg = get_irn_irg(node);
4859 unsigned long value = get_tarval_long(tv);
4861 ir_node *block = be_transform_node(get_nodes_block(node));
4862 ir_node *ptr = be_transform_node(frame);
/* walk up `value` frames via ClimbFrame
 * (NOTE(review): presumably guarded by value > 0 — the enclosing
 * condition is not visible here, confirm in the full source) */
4866 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4867 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4868 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4871 /* load the return address from this frame */
4872 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4874 set_irn_pinned(load, get_irn_pinned(node));
4875 set_ia32_op_type(load, ia32_AddrModeS);
4876 set_ia32_ls_mode(load, mode_Iu);
/* the return address lives at a fixed frame entity, offset 0 */
4878 set_ia32_am_offs_int(load, 0);
4879 set_ia32_use_frame(load);
4880 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4882 if (get_irn_pinned(node) == op_pin_state_floats) {
4883 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4884 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4885 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4886 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4889 SET_IA32_ORIG_NODE(load, node);
4890 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4894 * Transform Builtin frame_address
/**
 * Transform Builtin frame_address: load the frame address of the
 * requested stack frame.
 *
 * param 0 is a Const giving the frame level, param 1 the frame pointer.
 */
4896 static ir_node *gen_frame_address(ir_node *node)
4898 ir_node *param = get_Builtin_param(node, 0);
4899 ir_node *frame = get_Builtin_param(node, 1);
4900 dbg_info *dbgi = get_irn_dbg_info(node);
4901 ir_tarval *tv = get_Const_tarval(param);
4902 ir_graph *irg = get_irn_irg(node);
4903 unsigned long value = get_tarval_long(tv);
4905 ir_node *block = be_transform_node(get_nodes_block(node));
4906 ir_node *ptr = be_transform_node(frame);
/* walk up `value` frames via ClimbFrame
 * (NOTE(review): presumably guarded by value > 0 — the enclosing
 * condition is not visible here, confirm in the full source) */
4911 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4912 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4913 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4916 /* load the frame address from this frame */
4917 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4919 set_irn_pinned(load, get_irn_pinned(node));
4920 set_ia32_op_type(load, ia32_AddrModeS);
4921 set_ia32_ls_mode(load, mode_Iu);
4923 ent = ia32_get_frame_address_entity(irg);
4925 set_ia32_am_offs_int(load, 0);
4926 set_ia32_use_frame(load);
4927 set_ia32_frame_ent(load, ent);
4929 /* will fail anyway, but gcc does this: */
4930 set_ia32_am_offs_int(load, 0);
4933 if (get_irn_pinned(node) == op_pin_state_floats) {
4934 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4935 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4936 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4937 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4940 SET_IA32_ORIG_NODE(load, node);
4941 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4945 * Transform Builtin prefetch
4947 static ir_node *gen_prefetch(ir_node *node)
4950 ir_node *ptr, *block, *mem, *base, *idx;
4951 ir_node *param, *new_node;
4954 ia32_address_t addr;
4956 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4957 /* no prefetch at all, route memory */
4958 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write hint (rw == 1 means prefetch for write) */
4961 param = get_Builtin_param(node, 1);
4962 tv = get_Const_tarval(param);
4963 rw = get_tarval_long(tv);
4965 /* construct load address */
4966 memset(&addr, 0, sizeof(addr));
4967 ptr = get_Builtin_param(node, 0);
4968 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4975 base = be_transform_node(base);
4981 idx = be_transform_node(idx);
4984 dbgi = get_irn_dbg_info(node);
4985 block = be_transform_node(get_nodes_block(node));
4986 mem = be_transform_node(get_Builtin_mem(node));
4988 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4989 /* we have 3DNow!, this was already checked above */
4990 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4991 } else if (ia32_cg_config.use_sse_prefetch) {
4992 /* note: rw == 1 is IGNORED in that case */
4993 param = get_Builtin_param(node, 2);
4994 tv = get_Const_tarval(param);
4995 locality = get_tarval_long(tv);
4997 /* SSE style prefetch */
/* the locality hint selects one of the four SSE prefetch flavors
 * (NTA / T2 / T1 / T0) */
5000 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5003 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5006 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5009 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5013 assert(ia32_cg_config.use_3dnow_prefetch);
5014 /* 3DNow! style prefetch */
5015 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5018 set_irn_pinned(new_node, get_irn_pinned(node));
5019 set_ia32_op_type(new_node, ia32_AddrModeS);
5020 set_ia32_ls_mode(new_node, mode_Bu);
5021 set_address(new_node, &addr);
5023 SET_IA32_ORIG_NODE(new_node, node);
/* only the memory result of the prefetch is used */
5025 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5029 * Transform bsf like node
/**
 * Transform a bsf-like builtin (one source operand, one GP result):
 * builds the node via @p func with a matched address mode on param 0.
 */
5031 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5033 ir_node *param = get_Builtin_param(node, 0);
5034 dbg_info *dbgi = get_irn_dbg_info(node);
5036 ir_node *block = get_nodes_block(node);
5037 ir_node *new_block = be_transform_node(block);
5039 ia32_address_mode_t am;
5040 ia32_address_t *addr = &am.addr;
5043 match_arguments(&am, block, NULL, param, NULL, match_am);
5045 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5046 set_am_attributes(cnt, &am);
5047 set_ia32_ls_mode(cnt, get_irn_mode(param));
5049 SET_IA32_ORIG_NODE(cnt, node);
/* if a memory operand was folded in, reroute the memory Proj */
5050 return fix_mem_proj(cnt, &am);
5054 * Transform builtin ffs.
/**
 * Transform builtin ffs (find first set).
 *
 * Computed as (bsf(x) | -(x == 0)) + 1: bsf delivers the index of the
 * lowest set bit and sets ZF when x is zero; or-ing with -(x==0) turns
 * the undefined bsf result into -1 in that case, so the final +1 yields
 * 0 for x == 0 and index+1 otherwise.
 */
5056 static ir_node *gen_ffs(ir_node *node)
5058 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5059 ir_node *real = skip_Proj(bsf);
5060 dbg_info *dbgi = get_irn_dbg_info(real);
5061 ir_node *block = get_nodes_block(real);
5062 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* we need both the result and the flags output of the Bsf */
5065 if (get_irn_mode(real) != mode_T) {
5066 set_irn_mode(real, mode_T);
5067 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5070 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) as a byte value */
5073 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5074 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the Setcc byte to 32 bit */
5077 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5078 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0), i.e. all-ones mask iff x was zero */
5081 neg = new_bd_ia32_Neg(dbgi, block, conv);
5084 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5085 set_ia32_ls_mode(orn, mode_Iu);
5086 set_ia32_commutative(orn);
/* final +1 implemented as a Lea with offset 1 */
5089 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5090 add_ia32_am_offs_int(add, 1);
5095 * Transform builtin clz.
5097 static ir_node *gen_clz(ir_node *node)
5099 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5100 ir_node *real = skip_Proj(bsr);
5101 dbg_info *dbgi = get_irn_dbg_info(real);
5102 ir_node *block = get_nodes_block(real);
5103 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5105 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5109 * Transform builtin ctz.
5111 static ir_node *gen_ctz(ir_node *node)
5113 return gen_unop_AM(node, new_bd_ia32_Bsf);
5117 * Transform builtin parity.
/**
 * Transform builtin parity.
 *
 * Folds the 32-bit value down to one byte (shift/xor, then the
 * XorHighLow pseudo-op) so the hardware parity flag, which only
 * reflects the lowest byte, covers all bits; the flag is then
 * materialized with Setcc.
 */
5119 static ir_node *gen_parity(ir_node *node)
5121 dbg_info *dbgi = get_irn_dbg_info(node);
5122 ir_node *block = get_nodes_block(node);
5123 ir_node *new_block = be_transform_node(block);
5124 ir_node *param = get_Builtin_param(node, 0);
5125 ir_node *new_param = be_transform_node(param);
5128 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5129 * so we have to do complicated xoring first.
5130 * (we should also better lower this before the backend so we still have a
5131 * chance for CSE, constant folding and other goodies for some of these
/* fold upper half onto lower half: xorn = x ^ (x >> 16) */
5134 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5135 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5136 ir_node *xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5138 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5141 set_ia32_ls_mode(xorn, mode_Iu);
5142 set_ia32_commutative(xorn);
/* we need the flags output of the XorHighLow */
5144 set_irn_mode(xor2, mode_T);
5145 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* materialize the (inverted) parity flag as a byte */
5148 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5149 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the Setcc byte to the full register */
5152 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5153 nomem, new_node, mode_Bu);
5154 SET_IA32_ORIG_NODE(new_node, node);
5159 * Transform builtin popcount
/**
 * Transform builtin popcount.
 *
 * Uses the hardware popcnt instruction when available; otherwise emits
 * the classic bit-twiddling reduction (pairwise sums of 1-, 2-, 4-, 8-
 * and 16-bit groups, additions expressed as Lea nodes).
 */
5161 static ir_node *gen_popcount(ir_node *node)
5163 ir_node *param = get_Builtin_param(node, 0);
5164 dbg_info *dbgi = get_irn_dbg_info(node);
5166 ir_node *block = get_nodes_block(node);
5167 ir_node *new_block = be_transform_node(block);
5170 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5172 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5173 if (ia32_cg_config.use_popcnt) {
5174 ia32_address_mode_t am;
5175 ia32_address_t *addr = &am.addr;
5178 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5180 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5181 set_am_attributes(cnt, &am);
5182 set_ia32_ls_mode(cnt, get_irn_mode(param));
5184 SET_IA32_ORIG_NODE(cnt, node);
5185 return fix_mem_proj(cnt, &am);
5188 new_param = be_transform_node(param);
5190 /* do the standard popcount algo */
5191 /* TODO: This is stupid, we should transform this before the backend,
5192 * to get CSE, localopts, etc. for the operations
5193 * TODO: This is also not the optimal algorithm (it is just the starting
5194 * example in hackers delight, they optimize it more on the following page)
5195 * But I'm too lazy to fix this now, as the code should get lowered before
5196 * the backend anyway.
5199 /* m1 = x & 0x55555555 */
5200 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5201 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5204 simm = ia32_create_Immediate(NULL, 0, 1);
5205 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5207 /* m2 = s1 & 0x55555555 */
5208 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 */
5211 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5213 /* m4 = m3 & 0x33333333 */
5214 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5215 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5218 simm = ia32_create_Immediate(NULL, 0, 2);
5219 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5221 /* m5 = s2 & 0x33333333 */
5222 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5225 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5227 /* m7 = m6 & 0x0F0F0F0F */
5228 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5229 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5232 simm = ia32_create_Immediate(NULL, 0, 4);
5233 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5235 /* m8 = s3 & 0x0F0F0F0F */
5236 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5239 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5241 /* m10 = m9 & 0x00FF00FF */
5242 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5243 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5246 simm = ia32_create_Immediate(NULL, 0, 8);
5247 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5249 /* m11 = s4 & 0x00FF00FF */
5250 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5252 /* m12 = m10 + m11 */
5253 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5255 /* m13 = m12 & 0x0000FFFF */
5256 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5257 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5259 /* s5 = m12 >> 16 */
5260 simm = ia32_create_Immediate(NULL, 0, 16);
5261 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5263 /* res = m13 + s5 */
5264 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5268 * Transform builtin byte swap.
/**
 * Transform builtin byte swap.
 *
 * 32-bit values use the bswap instruction when available; otherwise
 * three rotates emulate it (rol16 ax; rol eax,16; rol16 ax).
 * 16-bit values always use Bswap16.
 */
5270 static ir_node *gen_bswap(ir_node *node)
5272 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5273 dbg_info *dbgi = get_irn_dbg_info(node);
5275 ir_node *block = get_nodes_block(node);
5276 ir_node *new_block = be_transform_node(block);
5277 ir_mode *mode = get_irn_mode(param);
5278 unsigned size = get_mode_size_bits(mode);
5282 if (ia32_cg_config.use_bswap) {
5283 /* swap available */
5284 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* no bswap: swap the low word, rotate halves, swap the low word again */
5286 ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5287 ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5288 ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5289 ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5290 ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
/* 16-bit rotates on the low word, 32-bit rotate on the full register */
5291 set_ia32_ls_mode(rol1, mode_Hu);
5292 set_ia32_ls_mode(rol2, mode_Iu);
5293 set_ia32_ls_mode(rol3, mode_Hu);
5298 /* swap16 always available */
5299 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5302 panic("Invalid bswap size (%d)", size);
5307 * Transform builtin outport.
/**
 * Transform builtin outport: write a value to an I/O port.
 * param 0 is the port number (immediate if possible), param 1 the value.
 */
5309 static ir_node *gen_outport(ir_node *node)
5311 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5312 ir_node *oldv = get_Builtin_param(node, 1);
5313 ir_mode *mode = get_irn_mode(oldv);
5314 ir_node *value = be_transform_node(oldv);
5315 ir_node *block = be_transform_node(get_nodes_block(node));
5316 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5317 dbg_info *dbgi = get_irn_dbg_info(node);
5319 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the ls mode records the access width of the port write */
5320 set_ia32_ls_mode(res, mode);
5325 * Transform builtin inport.
/**
 * Transform builtin inport: read a value from an I/O port.
 * The access width is taken from the builtin's result type.
 */
5327 static ir_node *gen_inport(ir_node *node)
5329 ir_type *tp = get_Builtin_type(node);
5330 ir_type *rstp = get_method_res_type(tp, 0);
5331 ir_mode *mode = get_type_mode(rstp);
5332 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5333 ir_node *block = be_transform_node(get_nodes_block(node));
5334 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5335 dbg_info *dbgi = get_irn_dbg_info(node);
5337 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5338 set_ia32_ls_mode(res, mode);
5340 /* check for missing Result Proj */
5345 * Transform a builtin inner trampoline
/**
 * Transform a builtin inner trampoline.
 *
 * Writes the 10-byte trampoline machine code to the memory at ptr:
 *   B9 <env>         mov ecx, <env>        (opcode 0xB9 + imm32)
 *   E9 <rel-callee>  jmp rel32 to callee   (opcode 0xE9 + rel32)
 * The jump displacement is relative to the end of the trampoline,
 * hence the -10 adjustment below.
 * Returns a Tuple of (memory, trampoline address).
 */
5347 static ir_node *gen_inner_trampoline(ir_node *node)
5349 ir_node *ptr = get_Builtin_param(node, 0);
5350 ir_node *callee = get_Builtin_param(node, 1);
5351 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5352 ir_node *mem = get_Builtin_mem(node);
5353 ir_node *block = get_nodes_block(node);
5354 ir_node *new_block = be_transform_node(block);
5358 ir_node *trampoline;
5360 dbg_info *dbgi = get_irn_dbg_info(node);
5361 ia32_address_t addr;
5363 /* construct store address */
5364 memset(&addr, 0, sizeof(addr));
5365 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5367 if (addr.base == NULL) {
5368 addr.base = noreg_GP;
5370 addr.base = be_transform_node(addr.base);
5373 if (addr.index == NULL) {
5374 addr.index = noreg_GP;
5376 addr.index = be_transform_node(addr.index);
5378 addr.mem = be_transform_node(mem);
5380 /* mov ecx, <env> */
5381 val = ia32_create_Immediate(NULL, 0, 0xB9);
5382 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5383 addr.index, addr.mem, val);
5384 set_irn_pinned(store, get_irn_pinned(node));
5385 set_ia32_op_type(store, ia32_AddrModeD);
5386 set_ia32_ls_mode(store, mode_Bu);
5387 set_address(store, &addr);
/* store the 32-bit environment value (the mov's immediate operand) */
5391 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5392 addr.index, addr.mem, env);
5393 set_irn_pinned(store, get_irn_pinned(node));
5394 set_ia32_op_type(store, ia32_AddrModeD);
5395 set_ia32_ls_mode(store, mode_Iu);
5396 set_address(store, &addr);
5400 /* jmp rel <callee> */
5401 val = ia32_create_Immediate(NULL, 0, 0xE9);
5402 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5403 addr.index, addr.mem, val);
5404 set_irn_pinned(store, get_irn_pinned(node));
5405 set_ia32_op_type(store, ia32_AddrModeD);
5406 set_ia32_ls_mode(store, mode_Bu);
5407 set_address(store, &addr);
5411 trampoline = be_transform_node(ptr);
5413 /* the callee is typically an immediate */
5414 if (is_SymConst(callee)) {
5415 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5417 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5418 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline: jump displacement relative to
 * the instruction following the 10-byte trampoline */
5420 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5422 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5423 addr.index, addr.mem, rel);
5424 set_irn_pinned(store, get_irn_pinned(node));
5425 set_ia32_op_type(store, ia32_AddrModeD);
5426 set_ia32_ls_mode(store, mode_Iu);
5427 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5432 return new_r_Tuple(new_block, 2, in);
5436 * Transform Builtin node.
/**
 * Transform a Builtin node: dispatch on the builtin kind to the
 * specialized gen_* transformer.
 */
5438 static ir_node *gen_Builtin(ir_node *node)
5440 ir_builtin_kind kind = get_Builtin_kind(node);
5444 return gen_trap(node);
5445 case ir_bk_debugbreak:
5446 return gen_debugbreak(node);
5447 case ir_bk_return_address:
5448 return gen_return_address(node);
5449 case ir_bk_frame_address:
5450 return gen_frame_address(node);
5451 case ir_bk_prefetch:
5452 return gen_prefetch(node);
5454 return gen_ffs(node);
5456 return gen_clz(node);
5458 return gen_ctz(node);
5460 return gen_parity(node);
5461 case ir_bk_popcount:
5462 return gen_popcount(node);
5464 return gen_bswap(node);
5466 return gen_outport(node);
5468 return gen_inport(node);
5469 case ir_bk_inner_trampoline:
5470 return gen_inner_trampoline(node);
5472 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5476 * Transform Proj(Builtin) node.
/**
 * Transform Proj(Builtin): map firm Builtin proj numbers onto the
 * projections/tuple entries of the transformed ia32 node.
 */
5478 static ir_node *gen_Proj_Builtin(ir_node *proj)
5480 ir_node *node = get_Proj_pred(proj);
5481 ir_node *new_node = be_transform_node(node);
5482 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: only the result proj is expected */
5485 case ir_bk_return_address:
5486 case ir_bk_frame_address:
5491 case ir_bk_popcount:
5493 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
/* memory-only builtins: only the M proj is expected */
5496 case ir_bk_debugbreak:
5497 case ir_bk_prefetch:
5499 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport produces both a result and a memory value */
5502 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5503 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5505 assert(get_Proj_proj(proj) == pn_Builtin_M);
5506 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5508 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returns a Tuple: [0] = memory, [1] = address */
5509 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5510 return get_Tuple_pred(new_node, 1);
5512 assert(get_Proj_proj(proj) == pn_Builtin_M);
5513 return get_Tuple_pred(new_node, 0);
5516 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5519 static ir_node *gen_be_IncSP(ir_node *node)
5521 ir_node *res = be_duplicate_node(node);
5522 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5528 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call: renumber them to the ia32_Call
 * projection scheme and pin result registers where they are fixed by
 * the ABI (esp, fpcw).
 */
5530 static ir_node *gen_Proj_be_Call(ir_node *node)
5532 ir_node *call = get_Proj_pred(node);
5533 ir_node *new_call = be_transform_node(call);
5534 dbg_info *dbgi = get_irn_dbg_info(node);
5535 long proj = get_Proj_proj(node);
5536 ir_mode *mode = get_irn_mode(node);
5539 if (proj == pn_be_Call_M) {
/* NOTE(review): n_ia32_Call_mem looks like an *input* index being used
 * as a proj number here — verify it should not be pn_ia32_Call_M */
5540 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5542 /* transform call modes */
5543 if (mode_is_data(mode)) {
5544 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5548 /* Map from be_Call to ia32_Call proj number */
5549 if (proj == pn_be_Call_sp) {
5550 proj = pn_ia32_Call_stack;
5551 } else if (proj == pn_be_Call_M) {
5552 proj = pn_ia32_Call_M;
5553 } else if (proj == pn_be_Call_X_except) {
5554 proj = pn_ia32_Call_X_except;
5555 } else if (proj == pn_be_Call_X_regular) {
5556 proj = pn_ia32_Call_X_regular;
/* result proj: find the ia32_Call output with the same limited
 * register requirement as the be_Call result */
5558 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5559 int const n_outs = arch_get_irn_n_outs(new_call);
5562 assert(proj >= pn_be_Call_first_res);
5563 assert(req->type & arch_register_req_type_limited);
5565 for (i = 0; i < n_outs; ++i) {
5566 arch_register_req_t const *const new_req
5567 = arch_get_irn_register_req_out(new_call, i);
5569 if (!(new_req->type & arch_register_req_type_limited) ||
5570 new_req->cls != req->cls ||
5571 *new_req->limited != *req->limited)
5580 res = new_rd_Proj(dbgi, new_call, mode, proj);
5582 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5584 case pn_ia32_Call_stack:
5585 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5588 case pn_ia32_Call_fpcw:
5589 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/**
 * Transform a Proj from an ASM node: the memory proj is mapped to the
 * last output of the transformed ASM; value projs keep their position.
 */
5596 static ir_node *gen_Proj_ASM(ir_node *node)
5598 ir_mode *mode = get_irn_mode(node);
5599 ir_node *pred = get_Proj_pred(node);
5600 ir_node *new_pred = be_transform_node(pred);
5601 long pos = get_Proj_proj(node);
5603 if (mode == mode_M) {
/* by construction the memory output is the last one */
5604 pos = arch_get_irn_n_outs(new_pred)-1;
5605 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5607 } else if (mode_is_float(mode)) {
5610 panic("unexpected proj mode at ASM");
5613 return new_r_Proj(new_pred, mode, pos);
5617 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specialized Proj transformer; unhandled
 * Projs are duplicated unchanged.
 */
5619 static ir_node *gen_Proj(ir_node *node)
5621 ir_node *pred = get_Proj_pred(node);
5624 switch (get_irn_opcode(pred)) {
5626 return gen_Proj_Load(node);
5628 return gen_Proj_Store(node);
5630 return gen_Proj_ASM(node);
5632 return gen_Proj_Builtin(node);
5634 return gen_Proj_Div(node);
5636 return gen_Proj_Mod(node);
5638 return gen_Proj_CopyB(node);
5640 return gen_Proj_be_SubSP(node);
5642 return gen_Proj_be_AddSP(node);
5644 return gen_Proj_be_Call(node);
5646 proj = get_Proj_proj(node);
5648 case pn_Start_X_initial_exec: {
5649 ir_node *block = get_nodes_block(pred);
5650 ir_node *new_block = be_transform_node(block);
5651 dbg_info *dbgi = get_irn_dbg_info(node);
5652 /* we exchange the ProjX with a jump */
5653 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5661 if (is_ia32_l_FloattoLL(pred)) {
5662 return gen_Proj_l_FloattoLL(node);
5664 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* non-ia32 predecessor: re-create GP-mode Projs in mode_Iu */
5668 ir_mode *mode = get_irn_mode(node);
5669 if (ia32_mode_needs_gp_reg(mode)) {
5670 ir_node *new_pred = be_transform_node(pred);
5671 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5672 get_Proj_proj(node));
/* keep the node number for debugging purposes */
5673 new_proj->node_nr = node->node_nr;
5678 return be_duplicate_node(node);
5682 * Enters all transform functions into the generic pointer
5684 static void register_transformers(void)
5686 /* first clear the generic function pointer for all ops */
5687 be_start_transform_setup();
5689 be_set_transform_function(op_Add, gen_Add);
5690 be_set_transform_function(op_And, gen_And);
5691 be_set_transform_function(op_ASM, ia32_gen_ASM);
5692 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5693 be_set_transform_function(op_be_Call, gen_be_Call);
5694 be_set_transform_function(op_be_Copy, gen_be_Copy);
5695 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5696 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5697 be_set_transform_function(op_be_Return, gen_be_Return);
5698 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5699 be_set_transform_function(op_Builtin, gen_Builtin);
5700 be_set_transform_function(op_Cmp, gen_Cmp);
5701 be_set_transform_function(op_Cond, gen_Cond);
5702 be_set_transform_function(op_Const, gen_Const);
5703 be_set_transform_function(op_Conv, gen_Conv);
5704 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5705 be_set_transform_function(op_Div, gen_Div);
5706 be_set_transform_function(op_Eor, gen_Eor);
5707 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5708 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5709 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5710 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5711 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5712 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5713 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5714 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5715 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5716 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5717 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5718 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5719 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5720 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5721 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5722 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5723 be_set_transform_function(op_IJmp, gen_IJmp);
5724 be_set_transform_function(op_Jmp, gen_Jmp);
5725 be_set_transform_function(op_Load, gen_Load);
5726 be_set_transform_function(op_Minus, gen_Minus);
5727 be_set_transform_function(op_Mod, gen_Mod);
5728 be_set_transform_function(op_Mul, gen_Mul);
5729 be_set_transform_function(op_Mulh, gen_Mulh);
5730 be_set_transform_function(op_Mux, gen_Mux);
5731 be_set_transform_function(op_Not, gen_Not);
5732 be_set_transform_function(op_Or, gen_Or);
5733 be_set_transform_function(op_Phi, gen_Phi);
5734 be_set_transform_function(op_Proj, gen_Proj);
5735 be_set_transform_function(op_Rotl, gen_Rotl);
5736 be_set_transform_function(op_Shl, gen_Shl);
5737 be_set_transform_function(op_Shr, gen_Shr);
5738 be_set_transform_function(op_Shrs, gen_Shrs);
5739 be_set_transform_function(op_Store, gen_Store);
5740 be_set_transform_function(op_Sub, gen_Sub);
5741 be_set_transform_function(op_Switch, gen_Switch);
5742 be_set_transform_function(op_SymConst, gen_SymConst);
5743 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5747 * Pre-transform all unknown and noreg nodes.
5749 static void ia32_pretransform_node(void)
5751 ir_graph *irg = current_ir_graph;
5752 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5754 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5755 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5756 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5757 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5758 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5760 nomem = get_irg_no_mem(irg);
5761 noreg_GP = ia32_new_NoReg_gp(irg);
5765 * Post-process all calls if we are in SSE mode.
5766 * The ABI requires that the results are in st0, copy them
5767 * to a xmm register.
/**
 * Post-process all recorded calls when compiling in SSE2 mode: the ABI
 * returns float results in st(0), so each float result is either stored
 * directly with vfst (when the only user is an xStore) or spilled to the
 * frame and reloaded into an xmm register via xLoad.
 */
5769 static void postprocess_fp_call_results(void)
5773 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5774 ir_node *call = call_list[i];
5775 ir_type *mtp = call_types[i];
5778 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5779 ir_type *res_tp = get_method_res_type(mtp, j);
5780 ir_node *res, *new_res;
5783 if (! is_atomic_type(res_tp)) {
5784 /* no floating point return */
5787 res_mode = get_type_mode(res_tp);
5788 if (! mode_is_float(res_mode)) {
5789 /* no floating point return */
/* the x87 result proj of the call */
5793 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5796 /* now patch the users */
5797 foreach_out_edge_safe(res, edge) {
5798 ir_node *succ = get_edge_src_irn(edge);
5801 if (be_is_Keep(succ))
5804 if (is_ia32_xStore(succ)) {
5805 /* an xStore can be patched into an vfst */
5806 dbg_info *db = get_irn_dbg_info(succ);
5807 ir_node *block = get_nodes_block(succ);
5808 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5809 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5810 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5811 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5812 ir_mode *mode = get_ia32_ls_mode(succ);
5814 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5815 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
/* copy all addressing attributes from the xStore to the vfst */
5816 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5817 if (is_ia32_use_frame(succ))
5818 set_ia32_use_frame(st);
5819 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5820 set_irn_pinned(st, get_irn_pinned(succ));
5821 set_ia32_op_type(st, ia32_AddrModeD);
/* proj numbers of xStore and vfst must agree for the exchange */
5823 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5824 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5825 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* otherwise: lazily create the st(0) -> frame -> xmm reload once */
5832 if (new_res == NULL) {
5833 dbg_info *db = get_irn_dbg_info(call);
5834 ir_node *block = get_nodes_block(call);
5835 ir_node *frame = get_irg_frame(current_ir_graph);
5836 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5837 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5838 ir_node *vfst, *xld, *new_mem;
5841 /* store st(0) on stack */
5842 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5844 set_ia32_op_type(vfst, ia32_AddrModeD);
5845 set_ia32_use_frame(vfst);
5847 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5849 /* load into SSE register */
5850 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5852 set_ia32_op_type(xld, ia32_AddrModeS);
5853 set_ia32_use_frame(xld);
5855 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5856 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* route existing memory users through the new store/load chain */
5858 if (old_mem != NULL) {
5859 edges_reroute(old_mem, new_mem);
/* finally redirect the user to the xmm result */
5863 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5869 /* do the transformation */
/**
 * Transform the whole graph from firm into the ia32 representation:
 * sets up the transformers and per-graph state, runs the transform
 * phase with CSE disabled, and post-processes SSE2 float call results.
 */
5870 void ia32_transform_graph(ir_graph *irg)
5874 register_transformers();
5875 initial_fpcw = NULL;
5876 ia32_no_pic_adjust = 0;
5878 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* heights are needed by address-mode matching; time them separately */
5880 be_timer_push(T_HEIGHTS);
5881 ia32_heights = heights_new(irg);
5882 be_timer_pop(T_HEIGHTS);
5883 ia32_calculate_non_address_mode_nodes(irg);
5885 /* the transform phase is not safe for CSE (yet) because several nodes get
5886 * attributes set after their creation */
5887 cse_last = get_opt_cse();
5890 call_list = NEW_ARR_F(ir_node *, 0);
5891 call_types = NEW_ARR_F(ir_type *, 0);
5892 be_transform_graph(irg, ia32_pretransform_node);
5894 if (ia32_cg_config.use_sse2)
5895 postprocess_fp_call_results();
5896 DEL_ARR_F(call_types);
5897 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5899 set_opt_cse(cse_last);
5901 ia32_free_non_address_mode_nodes();
5902 heights_free(ia32_heights);
5903 ia32_heights = NULL;
/** One-time initialization of the transform module: registers the debug channel. */
5906 void ia32_init_transform(void)
5908 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");