2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* its enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
/*
 * NOTE(review): several lines of this function are missing from this excerpt
 * (the multi-line condition after tarval_is_null is cut off and all return
 * statements are absent) -- consult the full file before editing.
 */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
/* a zero constant is presumably always cheap to materialize -- TODO confirm */
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
/* assemble the lowest 32 bits of the tarval byte-by-byte (little endian) */
168 if (mode == mode_D) {
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_get_irg_options(irg)->pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
/*
 * NOTE(review): many lines of this function (declarations, closing braces,
 * else branches, return statements) are missing from this excerpt -- do not
 * edit without the full file.
 */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
/* --- floating point constants --- */
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
/* SSE2 path: materialize cheap constants directly, else load from memory */
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
/* construct 1.0 via all-ones then shift left/right to form the exponent */
225 } else if (tarval_is_one(tv)) {
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: place the constant into a data entity and xLoad it */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, otherwise load from an entity */
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
/* --- integer/pointer constants: emit a plain ia32 Const --- */
317 } else { /* non-float mode */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
/*
 * NOTE(review): else branches, declarations and returns are missing from
 * this excerpt -- consult the full file before editing.
 */
337 * Transforms a SymConst.
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
/* float symconsts become loads (SSE xLoad or x87 vfld) from the entity */
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
/* thread-local entities are addressed relative to the TLS base register */
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
/*
 * NOTE(review): the cache-hit checks (returning the previously created
 * static type) are missing from this excerpt.
 */
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
/* one cached array type per float mode (F, D, fallback/extended) */
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
/*
 * NOTE(review): struct fields, the cache-hit early return and closing
 * braces are missing from this excerpt -- consult the full file.
 */
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* table of well-known bit patterns: sign masks, abs masks, ULL bias */
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
/* entities are created once and cached for subsequent calls */
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
/* map the table's mode tag to an actual ir_mode */
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error (ia32_gen_fp_known_const)");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS gets a float[2] entity {0.0, bias}; others a plain const entity */
456 if (kct == ia32_ULLBIAS) {
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
/*
 * NOTE(review): the bodies of the early-return branches and the final
 * return are missing from this excerpt.
 */
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
/* only constants representable in double precision can be folded */
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
/* from here on: the node must be a Proj(Load, res) in the same block */
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/*
 * Describes a matched ia32 addressing/operand mode.
 * NOTE(review): several struct fields are missing from this excerpt
 * (operands, load-store mode, pinned state, mem proj, ...).
 */
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type;
559 unsigned commutative : 1;
560 unsigned ins_permuted : 1;
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/*
 * Build an address mode for @p node (a float Const or a Proj(Load)).
 * NOTE(review): declarations, early returns and closing braces are missing
 * from this excerpt -- consult the full file before editing.
 */
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
584 if (is_Const(node)) {
/* float constants become a load from a generated constant entity */
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
596 am->ls_mode = get_type_mode(get_entity_type(entity));
597 am->pinned = op_pin_state_floats;
/* otherwise: fold the Load feeding @p node into the address mode */
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
646 set_ia32_commutative(node);
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 /* we only want to skip the conv when we're the only user
666 * (because this test is used in the context of address-mode selection
667 * and we don't want to use address mode for multiple users) */
668 if (get_irn_n_edges(node) > 1)
671 src_mode = get_irn_mode(get_Conv_op(node));
672 dest_mode = get_irn_mode(node);
674 ia32_mode_needs_gp_reg(src_mode) &&
675 ia32_mode_needs_gp_reg(dest_mode) &&
676 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
679 /** Skip all Down-Conv's on a given node and return the resulting node. */
680 ir_node *ia32_skip_downconv(ir_node *node)
682 while (is_downconv(node))
683 node = get_Conv_op(node);
688 static bool is_sameconv(ir_node *node)
696 /* we only want to skip the conv when we're the only user
697 * (because this test is used in the context of address-mode selection
698 * and we don't want to use address mode for multiple users) */
699 if (get_irn_n_edges(node) > 1)
702 src_mode = get_irn_mode(get_Conv_op(node));
703 dest_mode = get_irn_mode(node);
705 ia32_mode_needs_gp_reg(src_mode) &&
706 ia32_mode_needs_gp_reg(dest_mode) &&
707 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
710 /** Skip all signedness convs */
711 static ir_node *ia32_skip_sameconv(ir_node *node)
713 while (is_sameconv(node))
714 node = get_Conv_op(node);
/*
 * Extend @p node to 32 bit via an I2I Conv.
 * NOTE(review): the target-mode selection and declarations between the
 * signedness check and the block lookup are missing from this excerpt.
 */
719 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
721 ir_mode *mode = get_irn_mode(node);
/* signedness of the source mode presumably selects the target mode -- TODO confirm */
726 if (mode_is_signed(mode)) {
731 block = get_nodes_block(node);
732 dbgi = get_irn_dbg_info(node);
734 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * NOTE(review): many lines (else branches, closing braces, several
 * assignments) are missing from this excerpt -- do not edit without
 * the full file.
 */
738 * matches operands of a node into ia32 addressing/operand modes. This covers
739 * usage of source address mode, immediates, operations with non 32-bit modes,
741 * The resulting data is filled into the @p am struct. block is the block
742 * of the node whose arguments are matched. op1, op2 are the first and second
743 * input that are matched (op1 may be NULL). other_op is another unrelated
744 * input that is not matched! but which is needed sometimes to check if AM
745 * for op1/op2 is legal.
746 * @p flags describes the supported modes of the operation in detail.
748 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
749 ir_node *op1, ir_node *op2, ir_node *other_op,
752 ia32_address_t *addr = &am->addr;
753 ir_mode *mode = get_irn_mode(op2);
754 int mode_bits = get_mode_size_bits(mode);
755 ir_node *new_op1, *new_op2;
757 unsigned commutative;
758 int use_am_and_immediates;
761 memset(am, 0, sizeof(am[0]));
/* decode the match flags into individual capability booleans */
763 commutative = (flags & match_commutative) != 0;
764 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
765 use_am = (flags & match_am) != 0;
766 use_immediate = (flags & match_immediate) != 0;
767 assert(!use_am_and_immediates || use_immediate);
770 assert(!commutative || op1 != NULL);
771 assert(use_am || !(flags & match_8bit_am));
772 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit AM is only allowed when explicitly requested by the flags */
774 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
775 (mode_bits == 16 && !(flags & match_16bit_am))) {
779 /* we can simply skip downconvs for mode neutral nodes: the upper bits
780 * can be random for these operations */
781 if (flags & match_mode_neutral) {
782 op2 = ia32_skip_downconv(op2);
784 op1 = ia32_skip_downconv(op1);
787 op2 = ia32_skip_sameconv(op2);
789 op1 = ia32_skip_sameconv(op1);
793 /* match immediates. firm nodes are normalized: constants are always on the
796 if (!(flags & match_try_am) && use_immediate) {
797 new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source address mode on op2 first, then (if commutative) on op1 */
800 if (new_op2 == NULL &&
801 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
802 build_address(am, op2, ia32_create_am_normal);
803 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
804 if (mode_is_float(mode)) {
805 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
809 am->op_type = ia32_AddrModeS;
810 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
812 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
814 build_address(am, op1, ia32_create_am_normal);
816 if (mode_is_float(mode)) {
817 noreg = ia32_new_NoReg_vfp(current_ir_graph);
822 if (new_op2 != NULL) {
825 new_op1 = be_transform_node(op2);
/* operands were swapped for the commutative AM match */
827 am->ins_permuted = true;
829 am->op_type = ia32_AddrModeS;
/* no AM possible: plain register/immediate operands */
831 am->op_type = ia32_Normal;
833 if (flags & match_try_am) {
839 mode = get_irn_mode(op2);
840 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
841 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
843 new_op2 = create_upconv(op2, NULL);
844 am->ls_mode = mode_Iu;
846 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
848 new_op2 = be_transform_node(op2);
849 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill unset address parts with defaults */
852 if (addr->base == NULL)
853 addr->base = noreg_GP;
854 if (addr->index == NULL)
855 addr->index = noreg_GP;
856 if (addr->mem == NULL)
859 am->new_op1 = new_op1;
860 am->new_op2 = new_op2;
861 am->commutative = commutative;
/*
 * NOTE(review): declarations, the non-AM early return and the final else
 * branch are missing from this excerpt.
 */
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
879 if (am->mem_proj == NULL)
882 /* we have to create a mode_T so the old MemProj can attach to us */
883 mode = get_irn_mode(node);
884 load = get_Proj_pred(am->mem_proj);
/* redirect the old Load's users to the new AM node */
886 be_set_transformed_node(load, node);
888 if (mode != mode_T) {
889 set_irn_mode(node, mode_T);
890 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/*
 * NOTE(review): declarations, closing braces and the final return are
 * missing from this excerpt.
 */
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 construct_binop_func *func, match_flags_t flags)
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 block = get_nodes_block(node);
914 match_arguments(&am, block, op1, op2, NULL, flags);
916 dbgi = get_irn_dbg_info(node);
917 new_block = be_transform_node(block);
918 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 am.new_op1, am.new_op2);
920 set_am_attributes(new_node, &am);
921 /* we can't use source address mode anymore when using immediates */
922 if (!(flags & match_am_and_immediates) &&
923 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 set_ia32_am_support(new_node, ia32_am_none);
925 SET_IA32_ORIG_NODE(new_node, node);
927 new_node = fix_mem_proj(new_node, &am);
933 * Generic names for the inputs of an ia32 binary op.
936 n_ia32_l_binop_left, /**< ia32 left input */
937 n_ia32_l_binop_right, /**< ia32 right input */
938 n_ia32_l_binop_eflags /**< ia32 eflags input */
940 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
941 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
942 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
943 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
944 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
945 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/*
 * NOTE(review): declarations, closing braces and the final return are
 * missing from this excerpt.
 */
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructor ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 ir_node *src_block = get_nodes_block(node);
959 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 ir_node *block, *new_node, *new_eflags;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
967 match_arguments(&am, src_block, op1, op2, eflags, flags);
969 dbgi = get_irn_dbg_info(node);
970 block = be_transform_node(src_block);
971 new_eflags = be_transform_node(eflags);
972 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2, new_eflags);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
986 static ir_node *get_fpcw(void)
988 if (initial_fpcw != NULL)
991 initial_fpcw = be_transform_node(old_initial_fpcw);
/*
 * Skip float Convs that do not narrow the value (upconvs).
 * NOTE(review): the loop's break/advance statements and the final return
 * are missing from this excerpt.
 */
995 static ir_node *skip_float_upconv(ir_node *node)
997 ir_mode *mode = get_irn_mode(node);
998 assert(mode_is_float(mode));
1000 while (is_Conv(node)) {
1001 ir_node *pred = get_Conv_op(node);
1002 ir_mode *pred_mode = get_irn_mode(pred);
1005 * suboptimal, but without this check the address mode matcher
1006 * can incorrectly think that something has only 1 user
1008 if (get_irn_n_edges(node) > 1)
/* only skip when the predecessor is a float of equal or smaller size */
1011 if (!mode_is_float(pred_mode)
1012 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
/*
 * NOTE(review): declarations, closing braces and the final return are
 * missing from this excerpt.
 */
1021 * Construct a standard binary operation, set AM and immediate if required.
1023 * @param op1 The first operand
1024 * @param op2 The second operand
1025 * @param func The node constructor function
1026 * @return The constructed ia32 node.
1028 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1029 construct_binop_float_func *func)
1035 ia32_address_mode_t am;
1036 ia32_address_t *addr = &am.addr;
1037 ia32_x87_attr_t *attr;
1038 /* All operations are considered commutative, because there are reverse
1040 match_flags_t flags = match_commutative | match_am;
1042 op1 = skip_float_upconv(op1);
1043 op2 = skip_float_upconv(op2);
1045 block = get_nodes_block(node);
1046 match_arguments(&am, block, op1, op2, NULL, flags);
1048 dbgi = get_irn_dbg_info(node);
1049 new_block = be_transform_node(block);
/* x87 binops additionally take the fpu control word as input */
1050 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1051 am.new_op1, am.new_op2, get_fpcw());
1052 set_am_attributes(new_node, &am);
/* record a commutative-match operand swap so emit can pick the reverse op */
1054 attr = get_ia32_x87_attr(new_node);
1055 attr->attr.data.ins_permuted = am.ins_permuted;
1057 SET_IA32_ORIG_NODE(new_node, node);
1059 new_node = fix_mem_proj(new_node, &am);
/*
 * NOTE(review): declarations, else branches, closing braces and the final
 * return are missing from this excerpt.
 */
1065 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1067 * @param op1 The first operand
1068 * @param op2 The second operand
1069 * @param func The node constructor function
1070 * @return The constructed ia32 node.
1072 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1073 construct_shift_func *func,
1074 match_flags_t flags)
1077 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1078 ir_mode *mode = get_irn_mode(node);
1080 assert(! mode_is_float(mode));
1081 assert(flags & match_immediate);
1082 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* ia32 shift instructions mask the count mod 32; other widths unsupported */
1084 if (get_mode_modulo_shift(mode) != 32)
1085 panic("modulo shift!=32 not supported by ia32 backend");
1087 if (flags & match_mode_neutral) {
1088 op1 = ia32_skip_downconv(op1);
1089 new_op1 = be_transform_node(op1);
1090 } else if (get_mode_size_bits(mode) != 32) {
1091 new_op1 = create_upconv(op1, node);
1093 new_op1 = be_transform_node(op1);
1096 /* the shift amount can be any mode that is bigger than 5 bits, since all
1097 * other bits are ignored anyway */
1098 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1099 ir_node *const op = get_Conv_op(op2);
1100 if (mode_is_float(get_irn_mode(op)))
1103 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1105 new_op2 = create_immediate_or_transform(op2, 0);
1107 dbgi = get_irn_dbg_info(node);
1108 block = get_nodes_block(node);
1109 new_block = be_transform_node(block);
1110 new_node = func(dbgi, new_block, new_op1, new_op2);
1111 SET_IA32_ORIG_NODE(new_node, node);
1113 /* lowered shift instruction may have a dependency operand, handle it here */
1114 if (get_irn_arity(node) == 3) {
1115 /* we have a dependency */
1116 ir_node* dep = get_irn_n(node, 2);
1117 if (get_irn_n_edges(dep) > 1) {
1118 /* ... which has at least one user other than 'node' */
1119 ir_node *new_dep = be_transform_node(dep);
1120 add_irn_dep(new_node, new_dep);
/*
 * NOTE(review): declarations, closing braces and the final return are
 * missing from this excerpt.
 */
1129 * Construct a standard unary operation, set AM and immediate if required.
1131 * @param op The operand
1132 * @param func The node constructor function
1133 * @return The constructed ia32 node.
1135 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1136 match_flags_t flags)
1139 ir_node *block, *new_block, *new_op, *new_node;
1141 assert(flags == 0 || flags == match_mode_neutral);
/* upper bits are irrelevant for mode-neutral ops: drop narrowing convs */
1142 if (flags & match_mode_neutral) {
1143 op = ia32_skip_downconv(op);
1146 new_op = be_transform_node(op);
1147 dbgi = get_irn_dbg_info(node);
1148 block = get_nodes_block(node);
1149 new_block = be_transform_node(block);
1150 new_node = func(dbgi, new_block, new_op);
1152 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Build an ia32 Lea node from an address-mode description.
 * NOTE(review): declarations, the noreg fallbacks and the final return are
 * missing from this excerpt.
 */
1157 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1158 ia32_address_t *addr)
1168 base = be_transform_node(base);
1175 idx = be_transform_node(idx);
1178 /* segment overrides are ineffective for Leas :-( so we have to patch
1180 if (addr->tls_segment) {
/* fold the TLS base into the base register via an extra Lea */
1181 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1182 assert(addr->symconst_ent != NULL);
1183 if (base == noreg_GP)
1186 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1187 addr->tls_segment = false;
1190 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1191 set_address(res, addr);
1197 * Returns non-zero if a given address mode has a symbolic or
1198 * numerical offset != 0.
1200 static int am_has_immediates(const ia32_address_t *addr)
1202 return addr->offset != 0 || addr->symconst_ent != NULL
1203 || addr->frame_entity || addr->use_frame;
/* constructor signature for double-width shift nodes (ShlD/ShrD);
 * NOTE(review): the final parameter line of this typedef is missing. */
1206 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1207 ir_node *high, ir_node *low,
1211 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1212 * op1 - target to be shifted
1213 * op2 - contains bits to be shifted into target
1215 * Only op3 can be an immediate.
1217 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1218 ir_node *high, ir_node *low, ir_node *count,
1219 new_shiftd_func func)
1221 ir_node *new_block = be_transform_node(block);
1222 ir_node *new_high = be_transform_node(high);
1223 ir_node *new_low = be_transform_node(low);
1227 /* the shift amount can be any mode that is bigger than 5 bits, since all
1228 * other bits are ignored anyway */
1229 while (is_Conv(count) &&
1230 get_irn_n_edges(count) == 1 &&
1231 mode_is_int(get_irn_mode(count))) {
1232 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1233 count = get_Conv_op(count);
1235 new_count = create_immediate_or_transform(count, 0);
1237 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1242 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
/* Only the constant/constant case is decided positively; everything else
 * (non-Const, non-long tarvals) falls through to the elided return below. */
1245 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1247 if (is_Const(value1) && is_Const(value2)) {
1248 ir_tarval *tv1 = get_Const_tarval(value1);
1249 ir_tarval *tv2 = get_Const_tarval(value2);
1250 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1251 long v1 = get_tarval_long(tv1);
1252 long v2 = get_tarval_long(tv2);
/* v1 <= v2 rules out the degenerate pair twice; v2 == 32-v1 is the
 * actual complementarity test */
1253 return v1 <= v2 && v2 == 32-v1;
/* Tries to recognise an Or/Add of two 32-bit shifts as a double-width
 * shift (ShlD/ShrD) as produced by lower_dw, and builds the combined
 * ia32 node via gen_64bit_shifts.  Returns NULL-equivalent (elided) when
 * no pattern matches.  (NOTE: source view is elided — the op1/op2
 * swap/normalisation between lines 1263 and 1271 is not visible.) */
1259 static ir_node *match_64bit_shift(ir_node *node)
1261 ir_node *op1 = get_binop_left(node);
1262 ir_node *op2 = get_binop_right(node);
1263 assert(is_Or(node) || is_Add(node));
1271 /* match ShlD operation */
1272 if (is_Shl(op1) && is_Shr(op2)) {
1273 ir_node *shl_right = get_Shl_right(op1);
1274 ir_node *shl_left = get_Shl_left(op1);
1275 ir_node *shr_right = get_Shr_right(op2);
1276 ir_node *shr_left = get_Shr_left(op2);
1277 /* constant ShlD operation */
/* Shl by x, Shr by 32-x: low word's top bits shifted into the high word */
1278 if (is_complementary_shifts(shl_right, shr_right)) {
1279 dbg_info *dbgi = get_irn_dbg_info(node);
1280 ir_node *block = get_nodes_block(node);
1281 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1284 /* constant ShrD operation */
1285 if (is_complementary_shifts(shr_right, shl_right)) {
1286 dbg_info *dbgi = get_irn_dbg_info(node);
1287 ir_node *block = get_nodes_block(node);
1288 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1291 /* lower_dw produces the following for ShlD:
1292 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1293 if (is_Shr(shr_left) && is_Not(shr_right)
1294 && is_Const_1(get_Shr_right(shr_left))
1295 && get_Not_op(shr_right) == shl_right) {
1296 dbg_info *dbgi = get_irn_dbg_info(node);
1297 ir_node *block = get_nodes_block(node);
1298 ir_node *val_h = get_Shr_left(shr_left);
1299 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1302 /* lower_dw produces the following for ShrD:
1303 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1304 if (is_Shl(shl_left) && is_Not(shl_right)
1305 && is_Const_1(get_Shl_right(shl_left))
1306 && get_Not_op(shl_right) == shr_right) {
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_node *block = get_nodes_block(node);
1309 ir_node *val_h = get_Shl_left(shl_left);
1310 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1319 * Creates an ia32 Add.
1321 * @return the created ia32 Add node
/* Strategy (see the numbered comment below): pure-immediate trees fold to
 * a Const, Add-with-immediate becomes a Lea, source-address-mode becomes
 * a real Add, everything else becomes a Lea. */
1323 static ir_node *gen_Add(ir_node *node)
1325 ir_mode *mode = get_irn_mode(node);
1326 ir_node *op1 = get_Add_left(node);
1327 ir_node *op2 = get_Add_right(node);
1329 ir_node *block, *new_block, *new_node, *add_immediate_op;
1330 ia32_address_t addr;
1331 ia32_address_mode_t am;
/* an Add of complementary shifts may really be a 64-bit double shift */
1333 new_node = match_64bit_shift(node);
1334 if (new_node != NULL)
1337 if (mode_is_float(mode)) {
1338 if (ia32_cg_config.use_sse2)
1339 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1340 match_commutative | match_am);
/* no SSE2: fall back to the x87 FPU add */
1342 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1345 ia32_mark_non_am(node);
1347 op2 = ia32_skip_downconv(op2);
1348 op1 = ia32_skip_downconv(op1);
1352 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1353 * 1. Add with immediate -> Lea
1354 * 2. Add with possible source address mode -> Add
1355 * 3. Otherwise -> Lea
1357 memset(&addr, 0, sizeof(addr));
1358 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1359 add_immediate_op = NULL;
1361 dbgi = get_irn_dbg_info(node);
1362 block = get_nodes_block(node);
1363 new_block = be_transform_node(block);
/* case 0: whole tree folded into symconst+offset — emit a Const */
1366 if (addr.base == NULL && addr.index == NULL) {
1367 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1368 addr.symconst_sign, 0, addr.offset);
1369 SET_IA32_ORIG_NODE(new_node, node);
1372 /* add with immediate? */
1373 if (addr.index == NULL) {
1374 add_immediate_op = addr.base;
1375 } else if (addr.base == NULL && addr.scale == 0) {
1376 add_immediate_op = addr.index;
1379 if (add_immediate_op != NULL) {
/* register + no immediate at all would be Add x,0 — just pass x through */
1380 if (!am_has_immediates(&addr)) {
1381 #ifdef DEBUG_libfirm
1382 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1385 return be_transform_node(add_immediate_op);
/* case 1: register + immediate — a Lea encodes this without flags */
1388 new_node = create_lea_from_address(dbgi, new_block, &addr);
1389 SET_IA32_ORIG_NODE(new_node, node);
1393 /* test if we can use source address mode */
1394 match_arguments(&am, block, op1, op2, NULL, match_commutative
1395 | match_mode_neutral | match_am | match_immediate | match_try_am);
1397 /* construct an Add with source address mode */
1398 if (am.op_type == ia32_AddrModeS) {
1399 ia32_address_t *am_addr = &am.addr;
1400 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1401 am_addr->index, am_addr->mem, am.new_op1,
1403 set_am_attributes(new_node, &am);
1404 SET_IA32_ORIG_NODE(new_node, node);
1406 new_node = fix_mem_proj(new_node, &am);
1411 /* otherwise construct a lea */
1412 new_node = create_lea_from_address(dbgi, new_block, &addr);
1413 SET_IA32_ORIG_NODE(new_node, node);
1418 * Creates an ia32 Mul.
1420 * @return the created ia32 Mul node
/* Float multiply goes to SSE2 xMul or x87 vfmul; integer multiply becomes
 * IMul with the full matcher flag set (commutative, AM, immediates). */
1422 static ir_node *gen_Mul(ir_node *node)
1424 ir_node *op1 = get_Mul_left(node);
1425 ir_node *op2 = get_Mul_right(node);
1426 ir_mode *mode = get_irn_mode(node);
1428 if (mode_is_float(mode)) {
1429 if (ia32_cg_config.use_sse2)
1430 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1431 match_commutative | match_am);
1433 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* match_mode_neutral: the low 32 result bits do not depend on upper bits
 * of the operands, so smaller-mode Convs can be skipped */
1435 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1436 match_commutative | match_am | match_mode_neutral |
1437 match_immediate | match_am_and_immediates);
1441 * Creates an ia32 Mulh.
1442 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1443 * this result while Mul returns the lower 32 bit.
1445 * @return the created ia32 Mulh node
/* Uses one-operand IMul for signed and Mul for unsigned, then projects
 * out the high-half result register. */
1447 static ir_node *gen_Mulh(ir_node *node)
1449 dbg_info *dbgi = get_irn_dbg_info(node);
1450 ir_node *op1 = get_Mulh_left(node);
1451 ir_node *op2 = get_Mulh_right(node);
1452 ir_mode *mode = get_irn_mode(node);
1454 ir_node *proj_res_high;
/* only the 32x32->64 hardware multiply form is supported */
1456 if (get_mode_size_bits(mode) != 32) {
1457 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1460 if (mode_is_signed(mode)) {
1461 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1462 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1464 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1465 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1467 return proj_res_high;
1471 * Creates an ia32 And.
1473 * @return The created ia32 And node
/* Special-cases And with 0xFF / 0xFFFF, which is really a zero-extension
 * and is emitted as an I2I conversion instead of an And. */
1475 static ir_node *gen_And(ir_node *node)
1477 ir_node *op1 = get_And_left(node);
1478 ir_node *op2 = get_And_right(node);
1479 assert(! mode_is_float(get_irn_mode(node)));
1481 /* is it a zero extension? */
1482 if (is_Const(op2)) {
1483 ir_tarval *tv = get_Const_tarval(op2);
1484 long v = get_tarval_long(tv);
1486 if (v == 0xFF || v == 0xFFFF) {
1487 dbg_info *dbgi = get_irn_dbg_info(node);
1488 ir_node *block = get_nodes_block(node);
/* (elided lines: src_mode is chosen for the 0xFF case here) */
1495 assert(v == 0xFFFF);
/* movzx-style conversion from the small unsigned mode to 32 bit */
1498 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1503 return gen_binop(node, op1, op2, new_bd_ia32_And,
1504 match_commutative | match_mode_neutral | match_am | match_immediate);
1508 * Creates an ia32 Or.
1510 * @return The created ia32 Or node
/* An Or of complementary shifts may really be a ShlD/ShrD — try that
 * pattern first, then fall back to a plain Or binop. */
1512 static ir_node *gen_Or(ir_node *node)
1514 ir_node *op1 = get_Or_left(node);
1515 ir_node *op2 = get_Or_right(node);
1518 res = match_64bit_shift(node);
1522 assert (! mode_is_float(get_irn_mode(node)));
1523 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1524 | match_mode_neutral | match_am | match_immediate);
1530 * Creates an ia32 Eor.
1532 * @return The created ia32 Eor node
/* Straight mapping of IR Eor to ia32 Xor via the generic binop matcher. */
1534 static ir_node *gen_Eor(ir_node *node)
1536 ir_node *op1 = get_Eor_left(node);
1537 ir_node *op2 = get_Eor_right(node);
1539 assert(! mode_is_float(get_irn_mode(node)));
1540 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1541 | match_mode_neutral | match_am | match_immediate);
1546 * Creates an ia32 Sub.
1548 * @return The created ia32 Sub node
/* Float subtract goes to SSE2 xSub or x87 vfsub.  Integer Sub with a
 * constant right operand only triggers a warning: the middle end is
 * expected to have normalised it to an Add of the negated constant. */
1550 static ir_node *gen_Sub(ir_node *node)
1552 ir_node *op1 = get_Sub_left(node);
1553 ir_node *op2 = get_Sub_right(node);
1554 ir_mode *mode = get_irn_mode(node);
1556 if (mode_is_float(mode)) {
1557 if (ia32_cg_config.use_sse2)
1558 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1560 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1563 if (is_Const(op2)) {
1564 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: no match_commutative — Sub is not commutative */
1568 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1569 | match_am | match_immediate);
/* Combines the memory input of the node being transformed (src_mem) with
 * the memory consumed by a folded address mode (am_mem), avoiding memory
 * self-loops: a value-Proj and mem-Proj of the same Load must not both
 * feed the new node.  Builds a Sync when several independent memory
 * predecessors remain.  (NOTE: source view is elided — several return
 * statements in this function are not visible.) */
1572 static ir_node *transform_AM_mem(ir_node *const block,
1573 ir_node *const src_val,
1574 ir_node *const src_mem,
1575 ir_node *const am_mem)
1577 if (is_NoMem(am_mem)) {
1578 return be_transform_node(src_mem);
1579 } else if (is_Proj(src_val) &&
1581 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1582 /* avoid memory loop */
1584 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1585 ir_node *const ptr_pred = get_Proj_pred(src_val);
1586 int const arity = get_Sync_n_preds(src_mem);
/* room for all old Sync inputs plus am_mem */
1591 NEW_ARR_A(ir_node*, ins, arity + 1);
1593 /* NOTE: This sometimes produces dead-code because the old sync in
1594 * src_mem might not be used anymore, we should detect this case
1595 * and kill the sync... */
1596 for (i = arity - 1; i >= 0; --i) {
1597 ir_node *const pred = get_Sync_pred(src_mem, i);
1599 /* avoid memory loop */
1600 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1603 ins[n++] = be_transform_node(pred);
1606 if (n==1 && ins[0] == am_mem) {
1608 /* creating a new Sync and relying on CSE may fail,
1609 * if am_mem is a ProjM, which does not yet verify. */
1613 return new_r_Sync(block, n, ins);
/* generic case: sync the transformed src_mem with am_mem */
1617 ins[0] = be_transform_node(src_mem);
1619 return new_r_Sync(block, 2, ins);
1624 * Create a 32bit to 64bit signed extension.
1626 * @param dbgi debug info
1627 * @param block the block where node nodes should be placed
1628 * @param val the value to extend
1629 * @param orig the original node
/* Two encodings: Cltd (cdq, needs val in eax — ProduceVal models the
 * implicit register dependency) when the config prefers it, otherwise an
 * arithmetic right shift by 31 which replicates the sign bit. */
1631 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1632 ir_node *val, const ir_node *orig)
1637 if (ia32_cg_config.use_short_sex_eax) {
1638 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1639 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1641 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1642 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1644 SET_IA32_ORIG_NODE(res, orig);
1649 * Generates an ia32 Div with additional infrastructure for the
1650 * register allocator if needed.
/* Shared lowering for IR Div and Mod nodes: both map onto the same ia32
 * IDiv/Div instruction (quotient and remainder are separate Projs).  The
 * dividend's upper 32 bits are a sign extension for signed division and
 * a zero constant for unsigned division. */
1652 static ir_node *create_Div(ir_node *node)
1654 dbg_info *dbgi = get_irn_dbg_info(node);
1655 ir_node *block = get_nodes_block(node);
1656 ir_node *new_block = be_transform_node(block);
1657 int throws_exception = ir_throws_exception(node);
1664 ir_node *sign_extension;
1665 ia32_address_mode_t am;
1666 ia32_address_t *addr = &am.addr;
1668 /* the upper bits have random contents for smaller modes */
/* dispatch on the concrete opcode since Div and Mod use different accessors */
1669 switch (get_irn_opcode(node)) {
1671 op1 = get_Div_left(node);
1672 op2 = get_Div_right(node);
1673 mem = get_Div_mem(node);
1674 mode = get_Div_resmode(node);
1677 op1 = get_Mod_left(node);
1678 op2 = get_Mod_right(node);
1679 mem = get_Mod_mem(node);
1680 mode = get_Mod_resmode(node);
1683 panic("invalid divmod node %+F", node);
1686 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1688 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1689 is the memory of the consumed address. We can have only the second op as address
1690 in Div nodes, so check only op2. */
1691 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1693 if (mode_is_signed(mode)) {
1694 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1695 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1696 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: the 64-bit dividend's upper half is simply zero */
1698 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1700 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1701 addr->index, new_mem, am.new_op2,
1702 am.new_op1, sign_extension);
/* division can trap (divide by zero) — preserve the exception attribute */
1704 ir_set_throws_exception(new_node, throws_exception);
1706 set_irn_pinned(new_node, get_irn_pinned(node));
1708 set_am_attributes(new_node, &am);
1709 SET_IA32_ORIG_NODE(new_node, node);
1711 new_node = fix_mem_proj(new_node, &am);
1717 * Generates an ia32 Mod.
/* Thin wrapper: Mod shares the Div lowering; the remainder is picked up
 * via the corresponding Proj by the caller. */
1719 static ir_node *gen_Mod(ir_node *node)
1721 return create_Div(node);
1725 * Generates an ia32 Div.
/* Float division maps to SSE2 xDiv or x87 vfdiv; integer division shares
 * the create_Div lowering with Mod. */
1727 static ir_node *gen_Div(ir_node *node)
1729 ir_mode *mode = get_Div_resmode(node);
1730 if (mode_is_float(mode)) {
1731 ir_node *op1 = get_Div_left(node);
1732 ir_node *op2 = get_Div_right(node);
1734 if (ia32_cg_config.use_sse2) {
1735 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1737 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1741 return create_Div(node);
1745 * Creates an ia32 Shl.
1747 * @return The created ia32 Shl node
/* match_mode_neutral is safe for left shifts: upper input bits never
 * reach the low result bits. */
1749 static ir_node *gen_Shl(ir_node *node)
1751 ir_node *left = get_Shl_left(node);
1752 ir_node *right = get_Shl_right(node);
1754 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1755 match_mode_neutral | match_immediate);
1759 * Creates an ia32 Shr.
1761 * @return The created ia32 Shr node
/* No match_mode_neutral here: a logical right shift does depend on the
 * upper bits of its operand. */
1763 static ir_node *gen_Shr(ir_node *node)
1765 ir_node *left = get_Shr_left(node);
1766 ir_node *right = get_Shr_right(node);
1768 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1774 * Creates an ia32 Sar.
1776 * @return The created ia32 Shrs node
/* Recognises two idioms before emitting a plain Sar:
 *  - Shrs(x, 31): pure sign extraction, handled by create_sex_32_64;
 *  - Shrs(Shl(x, C), C) with C == 16 or 24: an 8/16-bit sign extension,
 *    emitted as an I2I conversion (movsx-style). */
1778 static ir_node *gen_Shrs(ir_node *node)
1780 ir_node *left = get_Shrs_left(node);
1781 ir_node *right = get_Shrs_right(node);
1783 if (is_Const(right)) {
1784 ir_tarval *tv = get_Const_tarval(right);
1785 long val = get_tarval_long(tv);
/* (elided: the surrounding check — presumably val == 31 — is not visible) */
1787 /* this is a sign extension */
1788 dbg_info *dbgi = get_irn_dbg_info(node);
1789 ir_node *block = be_transform_node(get_nodes_block(node));
1790 ir_node *new_op = be_transform_node(left);
1792 return create_sex_32_64(dbgi, block, new_op, node);
1796 /* 8 or 16 bit sign extension? */
1797 if (is_Const(right) && is_Shl(left)) {
1798 ir_node *shl_left = get_Shl_left(left);
1799 ir_node *shl_right = get_Shl_right(left);
1800 if (is_Const(shl_right)) {
1801 ir_tarval *tv1 = get_Const_tarval(right);
1802 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* tarval pointers are unique per value, so tv1 == tv2 compares values */
1803 if (tv1 == tv2 && tarval_is_long(tv1)) {
1804 long val = get_tarval_long(tv1);
1805 if (val == 16 || val == 24) {
1806 dbg_info *dbgi = get_irn_dbg_info(node);
1807 ir_node *block = get_nodes_block(node);
1817 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1826 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1832 * Creates an ia32 Rol.
1834 * @param op1 The first operator
1835 * @param op2 The second operator
1836 * @return The created ia32 RotL node
/* Helper for gen_Rotl; delegates to the generic shift matcher. */
1838 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1840 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate)
1846 * Creates an ia32 Ror.
1847 * NOTE: There is no RotR with immediate because this would always be a RotL
1848 * "imm-mode_size_bits" which can be pre-calculated.
1850 * @param op1 The first operator
1851 * @param op2 The second operator
1852 * @return The created ia32 RotR node
/* Helper for gen_Rotl (the Rotl(x, -n) == Ror(x, n) case). */
1854 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1856 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1862 * Creates an ia32 RotR or RotL (depending on the found pattern).
1864 * @return The created ia32 RotL or RotR node
/* Rotl(x, Minus(n)) is the same as Ror(x, n) — pick the instruction that
 * avoids materialising the negation. */
1866 static ir_node *gen_Rotl(ir_node *node)
1868 ir_node *op1 = get_Rotl_left(node);
1869 ir_node *op2 = get_Rotl_right(node);
1871 if (is_Minus(op2)) {
1872 return gen_Ror(node, op1, get_Minus_op(op2));
1875 return gen_Rol(node, op1, op2);
1881 * Transforms a Minus node.
1883 * @return The created ia32 Minus node
/* Float negation has no direct SSE instruction: it is done by XORing the
 * sign bit with a known constant from memory (ia32_SSIGN/ia32_DSIGN).
 * x87 uses fchs; integers use a plain Neg. */
1885 static ir_node *gen_Minus(ir_node *node)
1887 ir_node *op = get_Minus_op(node);
1888 ir_node *block = be_transform_node(get_nodes_block(node));
1889 dbg_info *dbgi = get_irn_dbg_info(node);
1890 ir_mode *mode = get_irn_mode(node);
1895 if (mode_is_float(mode)) {
1896 ir_node *new_op = be_transform_node(op);
1897 if (ia32_cg_config.use_sse2) {
1898 /* TODO: non-optimal... if we have many xXors, then we should
1899 * rather create a load for the const and use that instead of
1900 * several AM nodes... */
1901 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1903 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1904 noreg_GP, nomem, new_op, noreg_xmm);
/* sign-mask constant differs for 32-bit (SSIGN) and 64-bit (DSIGN) floats */
1906 size = get_mode_size_bits(mode);
1907 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
/* read the mask via source address mode from the constant's entity */
1909 set_ia32_am_sc(new_node, ent);
1910 set_ia32_op_type(new_node, ia32_AddrModeS);
1911 set_ia32_ls_mode(new_node, mode);
1913 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1916 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1919 SET_IA32_ORIG_NODE(new_node, node);
1925 * Transforms a Not node.
1927 * @return The created ia32 Not node
/* Bitwise complement only — boolean Not must have been lowered earlier. */
1929 static ir_node *gen_Not(ir_node *node)
1931 ir_node *op = get_Not_op(node);
1933 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1934 assert (! mode_is_float(get_irn_mode(node)));
1936 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Builds |op| (and optionally -|op| when 'negate' is set) for a float
 * value.  SSE2 clears the sign bit by ANDing with a known mask constant
 * (ia32_SABS/ia32_DABS) read via source address mode; x87 uses fabs,
 * followed by fchs for the negated variant. */
1939 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1940 bool negate, ir_node *node)
1942 ir_node *new_block = be_transform_node(block);
1943 ir_mode *mode = get_irn_mode(op);
1944 ir_node *new_op = be_transform_node(op);
1949 assert(mode_is_float(mode));
1951 if (ia32_cg_config.use_sse2) {
1952 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1953 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1954 noreg_GP, nomem, new_op, noreg_fp);
/* mask entity depends on float width: SABS for 32 bit, DABS for 64 bit */
1956 size = get_mode_size_bits(mode);
1957 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1959 set_ia32_am_sc(new_node, ent);
1961 SET_IA32_ORIG_NODE(new_node, node);
1963 set_ia32_op_type(new_node, ia32_AddrModeS);
1964 set_ia32_ls_mode(new_node, mode);
1966 /* TODO, implement -Abs case */
1969 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1970 SET_IA32_ORIG_NODE(new_node, node);
/* x87 negate-abs: chain fchs after fabs (guarded by 'negate'; the guard
 * line is elided in this view) */
1972 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1973 SET_IA32_ORIG_NODE(new_node, node);
1981 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The Bt instruction copies bit n of x into the carry flag; the caller
 * selects the condition code (CF set/clear) accordingly. */
1983 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1985 dbg_info *dbgi = get_irn_dbg_info(cmp);
1986 ir_node *block = get_nodes_block(cmp);
1987 ir_node *new_block = be_transform_node(block);
1988 ir_node *op1 = be_transform_node(x);
1989 ir_node *op2 = be_transform_node(n);
1991 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Maps an IR comparison relation to an ia32 condition code, in three
 * regimes: float compares (which must model unordered results via the
 * parity flag), signed integer compares, and unsigned integer compares.
 * For signed </>= with no possible overflow, the cheaper sign-flag
 * conditions are used instead of the full less/greater_equal codes. */
1994 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1996 bool overflow_possible)
1998 if (mode_is_float(mode)) {
2000 case ir_relation_equal: return ia32_cc_float_equal;
2001 case ir_relation_less: return ia32_cc_float_below;
2002 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2003 case ir_relation_greater: return ia32_cc_float_above;
2004 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2005 case ir_relation_less_greater: return ia32_cc_not_equal;
2006 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2007 case ir_relation_unordered: return ia32_cc_parity;
2008 case ir_relation_unordered_equal: return ia32_cc_equal;
2009 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2010 case ir_relation_unordered_less_equal:
2011 return ia32_cc_float_unordered_below_equal;
2012 case ir_relation_unordered_greater:
2013 return ia32_cc_float_unordered_above;
2014 case ir_relation_unordered_greater_equal:
2015 return ia32_cc_float_unordered_above_equal;
2016 case ir_relation_unordered_less_greater:
2017 return ia32_cc_float_not_equal;
2018 case ir_relation_false:
2019 case ir_relation_true:
2020 /* should we introduce a jump always/jump never? */
2023 panic("Unexpected float pnc");
2024 } else if (mode_is_signed(mode)) {
2026 case ir_relation_unordered_equal:
2027 case ir_relation_equal: return ia32_cc_equal;
2028 case ir_relation_unordered_less:
2029 case ir_relation_less:
/* without overflow, x < 0 style tests reduce to checking the sign flag */
2030 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2031 case ir_relation_unordered_less_equal:
2032 case ir_relation_less_equal: return ia32_cc_less_equal;
2033 case ir_relation_unordered_greater:
2034 case ir_relation_greater: return ia32_cc_greater;
2035 case ir_relation_unordered_greater_equal:
2036 case ir_relation_greater_equal:
2037 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2038 case ir_relation_unordered_less_greater:
2039 case ir_relation_less_greater: return ia32_cc_not_equal;
2040 case ir_relation_less_equal_greater:
2041 case ir_relation_unordered:
2042 case ir_relation_false:
2043 case ir_relation_true:
2044 /* introduce jump always/jump never? */
2047 panic("Unexpected pnc");
/* unsigned compares use the below/above family (carry-flag based) */
2050 case ir_relation_unordered_equal:
2051 case ir_relation_equal: return ia32_cc_equal;
2052 case ir_relation_unordered_less:
2053 case ir_relation_less: return ia32_cc_below;
2054 case ir_relation_unordered_less_equal:
2055 case ir_relation_less_equal: return ia32_cc_below_equal;
2056 case ir_relation_unordered_greater:
2057 case ir_relation_greater: return ia32_cc_above;
2058 case ir_relation_unordered_greater_equal:
2059 case ir_relation_greater_equal: return ia32_cc_above_equal;
2060 case ir_relation_unordered_less_greater:
2061 case ir_relation_less_greater: return ia32_cc_not_equal;
2062 case ir_relation_less_equal_greater:
2063 case ir_relation_unordered:
2064 case ir_relation_false:
2065 case ir_relation_true:
2066 /* introduce jump always/jump never? */
2069 panic("Unexpected pnc");
/* Produces the flags-producing node for a Cmp and writes the condition
 * code the consumer should test into *cc_out.  First tries to match the
 * bit-test idiom ((1 << n) & x) ==/!= 0 onto a Bt instruction; otherwise
 * adjusts the relation (prefer equal-style codes, account for possible
 * overflow) and transforms the Cmp normally. */
2073 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2075 /* must have a Cmp as input */
2076 ir_relation relation = get_Cmp_relation(cmp);
2077 ir_node *l = get_Cmp_left(cmp);
2078 ir_node *r = get_Cmp_right(cmp);
2079 ir_mode *mode = get_irn_mode(l);
2080 bool overflow_possible;
2081 ir_relation possible;
2084 /* check for bit-test */
2085 if (ia32_cg_config.use_bt
2086 && (relation == ir_relation_equal
2087 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2088 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2090 ir_node *la = get_And_left(l);
2091 ir_node *ra = get_And_right(l);
/* (elided: the is_And/is_Shl guards and operand normalisation) */
2098 ir_node *c = get_Shl_left(la);
2099 if (is_Const_1(c) && is_Const_0(r)) {
2100 /* (1 << n) & ra) */
2101 ir_node *n = get_Shl_right(la);
2102 flags = gen_bt(cmp, ra, n);
2103 /* the bit is copied into the CF flag */
2104 if (relation & ir_relation_equal)
2105 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2107 *cc_out = ia32_cc_below; /* test for CF=1 */
2113 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2114 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2115 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2116 * a predecessor node). So add the < bit.
2117 * (Note that we do not want to produce <=> (which can happen for
2118 * unoptimized code), because no x86 flag can represent that */
2119 possible = ir_get_possible_cmp_relations(l, r);
2120 if (!(relation & ir_relation_equal) &&
2121 ( ((relation & ir_relation_less) && !(possible & ir_relation_greater))
2122 || ((relation & ir_relation_greater) && !(possible & ir_relation_less))))
2123 relation |= ir_relation_less_greater;
/* comparing against zero cannot overflow, enabling sign-flag shortcuts
 * in relation_to_condition_code */
2125 overflow_possible = true;
2126 if (is_Const(r) && is_Const_null(r))
2127 overflow_possible = false;
2129 /* just do a normal transformation of the Cmp */
2130 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2131 flags = be_transform_node(cmp);
2136 * Transforms a Load.
2138 * @return the created ia32 Load node
/* Folds the load pointer into an ia32 address mode, then picks the load
 * flavour: xLoad (SSE2) or vfld (x87) for floats, a widening Conv_I2I
 * (movzx/movsx style) for sub-32-bit GP modes, plain Load otherwise.
 * Non-pinned loads are marked rematerializable for the spiller. */
2140 static ir_node *gen_Load(ir_node *node)
2142 ir_node *old_block = get_nodes_block(node);
2143 ir_node *block = be_transform_node(old_block);
2144 ir_node *ptr = get_Load_ptr(node);
2145 ir_node *mem = get_Load_mem(node);
2146 ir_node *new_mem = be_transform_node(mem);
2147 dbg_info *dbgi = get_irn_dbg_info(node);
2148 ir_mode *mode = get_Load_mode(node);
2149 int throws_exception = ir_throws_exception(node);
2153 ia32_address_t addr;
2155 /* construct load address */
2156 memset(&addr, 0, sizeof(addr));
2157 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2164 base = be_transform_node(base);
2170 idx = be_transform_node(idx);
2173 if (mode_is_float(mode)) {
2174 if (ia32_cg_config.use_sse2) {
2175 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2178 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2182 assert(mode != mode_b);
2184 /* create a conv node with address mode for smaller modes */
2185 if (get_mode_size_bits(mode) < 32) {
2186 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2187 new_mem, noreg_GP, mode);
2189 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
/* loads can fault — keep the exception attribute of the original node */
2192 ir_set_throws_exception(new_node, throws_exception);
2194 set_irn_pinned(new_node, get_irn_pinned(node));
2195 set_ia32_op_type(new_node, ia32_AddrModeS);
2196 set_ia32_ls_mode(new_node, mode);
2197 set_address(new_node, &addr);
2199 if (get_irn_pinned(node) == op_pin_state_floats) {
/* the assert documents that all load variants share the same result
 * Proj number, so rematerialization works uniformly */
2200 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2201 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2202 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2203 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2206 SET_IA32_ORIG_NODE(new_node, node);
/* Decides whether 'node' (a value Proj of a Load) can be folded into a
 * destination-address-mode instruction storing back through 'ptr':
 * the load must be the value's only user, live in the same block, use
 * the same pointer as the store, and neither 'other' (the second operand)
 * nor the store's memory chain may depend on it in a way that would
 * create a cycle.  (Returns are elided in this view.) */
2211 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2212 ir_node *ptr, ir_node *other)
2219 /* we only use address mode if we're the only user of the load */
2220 if (get_irn_n_edges(node) > 1)
2223 load = get_Proj_pred(node);
2226 if (get_nodes_block(load) != block)
2229 /* store should have the same pointer as the load */
2230 if (get_Load_ptr(load) != ptr)
2233 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2234 if (other != NULL &&
2235 get_nodes_block(other) == block &&
2236 heights_reachable_in_block(ia32_heights, other, load)) {
2240 if (ia32_prevents_AM(block, load, mem))
2242 /* Store should be attached to the load via mem */
2243 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Builds a destination-address-mode binop (e.g. AddMem: op (load) -> op,
 * store) when one operand is a foldable load (checked via use_dest_am)
 * and the other fits as register/immediate.  Uses the 8-bit constructor
 * for 8-bit modes.  The consumed Load's mem-Proj is redirected to the
 * new node so its users see the combined memory effect. */
2248 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2249 ir_node *mem, ir_node *ptr, ir_mode *mode,
2250 construct_binop_dest_func *func,
2251 construct_binop_dest_func *func8bit,
2252 match_flags_t flags)
2254 ir_node *src_block = get_nodes_block(node);
2262 ia32_address_mode_t am;
2263 ia32_address_t *addr = &am.addr;
2264 memset(&am, 0, sizeof(am));
2266 assert(flags & match_immediate); /* there is no destam node without... */
2267 commutative = (flags & match_commutative) != 0;
/* try folding the left operand's load first; if the op is commutative,
 * the right operand's load works just as well */
2269 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2270 build_address(&am, op1, ia32_create_am_double_use);
2271 new_op = create_immediate_or_transform(op2, 0);
2272 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2273 build_address(&am, op2, ia32_create_am_double_use);
2274 new_op = create_immediate_or_transform(op1, 0);
/* normalise unset address parts to the no-register placeholder */
2279 if (addr->base == NULL)
2280 addr->base = noreg_GP;
2281 if (addr->index == NULL)
2282 addr->index = noreg_GP;
2283 if (addr->mem == NULL)
2286 dbgi = get_irn_dbg_info(node);
2287 block = be_transform_node(src_block);
2288 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2290 if (get_mode_size_bits(mode) == 8) {
2291 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2293 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2295 set_address(new_node, addr);
2296 set_ia32_op_type(new_node, ia32_AddrModeD);
2297 set_ia32_ls_mode(new_node, mode);
2298 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the old Load's memory users to the combined node */
2300 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2301 mem_proj = be_transform_node(am.mem_proj);
2302 be_set_transformed_node(am.mem_proj, new_node);
2303 be_set_transformed_node(mem_proj, new_node);
/* Unary counterpart of dest_am_binop: folds load + unop + store into one
 * destination-address-mode instruction (NegMem, NotMem, IncMem, ...),
 * redirecting the consumed Load's mem-Proj to the new node. */
2308 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2309 ir_node *ptr, ir_mode *mode,
2310 construct_unop_dest_func *func)
2312 ir_node *src_block = get_nodes_block(node);
2318 ia32_address_mode_t am;
2319 ia32_address_t *addr = &am.addr;
2321 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2324 memset(&am, 0, sizeof(am));
2325 build_address(&am, op, ia32_create_am_double_use);
2327 dbgi = get_irn_dbg_info(node);
2328 block = be_transform_node(src_block);
2329 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2330 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2331 set_address(new_node, addr);
2332 set_ia32_op_type(new_node, ia32_AddrModeD);
2333 set_ia32_ls_mode(new_node, mode);
2334 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the old Load's memory users to the combined node */
2336 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2337 mem_proj = be_transform_node(am.mem_proj);
2338 be_set_transformed_node(am.mem_proj, new_node);
2339 be_set_transformed_node(mem_proj, new_node);
/* Tries to turn Store(Mux(cond, 1, 0)) (8-bit only) into a SetccMem that
 * writes the condition byte directly to memory.  The inverted 0/1 Mux is
 * handled by negating the condition code.  Float condition codes needing
 * extra parity handling cannot be expressed and bail out. */
2344 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2346 ir_mode *mode = get_irn_mode(node);
2347 ir_node *mux_true = get_Mux_true(node);
2348 ir_node *mux_false = get_Mux_false(node);
2356 ia32_condition_code_t cc;
2357 ia32_address_t addr;
/* setcc only produces a byte */
2359 if (get_mode_size_bits(mode) != 8)
2362 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2364 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2370 cond = get_Mux_sel(node);
2371 flags = get_flags_node(cond, &cc);
2372 /* we can't handle the float special cases with SetM */
2373 if (cc & ia32_cc_additional_float_cases)
/* Mux(c, 0, 1) case: store the negated condition (guard elided in view) */
2376 cc = ia32_negate_condition_code(cc);
2378 build_address_ptr(&addr, ptr, mem);
2380 dbgi = get_irn_dbg_info(node);
2381 block = get_nodes_block(node);
2382 new_block = be_transform_node(block);
2383 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2384 addr.index, addr.mem, flags, cc);
2385 set_address(new_node, &addr);
2386 set_ia32_op_type(new_node, ia32_AddrModeD);
2387 set_ia32_ls_mode(new_node, mode);
2388 SET_IA32_ORIG_NODE(new_node, node);
/* Entry point for destination address mode: given a Store, inspects the
 * stored value and — when the value is a single-use, same-block
 * arithmetic node whose load operand matches the store pointer —
 * replaces load+op+store with one read-modify-write instruction
 * (AddMem, SubMem, AndMem, ..., Inc/Dec/Neg/NotMem, SetccMem).
 * Pointless down-Convs on the value are skipped first. */
2393 static ir_node *try_create_dest_am(ir_node *node)
2395 ir_node *val = get_Store_value(node);
2396 ir_node *mem = get_Store_mem(node);
2397 ir_node *ptr = get_Store_ptr(node);
2398 ir_mode *mode = get_irn_mode(val);
2399 unsigned bits = get_mode_size_bits(mode);
2404 /* handle only GP modes for now... */
2405 if (!ia32_mode_needs_gp_reg(mode))
2409 /* store must be the only user of the val node */
2410 if (get_irn_n_edges(val) > 1)
2412 /* skip pointless convs */
2414 ir_node *conv_op = get_Conv_op(val);
2415 ir_mode *pred_mode = get_irn_mode(conv_op);
2416 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv to a smaller-or-equal GP mode does not change the stored bits */
2418 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2426 /* value must be in the same block */
2427 if (get_nodes_block(node) != get_nodes_block(val))
2430 switch (get_irn_opcode(val)) {
2432 op1 = get_Add_left(val);
2433 op2 = get_Add_right(val);
/* +1/-1 become the shorter Inc/Dec forms when the config allows it */
2434 if (ia32_cg_config.use_incdec) {
2435 if (is_Const_1(op2)) {
2436 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2438 } else if (is_Const_Minus_1(op2)) {
2439 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2443 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2444 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2445 match_commutative | match_immediate);
2448 op1 = get_Sub_left(val);
2449 op2 = get_Sub_right(val);
2450 if (is_Const(op2)) {
2451 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2453 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2454 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2458 op1 = get_And_left(val);
2459 op2 = get_And_right(val);
2460 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2461 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2462 match_commutative | match_immediate);
2465 op1 = get_Or_left(val);
2466 op2 = get_Or_right(val);
2467 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2468 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2469 match_commutative | match_immediate);
2472 op1 = get_Eor_left(val);
2473 op2 = get_Eor_right(val);
2474 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2475 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2476 match_commutative | match_immediate);
/* shifts/rotates have no separate 8-bit constructor: same func twice */
2479 op1 = get_Shl_left(val);
2480 op2 = get_Shl_right(val);
2481 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2482 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2486 op1 = get_Shr_left(val);
2487 op2 = get_Shr_right(val);
2488 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2489 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2493 op1 = get_Shrs_left(val);
2494 op2 = get_Shrs_right(val);
2495 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2496 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2500 op1 = get_Rotl_left(val);
2501 op2 = get_Rotl_right(val);
2502 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2503 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2506 /* TODO: match ROR patterns... */
2508 new_node = try_create_SetMem(val, ptr, mem);
2512 op1 = get_Minus_op(val);
2513 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2516 /* should be lowered already */
2517 assert(mode != mode_b);
2518 op1 = get_Not_op(val);
2519 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* the combined node must stay pinned if the original Store was pinned */
2525 if (new_node != NULL) {
2526 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2527 get_irn_pinned(node) == op_pin_state_pinned) {
2528 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether an integer value of the given mode can be used directly as
 * the source of an x87 integer load (fild): the visible checks require a
 * signed mode of 16 or 32 bits. */
2535 static bool possible_int_mode_for_fp(ir_mode *mode)
2539 if (!mode_is_signed(mode))
2541 size = get_mode_size_bits(mode);
2542 if (size != 16 && size != 32)
/* Tests whether @p node is a Conv from a float mode to an integer mode that
 * the x87 fist instruction can store (see possible_int_mode_for_fp). */
2547 static int is_float_to_int_conv(const ir_node *node)
2549 ir_mode *mode = get_irn_mode(node);
2553 if (!possible_int_mode_for_fp(mode))
2558 conv_op = get_Conv_op(node);
2559 conv_mode = get_irn_mode(conv_op);
2561 if (!mode_is_float(conv_mode))
2568 * Transform a Store(floatConst) into a sequence of
 * integer stores of the constant's raw bit pattern (4- and 2-byte chunks).
2571 * @return the created ia32 Store node
 *         (a Sync of the partial stores when more than one was needed)
2573 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2575 ir_mode *mode = get_irn_mode(cns);
2576 unsigned size = get_mode_size_bytes(mode);
2577 ir_tarval *tv = get_Const_tarval(cns);
2578 ir_node *block = get_nodes_block(node);
2579 ir_node *new_block = be_transform_node(block);
2580 ir_node *ptr = get_Store_ptr(node);
2581 ir_node *mem = get_Store_mem(node);
2582 dbg_info *dbgi = get_irn_dbg_info(node);
2585 int throws_exception = ir_throws_exception(node);
2587 ia32_address_t addr;
2589 build_address_ptr(&addr, ptr, mem);
/* assemble a little-endian 32-bit chunk of the constant's bit image */
2596 val= get_tarval_sub_bits(tv, ofs) |
2597 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2598 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2599 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2602 } else if (size >= 2) {
/* remaining 16-bit chunk */
2603 val= get_tarval_sub_bits(tv, ofs) |
2604 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2608 panic("invalid size of Store float to mem (%+F)", node);
2610 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2612 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2613 addr.index, addr.mem, imm);
2614 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2616 ir_set_throws_exception(new_node, throws_exception);
2617 set_irn_pinned(new_node, get_irn_pinned(node));
2618 set_ia32_op_type(new_node, ia32_AddrModeD);
2619 set_ia32_ls_mode(new_node, mode);
2620 set_address(new_node, &addr);
2621 SET_IA32_ORIG_NODE(new_node, node);
/* advance the address offset to the next chunk of the constant */
2628 addr.offset += delta;
2629 } while (size != 0);
/* several partial stores: join their memory outputs in a Sync;
 * single store: return its memory predecessor directly */
2632 return new_rd_Sync(dbgi, new_block, i, ins);
2634 return get_Proj_pred(ins[0]);
2639 * Generate a vfist or vfisttp instruction.
 * vfisttp (SSE3) truncates without changing the FPU control word but always
 * pops; plain vfist needs the control word set to truncation mode.
2641 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2642 ir_node *index, ir_node *mem, ir_node *val)
2644 if (ia32_cg_config.use_fisttp) {
2645 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2646 if other users exists */
2647 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2648 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
/* the Keep forces the res Proj to exist so the popped value stays usable */
2649 be_new_Keep(block, 1, &value);
2653 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2656 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2662 * Transforms a general (no special case) Store.
2664 * @return the created ia32 Store node
2666 static ir_node *gen_general_Store(ir_node *node)
2668 ir_node *val = get_Store_value(node);
2669 ir_mode *mode = get_irn_mode(val);
2670 ir_node *block = get_nodes_block(node);
2671 ir_node *new_block = be_transform_node(block);
2672 ir_node *ptr = get_Store_ptr(node);
2673 ir_node *mem = get_Store_mem(node);
2674 dbg_info *dbgi = get_irn_dbg_info(node);
2675 int throws_exception = ir_throws_exception(node);
2678 ia32_address_t addr;
2680 /* check for destination address mode */
2681 new_node = try_create_dest_am(node);
2682 if (new_node != NULL)
2685 /* construct store address */
2686 memset(&addr, 0, sizeof(addr));
2687 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* missing base/index parts become the GP no-register placeholder */
2689 if (addr.base == NULL) {
2690 addr.base = noreg_GP;
2692 addr.base = be_transform_node(addr.base);
2695 if (addr.index == NULL) {
2696 addr.index = noreg_GP;
2698 addr.index = be_transform_node(addr.index);
2700 addr.mem = be_transform_node(mem);
2702 if (mode_is_float(mode)) {
2703 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2705 while (is_Conv(val) && mode == get_irn_mode(val)) {
2706 ir_node *op = get_Conv_op(val);
2707 if (!mode_is_float(get_irn_mode(op)))
2711 new_val = be_transform_node(val);
2712 if (ia32_cg_config.use_sse2) {
2713 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2714 addr.index, addr.mem, new_val);
2716 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2717 addr.index, addr.mem, new_val, mode);
/* float->int conversion feeding a store: combine Conv+Store into fist */
2719 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2720 val = get_Conv_op(val);
2722 /* TODO: is this optimisation still necessary at all (middleend)? */
2723 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2725 while (is_Conv(val)) {
2726 ir_node *op = get_Conv_op(val);
2727 if (!mode_is_float(get_irn_mode(op)))
2729 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2733 new_val = be_transform_node(val);
2734 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
/* integer store: the value may become an immediate operand */
2736 new_val = create_immediate_or_transform(val, 0);
2737 assert(mode != mode_b);
2739 if (get_mode_size_bits(mode) == 8) {
2740 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2741 addr.index, addr.mem, new_val);
2743 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2744 addr.index, addr.mem, new_val);
2747 ir_set_throws_exception(new_node, throws_exception);
2749 set_irn_pinned(new_node, get_irn_pinned(node));
2750 set_ia32_op_type(new_node, ia32_AddrModeD);
2751 set_ia32_ls_mode(new_node, mode);
2753 set_address(new_node, &addr);
2754 SET_IA32_ORIG_NODE(new_node, node);
2760 * Transforms a Store.
 * Dispatches between the float-constant special case and the general path.
2762 * @return the created ia32 Store node
2764 static ir_node *gen_Store(ir_node *node)
2766 ir_node *val = get_Store_value(node);
2767 ir_mode *mode = get_irn_mode(val);
2769 if (mode_is_float(mode) && is_Const(val)) {
2770 /* We can transform every floating const store
2771 into a sequence of integer stores.
2772 If the constant is already in a register,
2773 it would be better to use it, but we don't
2774 have this information here. */
2775 return gen_float_const_Store(node, val);
2777 return gen_general_Store(node);
2781 * Transforms a Switch.
 * Builds an indirect jump through a private, constant jump table entity.
2783 * @return the created ia32 SwitchJmp node
2785 static ir_node *gen_Switch(ir_node *node)
2787 dbg_info *dbgi = get_irn_dbg_info(node);
2788 ir_graph *irg = get_irn_irg(node);
2789 ir_node *block = be_transform_node(get_nodes_block(node));
2790 ir_node *sel = get_Switch_selector(node);
2791 ir_node *new_sel = be_transform_node(sel);
2792 ir_mode *sel_mode = get_irn_mode(sel);
2793 const ir_switch_table *table = get_Switch_table(node);
2794 unsigned n_outs = get_Switch_n_outs(node);
2798 assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
/* the table index must be a full 32-bit value */
2799 if (get_mode_size_bits(sel_mode) != 32)
2800 new_sel = create_upconv(new_sel, sel);
/* create the jump-table entity (one 4-byte target address per out) */
2802 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2803 set_entity_visibility(entity, ir_visibility_private);
2804 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2806 table = ir_switch_table_duplicate(irg, table);
2808 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2809 set_ia32_am_scale(new_node, 2);
2810 set_ia32_am_sc(new_node, entity);
2811 set_ia32_op_type(new_node, ia32_AddrModeS);
2812 set_ia32_ls_mode(new_node, mode_Iu);
2813 SET_IA32_ORIG_NODE(new_node, node);
2814 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2815 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2821 * Transform a Cond node into an ia32 conditional jump (Jcc) fed by the
 * flags produced by the selector's comparison.
2823 static ir_node *gen_Cond(ir_node *node)
2825 ir_node *block = get_nodes_block(node);
2826 ir_node *new_block = be_transform_node(block);
2827 dbg_info *dbgi = get_irn_dbg_info(node);
2828 ir_node *sel = get_Cond_selector(node);
2829 ir_node *flags = NULL;
2831 ia32_condition_code_t cc;
2833 /* we get flags from a Cmp */
2834 flags = get_flags_node(sel, &cc);
2836 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2837 SET_IA32_ORIG_NODE(new_node, node);
2843 * Transform a be_Copy.
 * Duplicates the node and normalizes GP-register values to mode_Iu.
2845 static ir_node *gen_be_Copy(ir_node *node)
2847 ir_node *new_node = be_duplicate_node(node);
2848 ir_mode *mode = get_irn_mode(new_node);
2850 if (ia32_mode_needs_gp_reg(mode)) {
2851 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. Uses fucomi when available (writes EFLAGS
 * directly); otherwise falls back to ftst/fucom + fnstsw + sahf to move the
 * FPU status word into the CPU flags. */
2857 static ir_node *create_Fucom(ir_node *node)
2859 dbg_info *dbgi = get_irn_dbg_info(node);
2860 ir_node *block = get_nodes_block(node);
2861 ir_node *new_block = be_transform_node(block);
2862 ir_node *left = get_Cmp_left(node);
2863 ir_node *new_left = be_transform_node(left);
2864 ir_node *right = get_Cmp_right(node);
2868 if (ia32_cg_config.use_fucomi) {
2869 new_right = be_transform_node(right);
2870 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2872 set_ia32_commutative(new_node);
2873 SET_IA32_ORIG_NODE(new_node, node);
/* comparison against 0 can use the shorter ftst */
2875 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2876 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2878 new_right = be_transform_node(right);
2879 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2880 set_ia32_commutative(new_node);
2883 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (the fnstsw result) into the EFLAGS register */
2885 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2886 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 float compare (ucomis[sd]) with source address-mode
 * matching; returns the flags-producing node. */
2892 static ir_node *create_Ucomi(ir_node *node)
2894 dbg_info *dbgi = get_irn_dbg_info(node);
2895 ir_node *src_block = get_nodes_block(node);
2896 ir_node *new_block = be_transform_node(src_block);
2897 ir_node *left = get_Cmp_left(node);
2898 ir_node *right = get_Cmp_right(node);
2900 ia32_address_mode_t am;
2901 ia32_address_t *addr = &am.addr;
2903 match_arguments(&am, src_block, left, right, NULL,
2904 match_commutative | match_am);
2906 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2907 addr->mem, am.new_op1, am.new_op2,
2909 set_am_attributes(new_node, &am);
2911 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj when a Load was folded into the compare */
2913 new_node = fix_mem_proj(new_node, &am);
2919 * returns true if it is assured, that the upper bits of a node are "clean"
2920 * which means for a 16 or 8 bit value, that the upper bits in the register
2921 * are 0 for unsigned and a copy of the last significant bit for signed
 * values. Recurses through Projs and inspects the producing ia32 opcode.
2924 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2926 assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit (or wider) values have no "upper bits" to worry about */
2927 if (get_mode_size_bits(mode) >= 32)
2930 if (is_Proj(transformed_node))
2931 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2933 switch (get_ia32_irn_opcode(transformed_node)) {
2934 case iro_ia32_Conv_I2I:
2935 case iro_ia32_Conv_I2I8Bit: {
2936 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conversion cleans the upper bits only when signedness matches and it
 * does not convert from a wider mode */
2937 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2939 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2946 if (mode_is_signed(mode)) {
2947 return false; /* TODO handle signed modes */
/* unsigned: a right-shift by >= (32 - bits) guarantees zero upper bits */
2949 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2950 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2951 const ia32_immediate_attr_t *attr
2952 = get_ia32_immediate_attr_const(right);
2953 if (attr->symconst == 0 &&
2954 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2958 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2962 /* TODO too conservative if shift amount is constant */
2963 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2966 if (!mode_is_signed(mode)) {
/* unsigned And: one clean operand suffices (zeros mask the other) */
2968 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2969 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2971 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
2976 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2977 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2979 case iro_ia32_Const:
2980 case iro_ia32_Immediate: {
2981 const ia32_immediate_attr_t *attr =
2982 get_ia32_immediate_attr_const(transformed_node);
2983 if (mode_is_signed(mode)) {
/* signed: upper bits must all equal the sign bit (all 0 or all 1) */
2984 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2985 return shifted == 0 || shifted == -1;
/* unsigned: all upper bits must be 0 */
2987 unsigned long shifted = (unsigned long)attr->offset;
2988 shifted >>= get_mode_size_bits(mode)-1;
2990 return shifted == 0;
3000 * Generate code for a Cmp.
 * Float compares go to Ucomi (SSE2) or Fucom (x87); integer compares become
 * Test (for x & y ==/!= 0) or Cmp, widening to 32 bit when the upper bits of
 * both operands are known clean (smaller opcode encoding).
3002 static ir_node *gen_Cmp(ir_node *node)
3004 dbg_info *dbgi = get_irn_dbg_info(node);
3005 ir_node *block = get_nodes_block(node);
3006 ir_node *new_block = be_transform_node(block);
3007 ir_node *left = get_Cmp_left(node);
3008 ir_node *right = get_Cmp_right(node);
3009 ir_mode *cmp_mode = get_irn_mode(left);
3011 ia32_address_mode_t am;
3012 ia32_address_t *addr = &am.addr;
3014 if (mode_is_float(cmp_mode)) {
3015 if (ia32_cg_config.use_sse2) {
3016 return create_Ucomi(node);
3018 return create_Fucom(node);
3022 assert(ia32_mode_needs_gp_reg(cmp_mode));
3024 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3025 if (is_Const_0(right) &&
3027 get_irn_n_edges(left) == 1) {
3028 /* Test(and_left, and_right) */
3029 ir_node *and_left = get_And_left(left);
3030 ir_node *and_right = get_And_right(left);
3032 /* matze: code here used mode instead of cmd_mode, I think it is always
3033 * the same as cmp_mode, but I leave this here to see if this is really
3036 assert(get_irn_mode(and_left) == cmp_mode);
3038 match_arguments(&am, block, and_left, and_right, NULL,
3040 match_am | match_8bit_am | match_16bit_am |
3041 match_am_and_immediates | match_immediate);
3043 /* use 32bit compare mode if possible since the opcode is smaller */
3044 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3045 upper_bits_clean(am.new_op2, cmp_mode)) {
3046 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3049 if (get_mode_size_bits(cmp_mode) == 8) {
3050 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3051 addr->index, addr->mem,
3052 am.new_op1, am.new_op2,
3055 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3056 addr->index, addr->mem, am.new_op1,
3057 am.new_op2, am.ins_permuted);
3060 /* Cmp(left, right) */
3061 match_arguments(&am, block, left, right, NULL,
3062 match_commutative | match_am | match_8bit_am |
3063 match_16bit_am | match_am_and_immediates |
3065 /* use 32bit compare mode if possible since the opcode is smaller */
3066 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3067 upper_bits_clean(am.new_op2, cmp_mode)) {
3068 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3071 if (get_mode_size_bits(cmp_mode) == 8) {
3072 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3073 addr->index, addr->mem, am.new_op1,
3074 am.new_op2, am.ins_permuted);
3076 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3077 addr->mem, am.new_op1, am.new_op2,
3081 set_am_attributes(new_node, &am);
3082 set_ia32_ls_mode(new_node, cmp_mode);
3084 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj when a Load was folded into the compare */
3086 new_node = fix_mem_proj(new_node, &am);
/* Create a conditional move (cmovcc) for a Mux with GP-register operands.
 * @param flags     the untransformed flags-producing node (for AM matching)
 * @param new_flags the transformed flags value consumed by the CMov
 * @param cc        condition under which the "true" operand is selected */
3091 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3092 ia32_condition_code_t cc)
3094 dbg_info *dbgi = get_irn_dbg_info(node);
3095 ir_node *block = get_nodes_block(node);
3096 ir_node *new_block = be_transform_node(block);
3097 ir_node *val_true = get_Mux_true(node);
3098 ir_node *val_false = get_Mux_false(node);
3100 ia32_address_mode_t am;
3101 ia32_address_t *addr;
3103 assert(ia32_cg_config.use_cmov);
3104 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3108 match_arguments(&am, block, val_false, val_true, flags,
3109 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matching may have swapped the operands; compensate in the condition */
3111 if (am.ins_permuted)
3112 cc = ia32_negate_condition_code(cc);
3114 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3115 addr->mem, am.new_op1, am.new_op2, new_flags,
3117 set_am_attributes(new_node, &am);
3119 SET_IA32_ORIG_NODE(new_node, node);
3121 new_node = fix_mem_proj(new_node, &am);
3127 * Creates a ia32 Setcc instruction and, when the requested result mode is
 * wider than 8 bit, zero-extends the 8-bit setcc result with a Conv.
3129 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3130 ir_node *flags, ia32_condition_code_t cc,
3133 ir_mode *mode = get_irn_mode(orig_node);
3136 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3137 SET_IA32_ORIG_NODE(new_node, orig_node);
3139 /* we might need to conv the result up */
3140 if (get_mode_size_bits(mode) > 8) {
3141 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3142 nomem, new_node, mode_Bu);
3143 SET_IA32_ORIG_NODE(new_node, orig_node);
3150 * Create instruction for an unsigned Difference or Zero
 * (doz(a, b) = a > b ? a - b : 0), built branch-free as
 * (a - b) & ~sbb(0,0,borrow): the sbb/not produces an all-ones mask exactly
 * when the subtraction did not borrow.
3152 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3154 ir_mode *mode = get_irn_mode(psi);
3164 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3165 match_mode_neutral | match_am | match_immediate | match_two_users);
3167 block = get_nodes_block(new_node);
3169 if (is_Proj(new_node)) {
3170 sub = get_Proj_pred(new_node);
/* make the Sub multi-output so we can also take its flags result */
3173 set_irn_mode(sub, mode_T);
3174 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3176 assert(is_ia32_Sub(sub));
3177 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3179 dbgi = get_irn_dbg_info(psi);
/* sbb0: 0 - borrow -> 0 or -1; not: invert to get the keep/zero mask */
3180 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3181 set_ia32_ls_mode(sbb, mode_Iu);
3182 notn = new_bd_ia32_Not(dbgi, block, sbb);
3184 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3185 set_ia32_ls_mode(new_node, mode_Iu);
3186 set_ia32_commutative(new_node);
3191 * Create an const array of two float consts.
3193 * @param c0        the first constant
3194 * @param c1        the second constant
3195 * @param new_mode  IN/OUT for the mode of the constants, if NULL
3196 *                  smallest possible mode will be used
 *
 * @return a new private, constant global entity holding { c0, c1 }
3198 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3201 ir_mode *mode = *new_mode;
3203 ir_initializer_t *initializer;
3204 ir_tarval *tv0 = get_Const_tarval(c0);
3205 ir_tarval *tv1 = get_Const_tarval(c1);
3208 /* detect the best mode for the constants */
3209 mode = get_tarval_mode(tv0);
/* try to shrink to single precision, then to double, when both constants
 * convert without losing precision */
3211 if (mode != mode_F) {
3212 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3213 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3215 tv0 = tarval_convert_to(tv0, mode);
3216 tv1 = tarval_convert_to(tv1, mode);
3217 } else if (mode != mode_D) {
3218 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3219 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3221 tv0 = tarval_convert_to(tv0, mode);
3222 tv1 = tarval_convert_to(tv1, mode);
3229 tp = ia32_get_prim_type(mode);
3230 tp = ia32_create_float_array(tp);
3232 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3234 set_entity_ld_ident(ent, get_entity_ident(ent));
3235 set_entity_visibility(ent, ir_visibility_private);
3236 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3238 initializer = create_initializer_compound(2);
3240 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3241 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3243 set_entity_initializer(ent, initializer);
3250 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc result can be post-processed with (ADD, LEA,
 * SHL, NEG, AND, ...) to produce arbitrary two-constant Mux results. */
3252 enum setcc_transform_insn {
/* A transformation recipe: the condition code to set, plus a sequence of
 * transform steps applied to the 0/1 setcc result. */
3265 typedef struct setcc_transform {
3267 ia32_condition_code_t cc;
3269 enum setcc_transform_insn transform;
3273 } setcc_transform_t;
3276 * Setcc can only handle 0 and 1 result.
3277 * Find a transformation that creates 0 and 1 from
 * the given "true" tarval t and "false" tarval f, recorded as a step
 * sequence in *res (applied to the raw setcc result afterwards).
3280 static void find_const_transform(ia32_condition_code_t cc,
3281 ir_tarval *t, ir_tarval *f,
3282 setcc_transform_t *res)
/* normalize so that t is the non-zero / bigger value; negating the
 * condition code keeps the overall semantics */
3288 if (tarval_is_null(t)) {
3292 cc = ia32_negate_condition_code(cc);
3293 } else if (tarval_cmp(t, f) == ir_relation_less) {
3294 // now, t is the bigger one
3298 cc = ia32_negate_condition_code(cc);
/* non-zero f: generate "setcc; result = result * (t - f) + f" — record the
 * ADD of f as the last step and continue with t - f */
3302 if (! tarval_is_null(f)) {
3303 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3306 res->steps[step].transform = SETCC_TR_ADD;
3308 if (t == tarval_bad)
3309 panic("constant subtract failed");
3310 if (! tarval_is_long(f))
3311 panic("tarval is not long");
3313 res->steps[step].val = get_tarval_long(f);
3315 f = tarval_sub(f, f, NULL);
3316 assert(tarval_is_null(f));
/* t == 1: the plain setcc result already matches */
3319 if (tarval_is_one(t)) {
3320 res->steps[step].transform = SETCC_TR_SET;
3321 res->num_steps = ++step;
/* t == -1: negate the 0/1 result */
3325 if (tarval_is_minus_one(t)) {
3326 res->steps[step].transform = SETCC_TR_NEG;
3328 res->steps[step].transform = SETCC_TR_SET;
3329 res->num_steps = ++step;
3332 if (tarval_is_long(t)) {
3333 long v = get_tarval_long(t);
3335 res->steps[step].val = 0;
/* small multipliers map to LEA/SHL forms; a preceding ADD step can be
 * folded into the LEA's displacement */
3338 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3340 res->steps[step].transform = SETCC_TR_LEAxx;
3341 res->steps[step].scale = 3; /* (a << 3) + a */
3344 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3346 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3347 res->steps[step].scale = 3; /* (a << 3) */
3350 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3352 res->steps[step].transform = SETCC_TR_LEAxx;
3353 res->steps[step].scale = 2; /* (a << 2) + a */
3356 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3358 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3359 res->steps[step].scale = 2; /* (a << 2) */
3362 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3364 res->steps[step].transform = SETCC_TR_LEAxx;
3365 res->steps[step].scale = 1; /* (a << 1) + a */
3368 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3370 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3371 res->steps[step].scale = 1; /* (a << 1) */
3374 res->num_steps = step;
/* general constant: mask with AND when t is not a single bit ... */
3377 if (! tarval_is_single_bit(t)) {
3378 res->steps[step].transform = SETCC_TR_AND;
3379 res->steps[step].val = v;
3381 res->steps[step].transform = SETCC_TR_NEG;
/* ... or shift the single set bit into place */
3383 int val = get_tarval_lowest_bit(t);
3386 res->steps[step].transform = SETCC_TR_SHL;
3387 res->steps[step].scale = val;
3391 res->steps[step].transform = SETCC_TR_SET;
3392 res->num_steps = ++step;
3395 panic("tarval is not long");
3399 * Transforms a Mux node into some code sequence:
 * float abs, SSE min/max, constant-table load (float two-const case),
 * doz (unsigned difference-or-zero), setcc + constant transform, or CMov.
3401 * @return The transformed node.
3403 static ir_node *gen_Mux(ir_node *node)
3405 dbg_info *dbgi = get_irn_dbg_info(node);
3406 ir_node *block = get_nodes_block(node);
3407 ir_node *new_block = be_transform_node(block);
3408 ir_node *mux_true = get_Mux_true(node);
3409 ir_node *mux_false = get_Mux_false(node);
3410 ir_node *sel = get_Mux_sel(node);
3411 ir_mode *mode = get_irn_mode(node);
3415 ia32_condition_code_t cc;
3417 assert(get_irn_mode(sel) == mode_b);
/* abs(x) pattern: Mux(x < 0, -x, x) or the negated variant */
3419 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3421 if (ia32_mode_needs_gp_reg(mode)) {
3422 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3425 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3426 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3430 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3431 if (mode_is_float(mode)) {
3432 ir_node *cmp_left = get_Cmp_left(sel);
3433 ir_node *cmp_right = get_Cmp_right(sel);
3434 ir_relation relation = get_Cmp_relation(sel);
3436 if (ia32_cg_config.use_sse2) {
3437 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3438 if (cmp_left == mux_true && cmp_right == mux_false) {
3439 /* Mux(a <= b, a, b) => MIN */
3440 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3441 match_commutative | match_am | match_two_users);
3442 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3443 /* Mux(a <= b, b, a) => MAX */
3444 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3445 match_commutative | match_am | match_two_users);
3447 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3448 if (cmp_left == mux_true && cmp_right == mux_false) {
3449 /* Mux(a >= b, a, b) => MAX */
3450 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3451 match_commutative | match_am | match_two_users);
3452 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3453 /* Mux(a >= b, b, a) => MIN */
3454 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3455 match_commutative | match_am | match_two_users);
/* float Mux of two constants: build a 2-element constant table and load
 * entry [setcc-result] from it */
3460 if (is_Const(mux_true) && is_Const(mux_false)) {
3461 ia32_address_mode_t am;
3466 flags = get_flags_node(sel, &cc);
3467 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3469 if (ia32_cg_config.use_sse2) {
3470 /* cannot load from different mode on SSE */
3473 /* x87 can load any mode */
3477 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* pick the address-mode scale matching the element size */
3479 if (new_mode == mode_F) {
3481 } else if (new_mode == mode_D) {
3483 } else if (new_mode == ia32_mode_E) {
3484 /* arg, shift 16 NOT supported */
3486 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3488 panic("Unsupported constant size");
3491 am.ls_mode = new_mode;
3492 am.addr.base = get_symconst_base();
3493 am.addr.index = new_node;
3494 am.addr.mem = nomem;
3496 am.addr.scale = scale;
3497 am.addr.use_frame = 0;
3498 am.addr.tls_segment = false;
3499 am.addr.frame_entity = NULL;
3500 am.addr.symconst_sign = 0;
3501 am.mem_proj = am.addr.mem;
3502 am.op_type = ia32_AddrModeS;
3505 am.pinned = op_pin_state_floats;
3507 am.ins_permuted = false;
3509 if (ia32_cg_config.use_sse2)
3510 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3512 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3513 set_am_attributes(load, &am);
3515 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3517 panic("cannot transform floating point Mux");
3520 assert(ia32_mode_needs_gp_reg(mode));
3523 ir_node *cmp_left = get_Cmp_left(sel);
3524 ir_node *cmp_right = get_Cmp_right(sel);
3525 ir_relation relation = get_Cmp_relation(sel);
3526 ir_node *val_true = mux_true;
3527 ir_node *val_false = mux_false;
/* normalize so the zero constant is on the false side */
3529 if (is_Const(val_true) && is_Const_null(val_true)) {
3530 ir_node *tmp = val_false;
3531 val_false = val_true;
3533 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) => doz (unsigned difference or zero) */
3535 if (is_Const_0(val_false) && is_Sub(val_true)) {
3536 if ((relation & ir_relation_greater)
3537 && get_Sub_left(val_true) == cmp_left
3538 && get_Sub_right(val_true) == cmp_right) {
3539 return create_doz(node, cmp_left, cmp_right);
3541 if ((relation & ir_relation_less)
3542 && get_Sub_left(val_true) == cmp_right
3543 && get_Sub_right(val_true) == cmp_left) {
3544 return create_doz(node, cmp_right, cmp_left);
3549 flags = get_flags_node(sel, &cc);
3551 if (is_Const(mux_true) && is_Const(mux_false)) {
3552 /* both are const, good */
3553 ir_tarval *tv_true = get_Const_tarval(mux_true);
3554 ir_tarval *tv_false = get_Const_tarval(mux_false);
3555 setcc_transform_t res;
3558 find_const_transform(cc, tv_true, tv_false, &res);
/* apply the recorded transform steps in reverse order, ending at the
 * setcc (or sbb) that produces the initial 0/1 (or 0/-1) value */
3560 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3563 switch (res.steps[step].transform) {
3565 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3566 add_ia32_am_offs_int(new_node, res.steps[step].val);
3568 case SETCC_TR_ADDxx:
3569 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3572 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3573 set_ia32_am_scale(new_node, res.steps[step].scale);
3574 set_ia32_am_offs_int(new_node, res.steps[step].val);
3576 case SETCC_TR_LEAxx:
3577 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3578 set_ia32_am_scale(new_node, res.steps[step].scale);
3579 set_ia32_am_offs_int(new_node, res.steps[step].val);
3582 imm = ia32_immediate_from_long(res.steps[step].scale);
3583 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3586 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3589 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3592 imm = ia32_immediate_from_long(res.steps[step].val);
3593 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3596 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3599 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3602 panic("unknown setcc transform");
/* general case: conditional move */
3606 new_node = create_CMov(node, sel, flags, cc);
3613 * Create a conversion from x87 state register to general purpose:
 * fist the value to a frame slot, then Load it back as an integer.
3615 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3617 ir_node *block = be_transform_node(get_nodes_block(node));
3618 ir_node *op = get_Conv_op(node);
3619 ir_node *new_op = be_transform_node(op);
3620 ir_graph *irg = current_ir_graph;
3621 dbg_info *dbgi = get_irn_dbg_info(node);
3622 ir_mode *mode = get_irn_mode(node);
3623 ir_node *frame = get_irg_frame(irg);
3624 ir_node *fist, *load, *mem;
3626 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3627 set_irn_pinned(fist, op_pin_state_floats);
3628 set_ia32_use_frame(fist);
3629 set_ia32_op_type(fist, ia32_AddrModeD);
/* vfist and vfisttp share the memory Proj number, so one Proj works for both */
3631 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3632 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3634 assert(get_mode_size_bits(mode) <= 32);
3635 /* exception we can only store signed 32 bit integers, so for unsigned
3636 we store a 64bit (signed) integer and load the lower bits */
3637 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3638 set_ia32_ls_mode(fist, mode_Ls);
3640 set_ia32_ls_mode(fist, mode_Is);
3642 SET_IA32_ORIG_NODE(fist, node);
3645 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3647 set_irn_pinned(load, op_pin_state_floats);
3648 set_ia32_use_frame(load);
3649 set_ia32_op_type(load, ia32_AddrModeS);
3650 set_ia32_ls_mode(load, mode_Is);
/* the spill-slot allocator must reserve a slot large enough for the
 * 64-bit store in the unsigned-32 case */
3651 if (get_ia32_ls_mode(fist) == mode_Ls) {
3652 ia32_attr_t *attr = get_ia32_attr(load);
3653 attr->data.need_64bit_stackent = 1;
3655 ia32_attr_t *attr = get_ia32_attr(load);
3656 attr->data.need_32bit_stackent = 1;
3658 SET_IA32_ORIG_NODE(load, node);
3660 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3664 * Creates a x87 strict Conv by placing a Store and a Load
 * through a frame slot: the round trip through memory forces rounding to
 * the target precision (x87 registers always hold 80-bit values).
3666 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3668 ir_node *block = get_nodes_block(node);
3669 ir_graph *irg = get_Block_irg(block);
3670 dbg_info *dbgi = get_irn_dbg_info(node);
3671 ir_node *frame = get_irg_frame(irg);
3673 ir_node *store, *load;
3676 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3677 set_ia32_use_frame(store);
3678 set_ia32_op_type(store, ia32_AddrModeD);
3679 SET_IA32_ORIG_NODE(store, node);
3681 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3683 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3684 set_ia32_use_frame(load);
3685 set_ia32_op_type(load, ia32_AddrModeS);
3686 SET_IA32_ORIG_NODE(load, node);
3688 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
3692 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3693 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3695 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3697 func = get_mode_size_bits(mode) == 8 ?
3698 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3699 return func(dbgi, block, base, index, mem, val, mode);
3703 * Create a conversion from general purpose to x87 register
3705 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3707 ir_node *src_block = get_nodes_block(node);
3708 ir_node *block = be_transform_node(src_block);
3709 ir_graph *irg = get_Block_irg(block);
3710 dbg_info *dbgi = get_irn_dbg_info(node);
3711 ir_node *op = get_Conv_op(node);
3712 ir_node *new_op = NULL;
3714 ir_mode *store_mode;
3720 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3721 if (possible_int_mode_for_fp(src_mode)) {
3722 ia32_address_mode_t am;
3724 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3725 if (am.op_type == ia32_AddrModeS) {
3726 ia32_address_t *addr = &am.addr;
/* The operand is already in memory: fild can load it directly, no spill
 * through the frame is needed. */
3728 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3729 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3731 set_am_attributes(fild, &am);
3732 SET_IA32_ORIG_NODE(fild, node);
3734 fix_mem_proj(fild, &am);
/* No source address mode matched: the operand lives in a gp register and
 * has to be spilled to the frame so fild can pick it up. */
3739 if (new_op == NULL) {
3740 new_op = be_transform_node(op);
3743 mode = get_irn_mode(op);
3745 /* first convert to 32 bit signed if necessary */
3746 if (get_mode_size_bits(src_mode) < 32) {
3747 if (!upper_bits_clean(new_op, src_mode)) {
3748 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3749 SET_IA32_ORIG_NODE(new_op, node);
3754 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32bit) value into the frame */
3757 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3759 set_ia32_use_frame(store);
3760 set_ia32_op_type(store, ia32_AddrModeD);
3761 set_ia32_ls_mode(store, mode_Iu);
3763 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3765 /* exception for 32bit unsigned, do a 64bit spill+load */
3766 if (!mode_is_signed(mode)) {
3769 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* fild only reads signed integers, so write a zero high word at +4 and
 * load the whole 64bit slot — the value then always appears positive */
3771 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3772 noreg_GP, nomem, zero_const);
3773 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3775 set_ia32_use_frame(zero_store);
3776 set_ia32_op_type(zero_store, ia32_AddrModeD);
3777 add_ia32_am_offs_int(zero_store, 4);
3778 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must observe both stores, so merge their memories with a Sync */
3780 in[0] = zero_store_mem;
3783 store_mem = new_rd_Sync(dbgi, block, 2, in);
3784 store_mode = mode_Ls;
3786 store_mode = mode_Is;
/* load the spilled integer into an x87 register */
3790 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3792 set_ia32_use_frame(fild);
3793 set_ia32_op_type(fild, ia32_AddrModeS);
3794 set_ia32_ls_mode(fild, store_mode);
3796 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3802 * Create a conversion from one integer mode into another one
3804 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3805 dbg_info *dbgi, ir_node *block, ir_node *op,
3808 ir_node *new_block = be_transform_node(block);
3810 ir_mode *smaller_mode;
3811 ia32_address_mode_t am;
3812 ia32_address_t *addr = &am.addr;
/* An int->int conv only has to honour the smaller of the two modes:
 * widening is just a matter of (sign/zero) extending from it. */
3815 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3816 smaller_mode = src_mode;
3818 smaller_mode = tgt_mode;
3821 #ifdef DEBUG_libfirm
3823 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3828 match_arguments(&am, block, NULL, op, NULL,
3829 match_am | match_8bit_am | match_16bit_am);
/* If the relevant upper bits are already clean the conv is a no-op. */
3831 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3832 /* unnecessary conv. in theory it shouldn't have been AM */
3833 assert(is_ia32_NoReg_GP(addr->base));
3834 assert(is_ia32_NoReg_GP(addr->index));
3835 assert(is_NoMem(addr->mem));
3836 assert(am.addr.offset == 0);
3837 assert(am.addr.symconst_ent == NULL);
3841 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3842 addr->mem, am.new_op2, smaller_mode);
3843 set_am_attributes(new_node, &am);
3844 /* match_arguments assume that out-mode = in-mode, this isn't true here
3846 set_ia32_ls_mode(new_node, smaller_mode);
3847 SET_IA32_ORIG_NODE(new_node, node);
3848 new_node = fix_mem_proj(new_node, &am);
3853 * Transforms a Conv node.
3855 * @return The created ia32 Conv node
3857 static ir_node *gen_Conv(ir_node *node)
3859 ir_node *block = get_nodes_block(node);
3860 ir_node *new_block = be_transform_node(block);
3861 ir_node *op = get_Conv_op(node);
3862 ir_node *new_op = NULL;
3863 dbg_info *dbgi = get_irn_dbg_info(node);
3864 ir_mode *src_mode = get_irn_mode(op);
3865 ir_mode *tgt_mode = get_irn_mode(node);
3866 int src_bits = get_mode_size_bits(src_mode);
3867 int tgt_bits = get_mode_size_bits(tgt_mode);
3868 ir_node *res = NULL;
/* 64bit integer ops were lowered before this phase, so only <=32bit
 * integer modes may reach us. */
3870 assert(!mode_is_int(src_mode) || src_bits <= 32);
3871 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3873 /* modeB -> X should already be lowered by the lower_mode_b pass */
3874 if (src_mode == mode_b) {
3875 panic("ConvB not lowered %+F", node);
3878 if (src_mode == tgt_mode) {
3879 if (get_Conv_strict(node)) {
3880 if (ia32_cg_config.use_sse2) {
3881 /* when we are in SSE mode, we can kill all strict no-op conversion */
3882 return be_transform_node(op);
3885 /* this should be optimized already, but who knows... */
3886 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3887 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3888 return be_transform_node(op);
/* Dispatch over the four mode-class combinations:
 * float->float, float->int, int->float, int->int. */
3892 if (mode_is_float(src_mode)) {
3893 new_op = be_transform_node(op);
3894 /* we convert from float ... */
3895 if (mode_is_float(tgt_mode)) {
3897 if (ia32_cg_config.use_sse2) {
3898 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3899 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3901 set_ia32_ls_mode(res, tgt_mode);
3903 if (get_Conv_strict(node)) {
3904 /* if fp_no_float_fold is not set then we assume that we
3905 * don't have any float operations in a non
3906 * mode_float_arithmetic mode and can skip strict upconvs */
3907 if (src_bits < tgt_bits) {
3908 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict down-conversion on x87: force rounding via store+load */
3911 res = gen_x87_strict_conv(tgt_mode, new_op);
3912 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3916 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3921 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3922 if (ia32_cg_config.use_sse2) {
3923 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3925 set_ia32_ls_mode(res, src_mode);
3927 return gen_x87_fp_to_gp(node);
3931 /* we convert from int ... */
3932 if (mode_is_float(tgt_mode)) {
3934 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3935 if (ia32_cg_config.use_sse2) {
3936 new_op = be_transform_node(op);
3937 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3939 set_ia32_ls_mode(res, tgt_mode);
/* int_mantissa = number of value bits of the source integer */
3941 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3942 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3943 res = gen_x87_gp_to_fp(node, src_mode);
3945 /* we need a strict-Conv, if the int mode has more bits than the
3947 if (float_mantissa < int_mantissa) {
3948 res = gen_x87_strict_conv(tgt_mode, res);
3949 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3953 } else if (tgt_mode == mode_b) {
3954 /* mode_b lowering already took care that we only have 0/1 values */
3955 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3956 src_mode, tgt_mode));
3957 return be_transform_node(op);
/* int -> int: same width needs no code at all */
3960 if (src_bits == tgt_bits) {
3961 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3962 src_mode, tgt_mode));
3963 return be_transform_node(op);
3966 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3974 static ir_node *create_immediate_or_transform(ir_node *node,
3975 char immediate_constraint_type)
3977 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3978 if (new_node == NULL) {
3979 new_node = be_transform_node(node);
3985 * Transforms a FrameAddr into an ia32 Add.
3987 static ir_node *gen_be_FrameAddr(ir_node *node)
3989 ir_node *block = be_transform_node(get_nodes_block(node));
3990 ir_node *op = be_get_FrameAddr_frame(node);
3991 ir_node *new_op = be_transform_node(op);
3992 dbg_info *dbgi = get_irn_dbg_info(node);
3995 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3996 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3997 set_ia32_use_frame(new_node);
3999 SET_IA32_ORIG_NODE(new_node, node);
4005 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
4007 static ir_node *gen_be_Return(ir_node *node)
4009 ir_graph *irg = current_ir_graph;
4010 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4011 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4012 ir_node *new_ret_val = be_transform_node(ret_val);
4013 ir_node *new_ret_mem = be_transform_node(ret_mem);
4014 ir_entity *ent = get_irg_entity(irg);
4015 ir_type *tp = get_entity_type(ent);
4016 dbg_info *dbgi = get_irn_dbg_info(node);
4017 ir_node *block = be_transform_node(get_nodes_block(node));
/* Fast path: only the SSE + float-primitive-result case needs the
 * XMM0 -> x87 shuffle; everything else is a plain duplication. */
4031 assert(ret_val != NULL);
4032 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4033 return be_duplicate_node(node);
4036 res_type = get_method_res_type(tp, 0);
4038 if (! is_Primitive_type(res_type)) {
4039 return be_duplicate_node(node);
4042 mode = get_type_mode(res_type);
4043 if (! mode_is_float(mode)) {
4044 return be_duplicate_node(node);
4047 assert(get_method_n_ress(tp) == 1);
4049 frame = get_irg_frame(irg);
4051 /* store xmm0 onto stack */
4052 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4053 new_ret_mem, new_ret_val);
4054 set_ia32_ls_mode(sse_store, mode);
4055 set_ia32_op_type(sse_store, ia32_AddrModeD);
4056 set_ia32_use_frame(sse_store);
4057 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4059 /* load into x87 register */
4060 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4061 set_ia32_op_type(fld, ia32_AddrModeS);
4062 set_ia32_use_frame(fld);
4064 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4065 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4067 /* create a new return */
4068 arity = get_irn_arity(node);
4069 in = ALLOCAN(ir_node*, arity);
4070 pop = be_Return_get_pop(node);
/* Rebuild the input array, substituting the reloaded value/memory for
 * the original return value and memory operands. */
4071 for (i = 0; i < arity; ++i) {
4072 ir_node *op = get_irn_n(node, i);
4073 if (op == ret_val) {
4075 } else if (op == ret_mem) {
4078 in[i] = be_transform_node(op);
4081 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4082 copy_node_attr(irg, node, new_node);
4088 * Transform a be_AddSP into an ia32_SubSP.
4090 static ir_node *gen_be_AddSP(ir_node *node)
4092 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4093 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4095 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4096 match_am | match_immediate);
4097 assert(is_ia32_SubSP(new_node));
4098 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4099 &ia32_registers[REG_ESP]);
4104 * Transform a be_SubSP into an ia32_AddSP
4106 static ir_node *gen_be_SubSP(ir_node *node)
4108 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4109 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4111 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4112 match_am | match_immediate);
4113 assert(is_ia32_AddSP(new_node));
4114 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4115 &ia32_registers[REG_ESP]);
4120 * Change some phi modes
4122 static ir_node *gen_Phi(ir_node *node)
4124 const arch_register_req_t *req;
4125 ir_node *block = be_transform_node(get_nodes_block(node));
4126 ir_graph *irg = current_ir_graph;
4127 dbg_info *dbgi = get_irn_dbg_info(node);
4128 ir_mode *mode = get_irn_mode(node);
/* Select the register class requirement matching the Phi's mode. */
4131 if (ia32_mode_needs_gp_reg(mode)) {
4132 /* we shouldn't have any 64bit stuff around anymore */
4133 assert(get_mode_size_bits(mode) <= 32);
4134 /* all integer operations are on 32bit registers now */
4136 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4137 } else if (mode_is_float(mode)) {
4138 if (ia32_cg_config.use_sse2) {
4140 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4143 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* non-data Phi (e.g. memory): no register requirement */
4146 req = arch_no_register_req;
4149 /* phi nodes allow loops, so we use the old arguments for now
4150 * and fix this later */
4151 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4152 get_irn_in(node) + 1);
4153 copy_node_attr(irg, node, phi);
4154 be_duplicate_deps(node, phi);
4156 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the predecessors get transformed eventually */
4158 be_enqueue_preds(node);
4163 static ir_node *gen_Jmp(ir_node *node)
4165 ir_node *block = get_nodes_block(node);
4166 ir_node *new_block = be_transform_node(block);
4167 dbg_info *dbgi = get_irn_dbg_info(node);
4170 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4171 SET_IA32_ORIG_NODE(new_node, node);
/* Transforms an indirect jump (computed goto); the target address may be
 * folded into an address mode or an immediate. */
4179 static ir_node *gen_IJmp(ir_node *node)
4181 ir_node *block = get_nodes_block(node);
4182 ir_node *new_block = be_transform_node(block);
4183 dbg_info *dbgi = get_irn_dbg_info(node);
4184 ir_node *op = get_IJmp_target(node);
4186 ia32_address_mode_t am;
4187 ia32_address_t *addr = &am.addr;
/* jump targets are addresses */
4189 assert(get_irn_mode(op) == mode_P);
4191 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4193 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4194 addr->mem, am.new_op2);
4195 set_am_attributes(new_node, &am);
4196 SET_IA32_ORIG_NODE(new_node, node);
4198 new_node = fix_mem_proj(new_node, &am);
4203 static ir_node *gen_ia32_l_Add(ir_node *node)
4205 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4206 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4207 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4208 match_commutative | match_am | match_immediate |
4209 match_mode_neutral);
4211 if (is_Proj(lowered)) {
4212 lowered = get_Proj_pred(lowered);
4214 assert(is_ia32_Add(lowered));
4215 set_irn_mode(lowered, mode_T);
4221 static ir_node *gen_ia32_l_Adc(ir_node *node)
4223 return gen_binop_flags(node, new_bd_ia32_Adc,
4224 match_commutative | match_am | match_immediate |
4225 match_mode_neutral);
4229 * Transforms a l_MulS into a "real" MulS node.
4231 * @return the created ia32 Mul node
4233 static ir_node *gen_ia32_l_Mul(ir_node *node)
4235 ir_node *left = get_binop_left(node);
4236 ir_node *right = get_binop_right(node);
4238 return gen_binop(node, left, right, new_bd_ia32_Mul,
4239 match_commutative | match_am | match_mode_neutral);
4243 * Transforms a l_IMulS into a "real" IMul1OPS node.
4245 * @return the created ia32 IMul1OP node
4247 static ir_node *gen_ia32_l_IMul(ir_node *node)
4249 ir_node *left = get_binop_left(node);
4250 ir_node *right = get_binop_right(node);
4252 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4253 match_commutative | match_am | match_mode_neutral);
4256 static ir_node *gen_ia32_l_Sub(ir_node *node)
4258 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4259 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4260 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4261 match_am | match_immediate | match_mode_neutral);
4263 if (is_Proj(lowered)) {
4264 lowered = get_Proj_pred(lowered);
4266 assert(is_ia32_Sub(lowered));
4267 set_irn_mode(lowered, mode_T);
4273 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4275 return gen_binop_flags(node, new_bd_ia32_Sbb,
4276 match_am | match_immediate | match_mode_neutral);
/* Transforms a lowered 64bit-int -> float conversion: spill both 32bit
 * words into one 64bit frame slot, fild the whole slot, and — for
 * unsigned sources — correct the result with a bias constant. */
4279 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4281 ir_node *src_block = get_nodes_block(node);
4282 ir_node *block = be_transform_node(src_block);
4283 ir_graph *irg = current_ir_graph;
4284 dbg_info *dbgi = get_irn_dbg_info(node);
4285 ir_node *frame = get_irg_frame(irg);
4286 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4287 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4288 ir_node *new_val_low = be_transform_node(val_low);
4289 ir_node *new_val_high = be_transform_node(val_high);
4291 ir_node *sync, *fild, *res;
4293 ir_node *store_high;
4297 if (ia32_cg_config.use_sse2) {
4298 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both halves into the same frame slot (high word at offset +4) */
4302 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4304 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4306 SET_IA32_ORIG_NODE(store_low, node);
4307 SET_IA32_ORIG_NODE(store_high, node);
4309 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4310 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4312 set_ia32_use_frame(store_low);
4313 set_ia32_use_frame(store_high);
4314 set_ia32_op_type(store_low, ia32_AddrModeD);
4315 set_ia32_op_type(store_high, ia32_AddrModeD);
4316 set_ia32_ls_mode(store_low, mode_Iu);
4317 set_ia32_ls_mode(store_high, mode_Is);
4318 add_ia32_am_offs_int(store_high, 4);
/* fild must observe both stores, so merge their memories with a Sync */
4322 sync = new_rd_Sync(dbgi, block, 2, in);
4325 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4327 set_ia32_use_frame(fild);
4328 set_ia32_op_type(fild, ia32_AddrModeS);
4329 set_ia32_ls_mode(fild, mode_Ls);
4331 SET_IA32_ORIG_NODE(fild, node);
4333 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* fild interprets the slot as *signed* 64bit; for an unsigned source
 * whose top bit is set the result is off by 2^64.  Correct this by
 * adding a constant selected via (val_high >> 31) as table index:
 * entry 0 is 0.0, entry 1 is the ULLBIAS correction value. */
4335 if (! mode_is_signed(get_irn_mode(val_high))) {
4336 ia32_address_mode_t am;
4338 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4341 am.addr.base = get_symconst_base();
4342 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4343 am.addr.mem = nomem;
4346 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4347 am.addr.tls_segment = false;
4348 am.addr.use_frame = 0;
4349 am.addr.frame_entity = NULL;
4350 am.addr.symconst_sign = 0;
4351 am.ls_mode = mode_F;
4352 am.mem_proj = nomem;
4353 am.op_type = ia32_AddrModeS;
4355 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4356 am.pinned = op_pin_state_floats;
4358 am.ins_permuted = false;
4360 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4361 am.new_op1, am.new_op2, get_fpcw());
4362 set_am_attributes(fadd, &am);
4364 set_irn_mode(fadd, mode_T);
4365 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Transforms a lowered float -> 64bit-int conversion: fist the value as a
 * 64bit integer into a frame slot.  The two 32bit result halves are read
 * back later by gen_Proj_l_FloattoLL. */
4370 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4372 ir_node *src_block = get_nodes_block(node);
4373 ir_node *block = be_transform_node(src_block);
4374 ir_graph *irg = get_Block_irg(block);
4375 dbg_info *dbgi = get_irn_dbg_info(node);
4376 ir_node *frame = get_irg_frame(irg);
4377 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4378 ir_node *new_val = be_transform_node(val);
4381 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4382 SET_IA32_ORIG_NODE(fist, node);
4383 set_ia32_use_frame(fist);
4384 set_ia32_op_type(fist, ia32_AddrModeD);
4385 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may create either vfist or vfisttp; both use the same memory
 * proj number, as asserted here */
4387 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4388 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Reads one 32bit half of the 64bit integer that gen_ia32_l_FloattoLL
 * spilled into the frame (high half lives at offset +4). */
4391 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4393 ir_node *block = be_transform_node(get_nodes_block(node));
4394 ir_graph *irg = get_Block_irg(block);
4395 ir_node *pred = get_Proj_pred(node);
4396 ir_node *new_pred = be_transform_node(pred);
4397 ir_node *frame = get_irg_frame(irg);
4398 dbg_info *dbgi = get_irn_dbg_info(node);
4399 long pn = get_Proj_proj(node);
4404 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4405 SET_IA32_ORIG_NODE(load, node);
4406 set_ia32_use_frame(load);
4407 set_ia32_op_type(load, ia32_AddrModeS);
4408 set_ia32_ls_mode(load, mode_Iu);
4409 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4410 * 32 bit from it with this particular load */
4411 attr = get_ia32_attr(load);
4412 attr->data.need_64bit_stackent = 1;
4414 if (pn == pn_ia32_l_FloattoLL_res_high) {
4415 add_ia32_am_offs_int(load, 4);
4417 assert(pn == pn_ia32_l_FloattoLL_res_low);
4420 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4426 * Transform the Projs of an AddSP.
4428 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4430 ir_node *pred = get_Proj_pred(node);
4431 ir_node *new_pred = be_transform_node(pred);
4432 dbg_info *dbgi = get_irn_dbg_info(node);
4433 long proj = get_Proj_proj(node);
/* the be_AddSP was transformed into an ia32_SubSP (see gen_be_AddSP), so
 * each be proj number has to be mapped to its SubSP counterpart */
4435 if (proj == pn_be_AddSP_sp) {
4436 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4437 pn_ia32_SubSP_stack);
4438 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4440 } else if (proj == pn_be_AddSP_res) {
4441 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4442 pn_ia32_SubSP_addr);
4443 } else if (proj == pn_be_AddSP_M) {
4444 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4447 panic("No idea how to transform proj->AddSP");
4451 * Transform the Projs of a SubSP.
4453 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4455 ir_node *pred = get_Proj_pred(node);
4456 ir_node *new_pred = be_transform_node(pred);
4457 dbg_info *dbgi = get_irn_dbg_info(node);
4458 long proj = get_Proj_proj(node);
/* the be_SubSP was transformed into an ia32_AddSP (see gen_be_SubSP), so
 * each be proj number has to be mapped to its AddSP counterpart */
4460 if (proj == pn_be_SubSP_sp) {
4461 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4462 pn_ia32_AddSP_stack);
4463 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4465 } else if (proj == pn_be_SubSP_M) {
4466 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4469 panic("No idea how to transform proj->SubSP");
4473 * Transform and renumber the Projs from a Load.
4475 static ir_node *gen_Proj_Load(ir_node *node)
4478 ir_node *pred = get_Proj_pred(node);
4479 dbg_info *dbgi = get_irn_dbg_info(node);
4480 long proj = get_Proj_proj(node);
4482 /* loads might be part of source address mode matches, so we don't
4483 * transform the ProjMs yet (with the exception of loads whose result is
4486 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4489 /* this is needed, because sometimes we have loops that are only
4490 reachable through the ProjM */
4491 be_enqueue_preds(node);
4492 /* do it in 2 steps, to silence firm verifier */
4493 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4494 set_Proj_proj(res, pn_ia32_mem);
4498 /* renumber the proj */
/* the Load may have been transformed into several different ia32 nodes;
 * dispatch on the result and map each firm proj number accordingly */
4499 new_pred = be_transform_node(pred);
4500 if (is_ia32_Load(new_pred)) {
4501 switch ((pn_Load)proj) {
4503 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4505 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4506 case pn_Load_X_except:
4507 /* This Load might raise an exception. Mark it. */
4508 set_ia32_exc_label(new_pred, 1);
4509 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4510 case pn_Load_X_regular:
4511 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load was folded into a Conv with source address mode */
4513 } else if (is_ia32_Conv_I2I(new_pred) ||
4514 is_ia32_Conv_I2I8Bit(new_pred)) {
4515 set_irn_mode(new_pred, mode_T);
4516 switch ((pn_Load)proj) {
4518 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4520 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4521 case pn_Load_X_except:
4522 /* This Load might raise an exception. Mark it. */
4523 set_ia32_exc_label(new_pred, 1);
4524 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4525 case pn_Load_X_regular:
4526 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load */
4528 } else if (is_ia32_xLoad(new_pred)) {
4529 switch ((pn_Load)proj) {
4531 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4533 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4534 case pn_Load_X_except:
4535 /* This Load might raise an exception. Mark it. */
4536 set_ia32_exc_label(new_pred, 1);
4537 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4538 case pn_Load_X_regular:
4539 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load */
4541 } else if (is_ia32_vfld(new_pred)) {
4542 switch ((pn_Load)proj) {
4544 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4546 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4547 case pn_Load_X_except:
4548 /* This Load might raise an exception. Mark it. */
4549 set_ia32_exc_label(new_pred, 1);
4550 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4551 case pn_Load_X_regular:
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4555 /* can happen for ProJMs when source address mode happened for the
4558 /* however it should not be the result proj, as that would mean the
4559 load had multiple users and should not have been used for
4561 if (proj != pn_Load_M) {
4562 panic("internal error: transformed node not a Load");
4564 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4567 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs from a Store: dispatch on the ia32 node
 * the Store became and map each firm proj number onto its counterpart. */
4570 static ir_node *gen_Proj_Store(ir_node *node)
4572 ir_node *pred = get_Proj_pred(node);
4573 ir_node *new_pred = be_transform_node(pred);
4574 dbg_info *dbgi = get_irn_dbg_info(node);
4575 long pn = get_Proj_proj(node);
4577 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4578 switch ((pn_Store)pn) {
4580 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4581 case pn_Store_X_except:
4582 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4583 case pn_Store_X_regular:
4584 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
/* x87 float-to-int store */
4586 } else if (is_ia32_vfist(new_pred)) {
4587 switch ((pn_Store)pn) {
4589 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4590 case pn_Store_X_except:
4591 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4592 case pn_Store_X_regular:
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
/* SSE3 truncating variant of vfist */
4595 } else if (is_ia32_vfisttp(new_pred)) {
4596 switch ((pn_Store)pn) {
4598 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4599 case pn_Store_X_except:
4600 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4601 case pn_Store_X_regular:
4602 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
/* x87 float store */
4604 } else if (is_ia32_vfst(new_pred)) {
4605 switch ((pn_Store)pn) {
4607 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4608 case pn_Store_X_except:
4609 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4610 case pn_Store_X_regular:
4611 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
/* SSE store */
4613 } else if (is_ia32_xStore(new_pred)) {
4614 switch ((pn_Store)pn) {
4616 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4617 case pn_Store_X_except:
4618 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4619 case pn_Store_X_regular:
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4622 } else if (is_Sync(new_pred)) {
4623 /* hack for the case that gen_float_const_Store produced a Sync */
4624 if (pn == pn_Store_M) {
4627 panic("exception control flow for gen_float_const_Store not implemented yet");
4628 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4629 /* destination address mode */
4630 if (pn == pn_Store_M) {
4633 panic("exception control flow for destination AM not implemented yet");
4636 panic("No idea how to transform Proj(Store) %+F", node);
4640 * Transform and renumber the Projs from a Div or Mod instruction.
4642 static ir_node *gen_Proj_Div(ir_node *node)
4644 ir_node *pred = get_Proj_pred(node);
4645 ir_node *new_pred = be_transform_node(pred);
4646 dbg_info *dbgi = get_irn_dbg_info(node);
4647 long proj = get_Proj_proj(node);
/* Div and IDiv share proj numbering, so a single mapping handles both;
 * verify that assumption holds. */
4649 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4650 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4652 switch ((pn_Div)proj) {
4654 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4655 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4656 } else if (is_ia32_xDiv(new_pred)) {
4657 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4658 } else if (is_ia32_vfdiv(new_pred)) {
4659 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4661 panic("Div transformed to unexpected thing %+F", new_pred);
/* result proj: pick the mode matching the ia32 node (gp/SSE/x87) */
4664 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4665 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4666 } else if (is_ia32_xDiv(new_pred)) {
4667 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4668 } else if (is_ia32_vfdiv(new_pred)) {
4669 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4671 panic("Div transformed to unexpected thing %+F", new_pred);
4673 case pn_Div_X_except:
4674 set_ia32_exc_label(new_pred, 1);
4675 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4676 case pn_Div_X_regular:
4677 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4680 panic("No idea how to transform proj->Div");
4684 * Transform and renumber the Projs from a Div or Mod instruction.
4686 static ir_node *gen_Proj_Mod(ir_node *node)
4688 ir_node *pred = get_Proj_pred(node);
4689 ir_node *new_pred = be_transform_node(pred);
4690 dbg_info *dbgi = get_irn_dbg_info(node);
4691 long proj = get_Proj_proj(node);
/* Mod only exists for integers, so the predecessor must have become an
 * ia32 Div/IDiv (which yields both quotient and remainder). */
4693 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4694 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4695 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4697 switch ((pn_Mod)proj) {
4699 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4701 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4702 case pn_Mod_X_except:
4703 set_ia32_exc_label(new_pred, 1);
4704 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4705 case pn_Mod_X_regular:
4706 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4708 panic("No idea how to transform proj->Mod");
4712 * Transform and renumber the Projs from a CopyB.
4714 static ir_node *gen_Proj_CopyB(ir_node *node)
4716 ir_node *pred = get_Proj_pred(node);
4717 ir_node *new_pred = be_transform_node(pred);
4718 dbg_info *dbgi = get_irn_dbg_info(node);
4719 long proj = get_Proj_proj(node);
/* a CopyB becomes either CopyB_i (known size) or CopyB (generic);
 * map each proj number to the matching variant */
4721 switch ((pn_CopyB)proj) {
4723 if (is_ia32_CopyB_i(new_pred)) {
4724 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4725 } else if (is_ia32_CopyB(new_pred)) {
4726 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4729 case pn_CopyB_X_regular:
4730 if (is_ia32_CopyB_i(new_pred)) {
4731 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4732 } else if (is_ia32_CopyB(new_pred)) {
4733 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4736 case pn_CopyB_X_except:
4737 if (is_ia32_CopyB_i(new_pred)) {
4738 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4739 } else if (is_ia32_CopyB(new_pred)) {
4740 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4745 panic("No idea how to transform proj->CopyB");
/* Transforms a be_Call into an ia32 Call: matches an address mode for the
 * call target, wires up the fixed gp register parameters (eax/ecx/edx)
 * and the fpcw, and records SSE calls for later fixup. */
4748 static ir_node *gen_be_Call(ir_node *node)
4750 dbg_info *const dbgi = get_irn_dbg_info(node);
4751 ir_node *const src_block = get_nodes_block(node);
4752 ir_node *const block = be_transform_node(src_block);
4753 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4754 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4755 ir_node *const sp = be_transform_node(src_sp);
4756 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4757 ia32_address_mode_t am;
4758 ia32_address_t *const addr = &am.addr;
4763 ir_node * eax = noreg_GP;
4764 ir_node * ecx = noreg_GP;
4765 ir_node * edx = noreg_GP;
4766 unsigned const pop = be_Call_get_pop(node);
4767 ir_type *const call_tp = be_Call_get_type(node);
4768 int old_no_pic_adjust;
4769 int throws_exception = ir_throws_exception(node);
4771 /* Run the x87 simulator if the call returns a float value */
4772 if (get_method_n_ress(call_tp) > 0) {
4773 ir_type *const res_type = get_method_res_type(call_tp, 0);
4774 ir_mode *const res_mode = get_type_mode(res_type);
4776 if (res_mode != NULL && mode_is_float(res_mode)) {
4777 ir_graph *irg = current_ir_graph;
4778 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4779 irg_data->do_x87_sim = 1;
4783 /* We do not want be_Call direct calls */
4784 assert(be_Call_get_entity(node) == NULL);
4786 /* special case for PIC trampoline calls */
4787 old_no_pic_adjust = ia32_no_pic_adjust;
4788 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4790 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4791 match_am | match_immediate);
/* restore the flag; the suppression only applies while matching the
 * call target */
4793 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; the remaining register parameters are
 * walked backwards down to the first argument input */
4795 i = get_irn_arity(node) - 1;
4796 fpcw = be_transform_node(get_irn_n(node, i--));
4797 for (; i >= n_be_Call_first_arg; --i) {
4798 arch_register_req_t const *const req
4799 = arch_get_irn_register_req_in(node, i);
4800 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4802 assert(req->type == arch_register_req_type_limited);
4803 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* sort each register parameter into its dedicated input slot */
4805 switch (*req->limited) {
4806 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4807 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4808 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4809 default: panic("Invalid GP register for register parameter");
4813 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4814 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4815 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4816 ir_set_throws_exception(call, throws_exception);
4817 set_am_attributes(call, &am);
4818 call = fix_mem_proj(call, &am);
4820 if (get_irn_pinned(node) == op_pin_state_pinned)
4821 set_irn_pinned(call, op_pin_state_pinned);
4823 SET_IA32_ORIG_NODE(call, node);
4825 if (ia32_cg_config.use_sse2) {
4826 /* remember this call for post-processing */
4827 ARR_APP1(ir_node *, call_list, call);
4828 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4835 * Transform Builtin trap
4837 static ir_node *gen_trap(ir_node *node)
4839 dbg_info *dbgi = get_irn_dbg_info(node);
4840 ir_node *block = be_transform_node(get_nodes_block(node));
4841 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4843 return new_bd_ia32_UD2(dbgi, block, mem);
4847 * Transform Builtin debugbreak
4849 static ir_node *gen_debugbreak(ir_node *node)
4851 dbg_info *dbgi = get_irn_dbg_info(node);
4852 ir_node *block = be_transform_node(get_nodes_block(node));
4853 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4855 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform Builtin return_address: load the return address of the frame
 * `value` levels up, using a frame-entity based Load.
 */
4861 static ir_node *gen_return_address(ir_node *node)
4863 ir_node *param = get_Builtin_param(node, 0);
4864 ir_node *frame = get_Builtin_param(node, 1);
4865 dbg_info *dbgi = get_irn_dbg_info(node);
4866 ir_tarval *tv = get_Const_tarval(param);
4867 ir_graph *irg = get_irn_irg(node);
4868 unsigned long value = get_tarval_long(tv);
4870 ir_node *block = be_transform_node(get_nodes_block(node));
4871 ir_node *ptr = be_transform_node(frame);
/* presumably only taken for value > 0: walk `value` frames up first
 * (ClimbFrame) — TODO confirm, guarding condition not visible here */
4875 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4876 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4877 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4880 /* load the return address from this frame */
4881 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4883 set_irn_pinned(load, get_irn_pinned(node));
4884 set_ia32_op_type(load, ia32_AddrModeS);
4885 set_ia32_ls_mode(load, mode_Iu);
4887 set_ia32_am_offs_int(load, 0);
4888 set_ia32_use_frame(load);
4889 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* an unpinned load may be rematerialized; all load flavours must agree on
 * the result proj number for that to be safe */
4891 if (get_irn_pinned(node) == op_pin_state_floats) {
4892 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4893 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4894 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4895 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4898 SET_IA32_ORIG_NODE(load, node);
4899 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin frame_address: load the frame address of the frame
 * `value` levels up, analogous to gen_return_address.
 */
4905 static ir_node *gen_frame_address(ir_node *node)
4907 ir_node *param = get_Builtin_param(node, 0);
4908 ir_node *frame = get_Builtin_param(node, 1);
4909 dbg_info *dbgi = get_irn_dbg_info(node);
4910 ir_tarval *tv = get_Const_tarval(param);
4911 ir_graph *irg = get_irn_irg(node);
4912 unsigned long value = get_tarval_long(tv);
4914 ir_node *block = be_transform_node(get_nodes_block(node));
4915 ir_node *ptr = be_transform_node(frame);
/* presumably only taken for value > 0: climb `value` frames up first
 * — TODO confirm, guarding condition not visible here */
4920 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4921 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4922 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4925 /* load the frame address from this frame */
4926 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4928 set_irn_pinned(load, get_irn_pinned(node));
4929 set_ia32_op_type(load, ia32_AddrModeS);
4930 set_ia32_ls_mode(load, mode_Iu);
/* NOTE(review): ent may be NULL depending on the backend setup; the
 * frame-entity branch below assumes it is valid — verify upstream */
4932 ent = ia32_get_frame_address_entity(irg);
4934 set_ia32_am_offs_int(load, 0);
4935 set_ia32_use_frame(load);
4936 set_ia32_frame_ent(load, ent);
4938 /* will fail anyway, but gcc does this: */
4939 set_ia32_am_offs_int(load, 0);
/* unpinned loads may be rematerialized (result proj numbers must agree) */
4942 if (get_irn_pinned(node) == op_pin_state_floats) {
4943 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4944 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4945 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4946 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4949 SET_IA32_ORIG_NODE(load, node);
4950 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin prefetch into an ia32 prefetch instruction
 * (SSE prefetchNTA/0/1/2, 3DNow! prefetch/prefetchw), or route the memory
 * edge through unchanged when no prefetch instruction is available.
 *
 * param 0: address, param 1: rw flag (1 = prepare-for-write),
 * param 2: locality hint (SSE only).
 */
4956 static ir_node *gen_prefetch(ir_node *node)
4959 ir_node *ptr, *block, *mem, *base, *idx;
4960 ir_node *param, *new_node;
4963 ia32_address_t addr;
4965 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4966 /* no prefetch at all, route memory */
4967 return be_transform_node(get_Builtin_mem(node));
4970 param = get_Builtin_param(node, 1);
4971 tv = get_Const_tarval(param);
4972 rw = get_tarval_long(tv);
4974 /* construct load address */
4975 memset(&addr, 0, sizeof(addr));
4976 ptr = get_Builtin_param(node, 0);
4977 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4984 base = be_transform_node(base);
4990 idx = be_transform_node(idx);
4993 dbgi = get_irn_dbg_info(node);
4994 block = be_transform_node(get_nodes_block(node));
4995 mem = be_transform_node(get_Builtin_mem(node));
4997 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4998 /* we have 3DNow!, this was already checked above */
4999 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
5000 } else if (ia32_cg_config.use_sse_prefetch) {
5001 /* note: rw == 1 is IGNORED in that case */
5002 param = get_Builtin_param(node, 2);
5003 tv = get_Const_tarval(param);
5004 locality = get_tarval_long(tv);
5006 /* SSE style prefetch */
/* locality 0: non-temporal */
5009 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
/* locality 1 */
5012 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
/* locality 2 */
5015 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
/* locality 3 (highest): keep in all cache levels */
5018 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5022 assert(ia32_cg_config.use_3dnow_prefetch);
5023 /* 3DNow! style prefetch */
5024 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5027 set_irn_pinned(new_node, get_irn_pinned(node));
5028 set_ia32_op_type(new_node, ia32_AddrModeS);
5029 set_ia32_ls_mode(new_node, mode_Bu);
5030 set_address(new_node, &addr);
5032 SET_IA32_ORIG_NODE(new_node, node);
5034 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Transform a bsf-like builtin: construct the given unary-with-address-mode
 * instruction (Bsf/Bsr/Popcnt) on the builtin's single parameter, allowing
 * a memory operand via address-mode matching.
 */
5040 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5042 ir_node *param = get_Builtin_param(node, 0);
5043 dbg_info *dbgi = get_irn_dbg_info(node);
5045 ir_node *block = get_nodes_block(node);
5046 ir_node *new_block = be_transform_node(block);
5048 ia32_address_mode_t am;
5049 ia32_address_t *addr = &am.addr;
5052 match_arguments(&am, block, NULL, param, NULL, match_am);
5054 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5055 set_am_attributes(cnt, &am);
/* operation size follows the parameter's mode */
5056 set_ia32_ls_mode(cnt, get_irn_mode(param));
5058 SET_IA32_ORIG_NODE(cnt, node);
5059 return fix_mem_proj(cnt, &am);
/**
 * Transform builtin ffs (find first set).
 *
 * ffs(x) = 0 if x == 0, else index of lowest set bit + 1. Computed as
 * (bsf(x) | -(x == 0)) + 1: the OR with -1 forces the result to -1 when
 * x is zero (bsf leaves ZF set), and the final +1 yields 0.
 */
5065 static ir_node *gen_ffs(ir_node *node)
5067 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5068 ir_node *real = skip_Proj(bsf);
5069 dbg_info *dbgi = get_irn_dbg_info(real);
5070 ir_node *block = get_nodes_block(real);
5071 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make the Bsf a mode_T node so both the result and the flags are usable */
5074 if (get_irn_mode(real) != mode_T) {
5075 set_irn_mode(real, mode_T);
5076 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5079 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (input was zero) ? 1 : 0 */
5082 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5083 SET_IA32_ORIG_NODE(set, node);
/* widen the 8-bit setcc result to 32 bit */
5086 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5087 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(input was zero), i.e. all-ones mask when x == 0 */
5090 neg = new_bd_ia32_Neg(dbgi, block, conv);
5093 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5094 set_ia32_ls_mode(orn, mode_Iu);
5095 set_ia32_commutative(orn);
/* final +1 done via Lea to avoid clobbering flags */
5098 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5099 add_ia32_am_offs_int(add, 1);
5104 * Transform builtin clz.
5106 static ir_node *gen_clz(ir_node *node)
5108 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5109 ir_node *real = skip_Proj(bsr);
5110 dbg_info *dbgi = get_irn_dbg_info(real);
5111 ir_node *block = get_nodes_block(real);
5112 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5114 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5118 * Transform builtin ctz.
5120 static ir_node *gen_ctz(ir_node *node)
5122 return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity.
 *
 * The x86 parity flag only reflects the lowest result byte, so the 32-bit
 * value is first folded: xor with itself shifted right by 16, then the
 * high/low bytes of the low word are xored (XorHighLow); the resulting
 * parity flag is then materialized with setcc.
 */
5128 static ir_node *gen_parity(ir_node *node)
5130 dbg_info *dbgi = get_irn_dbg_info(node);
5131 ir_node *block = get_nodes_block(node);
5132 ir_node *new_block = be_transform_node(block);
5133 ir_node *param = get_Builtin_param(node, 0);
5134 ir_node *new_param = be_transform_node(param);
5137 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5138 * so we have to do complicated xoring first.
5139 * (we should also better lower this before the backend so we still have a
5140 * chance for CSE, constant folding and other goodies for some of these
/* fold upper half into lower half: param ^ (param >> 16) */
5143 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5144 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5145 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5147 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5150 set_ia32_ls_mode(xor, mode_Iu);
5151 set_ia32_commutative(xor);
/* we need the flags output of the XorHighLow */
5153 set_irn_mode(xor2, mode_T);
5154 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* setcc on "not parity": result is 1 for odd parity */
5157 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5158 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit setcc result */
5161 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5162 nomem, new_node, mode_Bu);
5163 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform builtin popcount.
 *
 * Uses the popcnt instruction when available (SSE4.2/SSE4a); otherwise emits
 * the classic divide-and-conquer bit-counting sequence (Hacker's Delight,
 * ch. 5): pairwise sums with masks 0x55..., 0x33..., 0x0F..., 0x00FF00FF,
 * 0x0000FFFF. Additions are done via Lea so flags are not clobbered.
 */
5170 static ir_node *gen_popcount(ir_node *node)
5172 ir_node *param = get_Builtin_param(node, 0);
5173 dbg_info *dbgi = get_irn_dbg_info(node);
5175 ir_node *block = get_nodes_block(node);
5176 ir_node *new_block = be_transform_node(block);
5179 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5181 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5182 if (ia32_cg_config.use_popcnt) {
5183 ia32_address_mode_t am;
5184 ia32_address_t *addr = &am.addr;
5187 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5189 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5190 set_am_attributes(cnt, &am);
5191 set_ia32_ls_mode(cnt, get_irn_mode(param));
5193 SET_IA32_ORIG_NODE(cnt, node);
5194 return fix_mem_proj(cnt, &am);
5197 new_param = be_transform_node(param);
5199 /* do the standard popcount algo */
5200 /* TODO: This is stupid, we should transform this before the backend,
5201 * to get CSE, localopts, etc. for the operations
5202 * TODO: This is also not the optimal algorithm (it is just the starting
5203 * example in hackers delight, they optimize it more on the following page)
5204 * But I'm too lazy to fix this now, as the code should get lowered before
5205 * the backend anyway.
5208 /* m1 = x & 0x55555555 */
5209 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5210 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5213 simm = ia32_create_Immediate(NULL, 0, 1);
5214 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5216 /* m2 = s1 & 0x55555555 */
5217 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2: 2-bit partial sums */
5220 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5222 /* m4 = m3 & 0x33333333 */
5223 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5224 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5227 simm = ia32_create_Immediate(NULL, 0, 2);
5228 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5230 /* m5 = s2 & 0x33333333 */
5231 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5: 4-bit partial sums */
5234 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5236 /* m7 = m6 & 0x0F0F0F0F */
5237 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5238 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5241 simm = ia32_create_Immediate(NULL, 0, 4);
5242 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5244 /* m8 = s3 & 0x0F0F0F0F */
5245 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8: byte partial sums */
5248 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5250 /* m10 = m9 & 0x00FF00FF */
5251 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5252 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5255 simm = ia32_create_Immediate(NULL, 0, 8);
5256 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5258 /* m11 = s4 & 0x00FF00FF */
5259 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5261 /* m12 = m10 + m11 */
5262 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5264 /* m13 = m12 & 0x0000FFFF */
5265 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5266 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5268 /* s5 = m12 >> 16 */
5269 simm = ia32_create_Immediate(NULL, 0, 16);
5270 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5272 /* res = m13 + s5 */
5273 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
/**
 * Transform builtin byte swap.
 *
 * 32-bit: bswap instruction if available, otherwise three rotates
 * (16-bit rol 8, 32-bit rol 16, 16-bit rol 8) that reverse the byte order.
 * 16-bit: Bswap16 (xchg-style swap), always available.
 */
5279 static ir_node *gen_bswap(ir_node *node)
5281 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5282 dbg_info *dbgi = get_irn_dbg_info(node);
5284 ir_node *block = get_nodes_block(node);
5285 ir_node *new_block = be_transform_node(block);
5286 ir_mode *mode = get_irn_mode(param);
5287 unsigned size = get_mode_size_bits(mode);
/* 32-bit case */
5291 if (ia32_cg_config.use_bswap) {
5292 /* swap available */
5293 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* no bswap: rotate sequence; ls_mode selects 16- vs 32-bit rotates */
5295 ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5296 ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5297 ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5298 ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5299 ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5300 set_ia32_ls_mode(rol1, mode_Hu);
5301 set_ia32_ls_mode(rol2, mode_Iu);
5302 set_ia32_ls_mode(rol3, mode_Hu);
/* 16-bit case */
5307 /* swap16 always available */
5308 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5311 panic("Invalid bswap size (%d)", size);
/**
 * Transform builtin outport (x86 "out" instruction).
 *
 * param 0: the port (immediate if possible), param 1: the value to write;
 * the value's mode determines the operand size.
 */
5318 static ir_node *gen_outport(ir_node *node)
5320 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5321 ir_node *oldv = get_Builtin_param(node, 1);
5322 ir_mode *mode = get_irn_mode(oldv);
5323 ir_node *value = be_transform_node(oldv);
5324 ir_node *block = be_transform_node(get_nodes_block(node));
5325 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5326 dbg_info *dbgi = get_irn_dbg_info(node);
5328 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5329 set_ia32_ls_mode(res, mode);
/**
 * Transform builtin inport (x86 "in" instruction).
 *
 * The operand size is taken from the builtin's declared result type;
 * param 0 is the port (immediate if possible).
 */
5336 static ir_node *gen_inport(ir_node *node)
5338 ir_type *tp = get_Builtin_type(node);
5339 ir_type *rstp = get_method_res_type(tp, 0);
5340 ir_mode *mode = get_type_mode(rstp);
5341 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5342 ir_node *block = be_transform_node(get_nodes_block(node));
5343 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5344 dbg_info *dbgi = get_irn_dbg_info(node);
5346 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5347 set_ia32_ls_mode(res, mode);
5349 /* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Writes a small code stub to memory at `ptr`:
 *   B9 <env>        mov ecx, env
 *   E9 <rel32>      jmp callee (relative)
 * where rel32 = callee - (trampoline + 10), 10 being the stub size.
 * Returns a Tuple of (memory, trampoline address).
 */
5356 static ir_node *gen_inner_trampoline(ir_node *node)
5358 ir_node *ptr = get_Builtin_param(node, 0);
5359 ir_node *callee = get_Builtin_param(node, 1);
5360 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5361 ir_node *mem = get_Builtin_mem(node);
5362 ir_node *block = get_nodes_block(node);
5363 ir_node *new_block = be_transform_node(block);
5367 ir_node *trampoline;
5369 dbg_info *dbgi = get_irn_dbg_info(node);
5370 ia32_address_t addr;
5372 /* construct store address */
5373 memset(&addr, 0, sizeof(addr));
5374 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5376 if (addr.base == NULL) {
5377 addr.base = noreg_GP;
5379 addr.base = be_transform_node(addr.base);
5382 if (addr.index == NULL) {
5383 addr.index = noreg_GP;
5385 addr.index = be_transform_node(addr.index);
5387 addr.mem = be_transform_node(mem);
5389 /* mov ecx, <env> */
/* 0xB9 is the "mov ecx, imm32" opcode byte */
5390 val = ia32_create_Immediate(NULL, 0, 0xB9);
5391 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5392 addr.index, addr.mem, val);
5393 set_irn_pinned(store, get_irn_pinned(node));
5394 set_ia32_op_type(store, ia32_AddrModeD);
5395 set_ia32_ls_mode(store, mode_Bu);
5396 set_address(store, &addr);
/* store the 32-bit immediate operand: the environment pointer */
5400 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5401 addr.index, addr.mem, env);
5402 set_irn_pinned(store, get_irn_pinned(node));
5403 set_ia32_op_type(store, ia32_AddrModeD);
5404 set_ia32_ls_mode(store, mode_Iu);
5405 set_address(store, &addr);
5409 /* jmp rel <callee> */
/* 0xE9 is the "jmp rel32" opcode byte */
5410 val = ia32_create_Immediate(NULL, 0, 0xE9);
5411 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5412 addr.index, addr.mem, val);
5413 set_irn_pinned(store, get_irn_pinned(node));
5414 set_ia32_op_type(store, ia32_AddrModeD);
5415 set_ia32_ls_mode(store, mode_Bu);
5416 set_address(store, &addr);
5420 trampoline = be_transform_node(ptr);
5422 /* the callee is typically an immediate */
/* -10 accounts for the size of the stub when forming the relative target */
5423 if (is_SymConst(callee)) {
5424 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5426 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5427 add_ia32_am_offs_int(rel, -10);
/* rel = (callee - 10) - trampoline */
5429 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5431 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5432 addr.index, addr.mem, rel);
5433 set_irn_pinned(store, get_irn_pinned(node));
5434 set_ia32_op_type(store, ia32_AddrModeD);
5435 set_ia32_ls_mode(store, mode_Iu);
5436 set_address(store, &addr);
5441 return new_r_Tuple(new_block, 2, in);
/**
 * Transform a Builtin node: dispatch on the builtin kind to the specific
 * gen_* transformer; unsupported kinds panic.
 */
5447 static ir_node *gen_Builtin(ir_node *node)
5449 ir_builtin_kind kind = get_Builtin_kind(node);
5453 return gen_trap(node);
5454 case ir_bk_debugbreak:
5455 return gen_debugbreak(node);
5456 case ir_bk_return_address:
5457 return gen_return_address(node);
5458 case ir_bk_frame_address:
5459 return gen_frame_address(node);
5460 case ir_bk_prefetch:
5461 return gen_prefetch(node);
5463 return gen_ffs(node);
5465 return gen_clz(node);
5467 return gen_ctz(node);
5469 return gen_parity(node);
5470 case ir_bk_popcount:
5471 return gen_popcount(node);
5473 return gen_bswap(node);
5475 return gen_outport(node);
5477 return gen_inport(node);
5478 case ir_bk_inner_trampoline:
5479 return gen_inner_trampoline(node);
5481 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform Proj(Builtin): map the generic Builtin proj numbers onto the
 * projs of the already-transformed ia32 replacement node.
 * pn_Builtin_max+1 is the value result, pn_Builtin_M the memory result.
 */
5487 static ir_node *gen_Proj_Builtin(ir_node *proj)
5489 ir_node *node = get_Proj_pred(proj);
5490 ir_node *new_node = be_transform_node(node);
5491 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node IS the result */
5494 case ir_bk_return_address:
5495 case ir_bk_frame_address:
5500 case ir_bk_popcount:
5502 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
/* memory-only builtins */
5505 case ir_bk_debugbreak:
5506 case ir_bk_prefetch:
5508 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a result and a memory proj */
5511 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5512 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5514 assert(get_Proj_proj(proj) == pn_Builtin_M);
5515 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline was transformed into a Tuple (mem, address) */
5517 case ir_bk_inner_trampoline:
5518 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5519 return get_Tuple_pred(new_node, 1);
5521 assert(get_Proj_proj(proj) == pn_Builtin_M);
5522 return get_Tuple_pred(new_node, 0);
5525 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform be_IncSP by duplication; the stack-pointer adjustment is emitted
 * as an add/sub on esp and therefore modifies the flags.
 */
5528 static ir_node *gen_be_IncSP(ir_node *node)
5530 ir_node *res = be_duplicate_node(node);
5531 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call: renumber be_Call proj numbers to
 * ia32_Call proj numbers and pin registers on the well-known outputs.
 */
5539 static ir_node *gen_Proj_be_Call(ir_node *node)
5541 ir_node *call = get_Proj_pred(node);
5542 ir_node *new_call = be_transform_node(call);
5543 dbg_info *dbgi = get_irn_dbg_info(node);
5544 long proj = get_Proj_proj(node);
5545 ir_mode *mode = get_irn_mode(node);
5548 if (proj == pn_be_Call_M) {
/* NOTE(review): n_ia32_Call_mem is an INPUT index; a proj number
 * (pn_ia32_Call_M) looks intended here — verify against upstream */
5549 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5551 /* transform call modes */
5552 if (mode_is_data(mode)) {
5553 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5557 /* Map from be_Call to ia32_Call proj number */
5558 if (proj == pn_be_Call_sp) {
5559 proj = pn_ia32_Call_stack;
5560 } else if (proj == pn_be_Call_M) {
5561 proj = pn_ia32_Call_M;
5562 } else if (proj == pn_be_Call_X_except) {
5563 proj = pn_ia32_Call_X_except;
5564 } else if (proj == pn_be_Call_X_regular) {
5565 proj = pn_ia32_Call_X_regular;
/* result proj: search the ia32_Call output with the same limited register */
5567 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5568 int const n_outs = arch_get_irn_n_outs(new_call);
5571 assert(proj >= pn_be_Call_first_res);
5572 assert(req->type & arch_register_req_type_limited);
5574 for (i = 0; i < n_outs; ++i) {
5575 arch_register_req_t const *const new_req
5576 = arch_get_irn_register_req_out(new_call, i);
5578 if (!(new_req->type & arch_register_req_type_limited) ||
5579 new_req->cls != req->cls ||
5580 *new_req->limited != *req->limited)
5589 res = new_rd_Proj(dbgi, new_call, mode, proj);
5591 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5593 case pn_ia32_Call_stack:
5594 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5597 case pn_ia32_Call_fpcw:
5598 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/**
 * Transform the Projs from a Cmp.
 * A Proj directly on a Cmp must not reach the backend; such mode_b values
 * are expected to be lowered beforehand, so this always panics.
 */
5608 static ir_node *gen_Proj_Cmp(ir_node *node)
5610 /* this probably means not all mode_b nodes were lowered... */
5611 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform Proj(ASM): the memory proj is always the last output of the
 * transformed ASM node; data projs keep their position but get a
 * register-class mode.
 */
5615 static ir_node *gen_Proj_ASM(ir_node *node)
5617 ir_mode *mode = get_irn_mode(node);
5618 ir_node *pred = get_Proj_pred(node);
5619 ir_node *new_pred = be_transform_node(pred);
5620 long pos = get_Proj_proj(node);
5622 if (mode == mode_M) {
5623 pos = arch_get_irn_n_outs(new_pred)-1;
5624 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
/* float results come back in an x87/xmm register class mode */
5626 } else if (mode_is_float(mode)) {
5629 panic("unexpected proj mode at ASM");
5632 return new_r_Proj(new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes: dispatch on the opcode of
 * the Proj's predecessor to the specialized Proj transformers.
 */
5638 static ir_node *gen_Proj(ir_node *node)
5640 ir_node *pred = get_Proj_pred(node);
5643 switch (get_irn_opcode(pred)) {
5645 return gen_Proj_Load(node);
5647 return gen_Proj_Store(node);
5649 return gen_Proj_ASM(node);
5651 return gen_Proj_Builtin(node);
5653 return gen_Proj_Div(node);
5655 return gen_Proj_Mod(node);
5657 return gen_Proj_CopyB(node);
5659 return gen_Proj_be_SubSP(node);
5661 return gen_Proj_be_AddSP(node);
5663 return gen_Proj_be_Call(node);
5665 return gen_Proj_Cmp(node);
5667 proj = get_Proj_proj(node);
5669 case pn_Start_X_initial_exec: {
5670 ir_node *block = get_nodes_block(pred);
5671 ir_node *new_block = be_transform_node(block);
5672 dbg_info *dbgi = get_irn_dbg_info(node);
5673 /* we exchange the ProjX with a jump */
5674 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5682 if (is_ia32_l_FloattoLL(pred)) {
5683 return gen_Proj_l_FloattoLL(node);
5685 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5689 ir_mode *mode = get_irn_mode(node);
5690 if (ia32_mode_needs_gp_reg(mode)) {
5691 ir_node *new_pred = be_transform_node(pred);
5692 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5693 get_Proj_proj(node));
/* keep the original node number for debugging purposes */
5694 new_proj->node_nr = node->node_nr;
5699 return be_duplicate_node(node);
5703 * Enters all transform functions into the generic pointer
5705 static void register_transformers(void)
5707 /* first clear the generic function pointer for all ops */
5708 be_start_transform_setup();
5710 be_set_transform_function(op_Add, gen_Add);
5711 be_set_transform_function(op_And, gen_And);
5712 be_set_transform_function(op_ASM, ia32_gen_ASM);
5713 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5714 be_set_transform_function(op_be_Call, gen_be_Call);
5715 be_set_transform_function(op_be_Copy, gen_be_Copy);
5716 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5717 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5718 be_set_transform_function(op_be_Return, gen_be_Return);
5719 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5720 be_set_transform_function(op_Builtin, gen_Builtin);
5721 be_set_transform_function(op_Cmp, gen_Cmp);
5722 be_set_transform_function(op_Cond, gen_Cond);
5723 be_set_transform_function(op_Const, gen_Const);
5724 be_set_transform_function(op_Conv, gen_Conv);
5725 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5726 be_set_transform_function(op_Div, gen_Div);
5727 be_set_transform_function(op_Eor, gen_Eor);
5728 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5729 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5730 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5731 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5732 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5733 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5734 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5735 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5736 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5737 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5738 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5739 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5740 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5741 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5742 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5743 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5744 be_set_transform_function(op_IJmp, gen_IJmp);
5745 be_set_transform_function(op_Jmp, gen_Jmp);
5746 be_set_transform_function(op_Load, gen_Load);
5747 be_set_transform_function(op_Minus, gen_Minus);
5748 be_set_transform_function(op_Mod, gen_Mod);
5749 be_set_transform_function(op_Mul, gen_Mul);
5750 be_set_transform_function(op_Mulh, gen_Mulh);
5751 be_set_transform_function(op_Mux, gen_Mux);
5752 be_set_transform_function(op_Not, gen_Not);
5753 be_set_transform_function(op_Or, gen_Or);
5754 be_set_transform_function(op_Phi, gen_Phi);
5755 be_set_transform_function(op_Proj, gen_Proj);
5756 be_set_transform_function(op_Rotl, gen_Rotl);
5757 be_set_transform_function(op_Shl, gen_Shl);
5758 be_set_transform_function(op_Shr, gen_Shr);
5759 be_set_transform_function(op_Shrs, gen_Shrs);
5760 be_set_transform_function(op_Store, gen_Store);
5761 be_set_transform_function(op_Sub, gen_Sub);
5762 be_set_transform_function(op_Switch, gen_Switch);
5763 be_set_transform_function(op_SymConst, gen_SymConst);
5764 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5768 * Pre-transform all unknown and noreg nodes.
5770 static void ia32_pretransform_node(void)
5772 ir_graph *irg = current_ir_graph;
5773 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5775 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5776 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5777 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5778 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5779 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5781 nomem = get_irg_no_mem(irg);
5782 noreg_GP = ia32_new_NoReg_gp(irg);
/**
 * Post-process all calls if we are in SSE mode.
 * The x87 ABI returns float results in st0; for SSE2 code each float call
 * result is stored to the frame via vfst and reloaded into an xmm register
 * via xLoad. xStore users are instead patched directly into a vfst.
 */
5790 static void postprocess_fp_call_results(void)
5794 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5795 ir_node *call = call_list[i];
5796 ir_type *mtp = call_types[i];
5799 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5800 ir_type *res_tp = get_method_res_type(mtp, j);
5801 ir_node *res, *new_res;
5802 const ir_edge_t *edge, *next;
5805 if (! is_atomic_type(res_tp)) {
5806 /* no floating point return */
5809 res_mode = get_type_mode(res_tp);
5810 if (! mode_is_float(res_mode)) {
5811 /* no floating point return */
/* pn_ia32_Call_vf0 + j: j-th virtual-fp result of the call */
5815 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5818 /* now patch the users */
5819 foreach_out_edge_safe(res, edge, next) {
5820 ir_node *succ = get_edge_src_irn(edge);
/* Keeps are left alone */
5823 if (be_is_Keep(succ))
5826 if (is_ia32_xStore(succ)) {
5827 /* an xStore can be patched into an vfst */
5828 dbg_info *db = get_irn_dbg_info(succ);
5829 ir_node *block = get_nodes_block(succ);
5830 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5831 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5832 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5833 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5834 ir_mode *mode = get_ia32_ls_mode(succ);
5836 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5837 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5838 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5839 if (is_ia32_use_frame(succ))
5840 set_ia32_use_frame(st);
5841 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5842 set_irn_pinned(st, get_irn_pinned(succ));
5843 set_ia32_op_type(st, ia32_AddrModeD);
/* the replacement relies on matching proj numbers of both stores */
5845 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5846 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5847 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* lazily create the store/load sequence once for all remaining users */
5854 if (new_res == NULL) {
5855 dbg_info *db = get_irn_dbg_info(call);
5856 ir_node *block = get_nodes_block(call);
5857 ir_node *frame = get_irg_frame(current_ir_graph);
5858 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5859 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5860 ir_node *vfst, *xld, *new_mem;
5863 /* store st(0) on stack */
5864 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5866 set_ia32_op_type(vfst, ia32_AddrModeD);
5867 set_ia32_use_frame(vfst);
5869 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5871 /* load into SSE register */
5872 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5874 set_ia32_op_type(xld, ia32_AddrModeS);
5875 set_ia32_use_frame(xld);
5877 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5878 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute the old memory users to the new memory chain */
5880 if (old_mem != NULL) {
5881 edges_reroute(old_mem, new_mem);
5885 set_irn_n(succ, get_edge_src_pos(edge), new_res);
/* do the transformation */
/**
 * Transform a whole graph from firm into the ia32 representation:
 * set up transformers and per-graph state, run the generic transform walk,
 * and post-process SSE float call results.
 */
5892 void ia32_transform_graph(ir_graph *irg)
5896 register_transformers();
5897 initial_fpcw = NULL;
5898 ia32_no_pic_adjust = 0;
5900 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* heights are needed by address-mode matching (dependency checks) */
5902 be_timer_push(T_HEIGHTS);
5903 ia32_heights = heights_new(irg);
5904 be_timer_pop(T_HEIGHTS);
5905 ia32_calculate_non_address_mode_nodes(irg);
5907 /* the transform phase is not safe for CSE (yet) because several nodes get
5908 * attributes set after their creation */
5909 cse_last = get_opt_cse();
5912 call_list = NEW_ARR_F(ir_node *, 0);
5913 call_types = NEW_ARR_F(ir_type *, 0);
5914 be_transform_graph(irg, ia32_pretransform_node);
5916 if (ia32_cg_config.use_sse2)
5917 postprocess_fp_call_results();
5918 DEL_ARR_F(call_types);
5919 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5921 set_opt_cse(cse_last);
5923 ia32_free_non_address_mode_nodes();
5924 heights_free(ia32_heights);
5925 ia32_heights = NULL;
5928 void ia32_init_transform(void)
5930 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");