2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
/* convenience aliases for the backend register-class modes */
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* FPU control word nodes: the pre-transform node and its transformed copy
 * (lazily created by get_fpcw()) */
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
/* Constructor-function signatures for the generic binop/unop/shift helpers
 * below; each matches a generated new_bd_ia32_* constructor. */
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations for helpers defined later in this file */
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* it's enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if constant can be created with a simple float command
/* "simple" here means the x87 unit can materialize the value directly
 * (e.g. 0.0 and 1.0) instead of loading it from a constant-pool entity. */
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
151 * returns true if constant can be created with a simple float command
/* SSE variant of the check above: zero is always cheap (xorps/xZero);
 * with CONSTRUCT_SSE_CONST more values are synthesized from immediates. */
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
168 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
/* In PIC mode every symbolic address must be relative to the PIC base
 * register, so loads of float constants etc. use it as AM base. */
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_get_irg_options(irg)->pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
199 * Transforms a Const.
/* Firm Const -> ia32: integers become an ia32_Const immediate; floats are
 * either synthesized (zero/one and, with CONSTRUCT_SSE_CONST, more) or
 * loaded from a constant-pool entity via xLoad (SSE) / vfld (x87). */
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
/* --- SSE2 path --- */
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
/* 0.0: cheapest form, an xorps-style zero */
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
225 } else if (tarval_is_one(tv)) {
/* 1.0: build from all-ones via shift-left then shift-right
 * (26/2 for float, 55/2 for double select the exponent bits) */
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
/* shift the 32-bit payload into the upper half of the xmm register */
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load from a constant-pool entity */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
/* the load can be rematerialized by the spiller instead of spilled */
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path --- */
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 } else { /* non-float mode */
/* integer constants become a plain ia32 Const immediate */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
337 * Transforms a SymConst.
/* Addresses of entities: float modes get an address-mode load; TLS entities
 * are addressed relative to the LdTls base via a Lea; everything else
 * becomes an ia32 Const carrying the entity as symbolic immediate. */
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
/* Build a fixed-layout 2-element array type of @p tp (used for the
 * float[2] constant tables below); size is doubled, alignment kept. */
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
/* Results are cached per element mode (F/D/other) in function-local
 * statics, so each array type is built at most once. */
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
/* sign-bit and abs masks for float/double, plus the unsigned-long-long
 * conversion bias; the last field selects the tarval mode below */
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
/* one cached entity per known constant; created lazily on first use */
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error (ia32_gen_fp_known_const)");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
456 if (kct == ia32_ULLBIAS) {
/* ULLBIAS is special: a private float[2] = { 0.0f, bias } table */
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
/* constants kept in the pool are stored as doubles; reject values
 * that would lose precision when represented as mode_D */
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
/* otherwise the candidate must be the result Proj of a Load in this block */
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Result record of match_arguments(): matched operands, address-mode
 * description and operation properties. (Further fields exist in the
 * full definition that are not visible in this chunk.) */
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type;
559 unsigned commutative : 1;
560 unsigned ins_permuted : 1;
/* Fill @p addr from a plain pointer+mem pair: decompose @p ptr into
 * base/index/scale/offset and transform the pieces into the new graph,
 * substituting noreg_GP for absent base/index. */
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/* Fill @p am->addr for a source-address-mode operand @p node, which is
 * either a float Const (addressed via its constant-pool entity) or a
 * Proj(Load) whose address is decomposed into AM components. */
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
584 if (is_Const(node)) {
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
/* the entity may be wider than the node's mode, use its type's mode */
596 am->ls_mode = get_type_mode(get_entity_type(entity));
597 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address and memory from the Load itself */
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all components of @p addr (scale, symconst, offset, TLS segment,
 * frame entity) onto the ia32 attributes of @p node. */
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
/* mark commutative ops so later phases may swap the operands */
646 set_ia32_commutative(node);
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 /* we only want to skip the conv when we're the only user
666 * (because this test is used in the context of address-mode selection
667 * and we don't want to use address mode for multiple users) */
668 if (get_irn_n_edges(node) > 1)
671 src_mode = get_irn_mode(get_Conv_op(node));
672 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must be narrower
 * or equal in width */
674 ia32_mode_needs_gp_reg(src_mode) &&
675 ia32_mode_needs_gp_reg(dest_mode) &&
676 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
679 /** Skip all Down-Conv's on a given node and return the resulting node. */
680 ir_node *ia32_skip_downconv(ir_node *node)
682 while (is_downconv(node))
683 node = get_Conv_op(node);
/* Like is_downconv() but for Convs between equally-sized GP modes
 * (i.e. pure signedness changes, which are no-ops at machine level). */
688 static bool is_sameconv(ir_node *node)
696 /* we only want to skip the conv when we're the only user
697 * (because this test is used in the context of address-mode selection
698 * and we don't want to use address mode for multiple users) */
699 if (get_irn_n_edges(node) > 1)
702 src_mode = get_irn_mode(get_Conv_op(node));
703 dest_mode = get_irn_mode(node);
705 ia32_mode_needs_gp_reg(src_mode) &&
706 ia32_mode_needs_gp_reg(dest_mode) &&
707 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
710 /** Skip all signedness convs */
712 static ir_node *ia32_skip_sameconv(ir_node *node)
713 while (is_sameconv(node))
714 node = get_Conv_op(node);
/* Widen @p node to 32 bit via an I2I conversion; the target mode's
 * signedness follows the source mode's. @p orig_node is kept for
 * debug-info attribution. */
719 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
721 ir_mode *mode = get_irn_mode(node);
726 if (mode_is_signed(mode)) {
731 block = get_nodes_block(node);
732 dbgi = get_irn_dbg_info(node);
734 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
738 * matches operands of a node into ia32 addressing/operand modes. This covers
739 * usage of source address mode, immediates, operations with non 32-bit modes,
741 * The resulting data is filled into the @p am struct. block is the block
742 * of the node whose arguments are matched. op1, op2 are the first and second
743 * input that are matched (op1 may be NULL). other_op is another unrelated
744 * input that is not matched! but which is needed sometimes to check if AM
745 * for op1/op2 is legal.
746 * @p flags describes the supported modes of the operation in detail.
748 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
749 ir_node *op1, ir_node *op2, ir_node *other_op,
752 ia32_address_t *addr = &am->addr;
753 ir_mode *mode = get_irn_mode(op2);
754 int mode_bits = get_mode_size_bits(mode);
755 ir_node *new_op1, *new_op2;
757 unsigned commutative;
758 int use_am_and_immediates;
761 memset(am, 0, sizeof(am[0]));
/* decode the individual capabilities from the flags bitset */
763 commutative = (flags & match_commutative) != 0;
764 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
765 use_am = (flags & match_am) != 0;
766 use_immediate = (flags & match_immediate) != 0;
767 assert(!use_am_and_immediates || use_immediate);
770 assert(!commutative || op1 != NULL);
771 assert(use_am || !(flags & match_8bit_am));
772 assert(use_am || !(flags & match_16bit_am));
/* sub-32-bit AM is only usable when explicitly allowed */
774 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
775 (mode_bits == 16 && !(flags & match_16bit_am))) {
779 /* we can simply skip downconvs for mode neutral nodes: the upper bits
780 * can be random for these operations */
781 if (flags & match_mode_neutral) {
782 op2 = ia32_skip_downconv(op2);
784 op1 = ia32_skip_downconv(op1);
/* signedness-only convs are always skippable */
787 op2 = ia32_skip_sameconv(op2);
789 op1 = ia32_skip_sameconv(op1);
793 /* match immediates. firm nodes are normalized: constants are always on the
796 if (!(flags & match_try_am) && use_immediate) {
797 new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source address mode on op2 first, then (if commutative) on op1 */
800 if (new_op2 == NULL &&
801 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
802 build_address(am, op2, ia32_create_am_normal);
803 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
804 if (mode_is_float(mode)) {
805 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
809 am->op_type = ia32_AddrModeS;
810 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
812 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
814 build_address(am, op1, ia32_create_am_normal);
816 if (mode_is_float(mode)) {
817 noreg = ia32_new_NoReg_vfp(current_ir_graph);
822 if (new_op2 != NULL) {
825 new_op1 = be_transform_node(op2);
/* operands were swapped relative to the original node */
827 am->ins_permuted = true;
829 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register operands */
831 am->op_type = ia32_Normal;
833 if (flags & match_try_am) {
839 mode = get_irn_mode(op2);
840 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
841 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
843 new_op2 = create_upconv(op2, NULL);
844 am->ls_mode = mode_Iu;
846 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
848 new_op2 = be_transform_node(op2);
849 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill in defaults for unused address components */
852 if (addr->base == NULL)
853 addr->base = noreg_GP;
854 if (addr->index == NULL)
855 addr->index = noreg_GP;
856 if (addr->mem == NULL)
859 am->new_op1 = new_op1;
860 am->new_op2 = new_op2;
861 am->commutative = commutative;
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do if the matched operand carried no memory Proj */
879 if (am->mem_proj == NULL)
882 /* we have to create a mode_T so the old MemProj can attach to us */
883 mode = get_irn_mode(node);
884 load = get_Proj_pred(am->mem_proj);
/* redirect users of the folded Load to the new AM node */
886 be_set_transformed_node(load, node);
888 if (mode != mode_T) {
889 set_irn_mode(node, mode_T);
890 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 construct_binop_func *func, match_flags_t flags)
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 block = get_nodes_block(node);
914 match_arguments(&am, block, op1, op2, NULL, flags);
916 dbgi = get_irn_dbg_info(node);
917 new_block = be_transform_node(block);
918 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 am.new_op1, am.new_op2);
920 set_am_attributes(new_node, &am);
921 /* we can't use source address mode anymore when using immediates */
922 if (!(flags & match_am_and_immediates) &&
923 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 set_ia32_am_support(new_node, ia32_am_none);
925 SET_IA32_ORIG_NODE(new_node, node);
927 new_node = fix_mem_proj(new_node, &am);
933 * Generic names for the inputs of an ia32 binary op.
936 n_ia32_l_binop_left, /**< ia32 left input */
937 n_ia32_l_binop_right, /**< ia32 right input */
938 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* verify the generic indices line up with the generated Adc/Sbb node
 * layouts so gen_binop_flags() can use them for both */
940 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
941 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
942 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
943 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
944 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
945 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructor ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 ir_node *src_block = get_nodes_block(node);
959 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 ir_node *block, *new_node, *new_eflags;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not make the operands depend
 * on the flags-producing node's memory */
967 match_arguments(&am, src_block, op1, op2, eflags, flags);
969 dbgi = get_irn_dbg_info(node);
970 block = be_transform_node(src_block);
971 new_eflags = be_transform_node(eflags);
972 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2, new_eflags);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
/* Return the transformed initial FPU control word node, creating it
 * from old_initial_fpcw on first use (cached in initial_fpcw). */
986 static ir_node *get_fpcw(void)
988 if (initial_fpcw != NULL)
991 initial_fpcw = be_transform_node(old_initial_fpcw)
/* Skip float Convs that do not narrow the value (widening is lossless,
 * so the x87 ops can consume the predecessor directly). */
995 static ir_node *skip_float_upconv(ir_node *node)
997 ir_mode *mode = get_irn_mode(node);
998 assert(mode_is_float(mode));
1000 while (is_Conv(node)) {
1001 ir_node *pred = get_Conv_op(node);
1002 ir_mode *pred_mode = get_irn_mode(pred);
1005 * suboptimal, but without this check the address mode matcher
1006 * can incorrectly think that something has only 1 user
1008 if (get_irn_n_edges(node) > 1)
1011 if (!mode_is_float(pred_mode)
1012 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1021 * Construct a standard binary operation, set AM and immediate if required.
1023 * @param op1 The first operand
1024 * @param op2 The second operand
1025 * @param func The node constructor function
1026 * @return The constructed ia32 node.
1028 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1029 construct_binop_float_func *func)
1035 ia32_address_mode_t am;
1036 ia32_address_t *addr = &am.addr;
1037 ia32_x87_attr_t *attr;
1038 /* All operations are considered commutative, because there are reverse
1040 match_flags_t flags = match_commutative | match_am;
1042 op1 = skip_float_upconv(op1);
1043 op2 = skip_float_upconv(op2);
1045 block = get_nodes_block(node);
1046 match_arguments(&am, block, op1, op2, NULL, flags);
1048 dbgi = get_irn_dbg_info(node);
1049 new_block = be_transform_node(block);
/* x87 ops additionally take the FPU control word as input */
1050 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1051 am.new_op1, am.new_op2, get_fpcw());
1052 set_am_attributes(new_node, &am);
/* record operand swapping so the x87 simulator can emit the reverse op */
1054 attr = get_ia32_x87_attr(new_node);
1055 attr->attr.data.ins_permuted = am.ins_permuted;
1057 SET_IA32_ORIG_NODE(new_node, node);
1059 new_node = fix_mem_proj(new_node, &am);
1065 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1067 * @param op1 The first operand
1068 * @param op2 The second operand
1069 * @param func The node constructor function
1070 * @return The constructed ia32 node.
1072 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1073 construct_shift_func *func,
1074 match_flags_t flags)
1077 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1078 ir_mode *mode = get_irn_mode(node);
1080 assert(! mode_is_float(mode));
1081 assert(flags & match_immediate);
1082 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* ia32 shift instructions implicitly mask the count to 5 bits */
1084 if (get_mode_modulo_shift(mode) != 32)
1085 panic("modulo shift!=32 not supported by ia32 backend");
1087 if (flags & match_mode_neutral) {
1088 op1 = ia32_skip_downconv(op1);
1089 new_op1 = be_transform_node(op1);
1090 } else if (get_mode_size_bits(mode) != 32) {
1091 new_op1 = create_upconv(op1, node);
1093 new_op1 = be_transform_node(op1);
1096 /* the shift amount can be any mode that is bigger than 5 bits, since all
1097 * other bits are ignored anyway */
1098 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1099 ir_node *const op = get_Conv_op(op2);
1100 if (mode_is_float(get_irn_mode(op)))
1103 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1105 new_op2 = create_immediate_or_transform(op2, 0);
1107 dbgi = get_irn_dbg_info(node);
1108 block = get_nodes_block(node);
1109 new_block = be_transform_node(block);
1110 new_node = func(dbgi, new_block, new_op1, new_op2);
1111 SET_IA32_ORIG_NODE(new_node, node);
1113 /* lowered shift instruction may have a dependency operand, handle it here */
1114 if (get_irn_arity(node) == 3) {
1115 /* we have a dependency */
1116 ir_node* dep = get_irn_n(node, 2);
1117 if (get_irn_n_edges(dep) > 1) {
1118 /* ... which has at least one user other than 'node' */
1119 ir_node *new_dep = be_transform_node(dep);
1120 add_irn_dep(new_node, new_dep);
1129 * Construct a standard unary operation, set AM and immediate if required.
1131 * @param op The operand
1132 * @param func The node constructor function
1133 * @return The constructed ia32 node.
1135 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1136 match_flags_t flags)
1139 ir_node *block, *new_block, *new_op, *new_node;
/* unops only support mode-neutral matching (no AM, no immediates) */
1141 assert(flags == 0 || flags == match_mode_neutral);
1142 if (flags & match_mode_neutral) {
1143 op = ia32_skip_downconv(op);
1146 new_op = be_transform_node(op);
1147 dbgi = get_irn_dbg_info(node);
1148 block = get_nodes_block(node);
1149 new_block = be_transform_node(block);
1150 new_node = func(dbgi, new_block, new_op);
1152 SET_IA32_ORIG_NODE(new_node, node);
/* Materialize an ia32_address_t as a Lea node computing the effective
 * address into a GP register. */
1157 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1158 ia32_address_t *addr)
1168 base = be_transform_node(base);
1175 idx = be_transform_node(idx);
1178 /* segment overrides are ineffective for Leas :-( so we have to patch
1180 if (addr->tls_segment) {
1181 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1182 assert(addr->symconst_ent != NULL);
1183 if (base == noreg_GP)
/* fold the TLS base into the base operand via an extra Lea */
1186 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1187 addr->tls_segment = false;
1190 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1191 set_address(res, addr);
1197 * Returns non-zero if a given address mode has a symbolic or
1198 * numerical offset != 0, or references a frame entity.
1200 static int am_has_immediates(const ia32_address_t *addr)
1202 return addr->offset != 0 || addr->symconst_ent != NULL
1203 || addr->frame_entity || addr->use_frame;
/* Constructor signature shared by the ShlD/ShrD double-width shifts. */
1206 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1207 ir_node *high, ir_node *low,
1211 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1212 * op1 - target to be shifted
1213 * op2 - contains bits to be shifted into target
1215 * Only op3 can be an immediate.
1217 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1218 ir_node *high, ir_node *low, ir_node *count,
1219 new_shiftd_func func)
1221 ir_node *new_block = be_transform_node(block);
1222 ir_node *new_high = be_transform_node(high);
1223 ir_node *new_low = be_transform_node(low);
1227 /* the shift amount can be any mode that is bigger than 5 bits, since all
1228 * other bits are ignored anyway */
1229 while (is_Conv(count) &&
1230 get_irn_n_edges(count) == 1 &&
1231 mode_is_int(get_irn_mode(count))) {
1232 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1233 count = get_Conv_op(count);
1235 new_count = create_immediate_or_transform(count, 0);
1237 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1242 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1245 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1247 if (is_Const(value1) && is_Const(value2)) {
1248 ir_tarval *tv1 = get_Const_tarval(value1);
1249 ir_tarval *tv2 = get_Const_tarval(value2);
1250 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1251 long v1 = get_tarval_long(tv1);
1252 long v2 = get_tarval_long(tv2);
/* true for the pair (x, 32-x) with x being the smaller value */
1253 return v1 <= v2 && v2 == 32-v1;
/**
 * Tries to match an Or/Add of complementary Shl/Shr parts (either with
 * constant complementary counts or the lower_dw-generated variable-count
 * pattern) and turns it into a ShlD/ShrD. Returns NULL if nothing matches.
 */
1259 static ir_node *match_64bit_shift(ir_node *node)
1261 ir_node *op1 = get_binop_left(node);
1262 ir_node *op2 = get_binop_right(node);
1263 assert(is_Or(node) || is_Add(node));
1271 /* match ShlD operation */
1272 if (is_Shl(op1) && is_Shr(op2)) {
1273 ir_node *shl_right = get_Shl_right(op1);
1274 ir_node *shl_left = get_Shl_left(op1);
1275 ir_node *shr_right = get_Shr_right(op2);
1276 ir_node *shr_left = get_Shr_left(op2);
1277 /* constant ShlD operation */
1278 if (is_complementary_shifts(shl_right, shr_right)) {
1279 dbg_info *dbgi = get_irn_dbg_info(node);
1280 ir_node *block = get_nodes_block(node);
1281 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1284 /* constant ShrD operation */
1285 if (is_complementary_shifts(shr_right, shl_right)) {
1286 dbg_info *dbgi = get_irn_dbg_info(node);
1287 ir_node *block = get_nodes_block(node);
1288 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1291 /* lower_dw produces the following for ShlD:
1292 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1293 if (is_Shr(shr_left) && is_Not(shr_right)
1294 && is_Const_1(get_Shr_right(shr_left))
1295 && get_Not_op(shr_right) == shl_right) {
1296 dbg_info *dbgi = get_irn_dbg_info(node);
1297 ir_node *block = get_nodes_block(node);
1298 ir_node *val_h = get_Shr_left(shr_left);
1299 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1302 /* lower_dw produces the following for ShrD:
1303 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1304 if (is_Shl(shl_left) && is_Not(shl_right)
1305 && is_Const_1(get_Shl_right(shl_left))
1306 && get_Not_op(shl_right) == shr_right) {
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_node *block = get_nodes_block(node);
1309 ir_node *val_h = get_Shl_left(shl_left);
1310 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1319 * Creates an ia32 Add.
1321 * @return the created ia32 Add node
1323 static ir_node *gen_Add(ir_node *node)
1325 ir_mode *mode = get_irn_mode(node);
1326 ir_node *op1 = get_Add_left(node);
1327 ir_node *op2 = get_Add_right(node);
1329 ir_node *block, *new_block, *new_node, *add_immediate_op;
1330 ia32_address_t addr;
1331 ia32_address_mode_t am;
/* an Add can be the root of a double-word shift pattern */
1333 new_node = match_64bit_shift(node);
1334 if (new_node != NULL)
1337 if (mode_is_float(mode)) {
1338 if (ia32_cg_config.use_sse2)
1339 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1340 match_commutative | match_am);
1342 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1345 ia32_mark_non_am(node);
1347 op2 = ia32_skip_downconv(op2);
1348 op1 = ia32_skip_downconv(op1);
/* strategy (in order):
1352 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1353 * 1. Add with immediate -> Lea
1354 * 2. Add with possible source address mode -> Add
1355 * 3. Otherwise -> Lea
 */
1357 memset(&addr, 0, sizeof(addr));
1358 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1359 add_immediate_op = NULL;
1361 dbgi = get_irn_dbg_info(node);
1362 block = get_nodes_block(node);
1363 new_block = be_transform_node(block);
/* case 0: whole tree folded into an immediate -> materialise a Const */
1366 if (addr.base == NULL && addr.index == NULL) {
1367 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1368 addr.symconst_sign, 0, addr.offset);
1369 SET_IA32_ORIG_NODE(new_node, node);
1372 /* add with immediate? */
1373 if (addr.index == NULL) {
1374 add_immediate_op = addr.base;
1375 } else if (addr.base == NULL && addr.scale == 0) {
1376 add_immediate_op = addr.index;
1379 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1380 if (!am_has_immediates(&addr)) {
1381 #ifdef DEBUG_libfirm
1382 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1385 return be_transform_node(add_immediate_op);
1388 new_node = create_lea_from_address(dbgi, new_block, &addr);
1389 SET_IA32_ORIG_NODE(new_node, node);
1393 /* test if we can use source address mode */
1394 match_arguments(&am, block, op1, op2, NULL, match_commutative
1395 | match_mode_neutral | match_am | match_immediate | match_try_am);
1397 /* construct an Add with source address mode */
1398 if (am.op_type == ia32_AddrModeS) {
1399 ia32_address_t *am_addr = &am.addr;
1400 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1401 am_addr->index, am_addr->mem, am.new_op1,
1403 set_am_attributes(new_node, &am);
1404 SET_IA32_ORIG_NODE(new_node, node);
1406 new_node = fix_mem_proj(new_node, &am);
1411 /* otherwise construct a lea */
1412 new_node = create_lea_from_address(dbgi, new_block, &addr);
1413 SET_IA32_ORIG_NODE(new_node, node);
1418 * Creates an ia32 Mul.
1420 * @return the created ia32 Mul node
1422 static ir_node *gen_Mul(ir_node *node)
1424 ir_node *op1 = get_Mul_left(node);
1425 ir_node *op2 = get_Mul_right(node);
1426 ir_mode *mode = get_irn_mode(node);
/* floats: SSE2 mulss/mulsd, otherwise x87 fmul */
1428 if (mode_is_float(mode)) {
1429 if (ia32_cg_config.use_sse2)
1430 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1431 match_commutative | match_am);
1433 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul, allowing AM and immediate operands */
1435 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1436 match_commutative | match_am | match_mode_neutral |
1437 match_immediate | match_am_and_immediates);
1441 * Creates an ia32 Mulh.
1442 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1443 * this result while Mul returns the lower 32 bit.
1445 * @return the created ia32 Mulh node
1447 static ir_node *gen_Mulh(ir_node *node)
1449 dbg_info *dbgi = get_irn_dbg_info(node);
1450 ir_node *op1 = get_Mulh_left(node);
1451 ir_node *op2 = get_Mulh_right(node);
1452 ir_mode *mode = get_irn_mode(node);
1454 ir_node *proj_res_high;
/* only 32x32->64 widening multiply exists on ia32 */
1456 if (get_mode_size_bits(mode) != 32) {
1457 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand imul, unsigned -> mul; result is the high Proj */
1460 if (mode_is_signed(mode)) {
1461 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1462 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1464 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1465 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1467 return proj_res_high;
1471 * Creates an ia32 And.
1473 * @return The created ia32 And node
1475 static ir_node *gen_And(ir_node *node)
1477 ir_node *op1 = get_And_left(node);
1478 ir_node *op2 = get_And_right(node);
1479 assert(! mode_is_float(get_irn_mode(node)));
1481 /* is it a zero extension? */
1482 if (is_Const(op2)) {
1483 ir_tarval *tv = get_Const_tarval(op2);
1484 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF == movzx from 8/16 bit */
1486 if (v == 0xFF || v == 0xFFFF) {
1487 dbg_info *dbgi = get_irn_dbg_info(node);
1488 ir_node *block = get_nodes_block(node);
1495 assert(v == 0xFFFF);
1498 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate support */
1503 return gen_binop(node, op1, op2, new_bd_ia32_And,
1504 match_commutative | match_mode_neutral | match_am | match_immediate);
1508 * Creates an ia32 Or.
1510 * @return The created ia32 Or node
1512 static ir_node *gen_Or(ir_node *node)
1514 ir_node *op1 = get_Or_left(node);
1515 ir_node *op2 = get_Or_right(node);
/* an Or may be the root of a lowered double-word shift */
1518 res = match_64bit_shift(node);
1522 assert (! mode_is_float(get_irn_mode(node)));
1523 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1524 | match_mode_neutral | match_am | match_immediate);
1530 * Creates an ia32 Eor.
1532 * @return The created ia32 Eor node
1534 static ir_node *gen_Eor(ir_node *node)
1536 ir_node *op1 = get_Eor_left(node);
1537 ir_node *op2 = get_Eor_right(node);
/* firm Eor maps directly to the ia32 Xor node */
1539 assert(! mode_is_float(get_irn_mode(node)));
1540 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1541 | match_mode_neutral | match_am | match_immediate);
1546 * Creates an ia32 Sub.
1548 * @return The created ia32 Sub node
1550 static ir_node *gen_Sub(ir_node *node)
1552 ir_node *op1 = get_Sub_left(node);
1553 ir_node *op2 = get_Sub_right(node);
1554 ir_mode *mode = get_irn_mode(node);
1556 if (mode_is_float(mode)) {
1557 if (ia32_cg_config.use_sse2)
1558 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1560 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalised to Add x,-C by the middle-end */
1563 if (is_Const(op2)) {
1564 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1568 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1569 | match_am | match_immediate);
/**
 * Combines the memory input of a node with the memory consumed by a folded
 * address-mode load, avoiding memory cycles. Returns the memory node the
 * new instruction should use (possibly a fresh Sync).
 */
1572 static ir_node *transform_AM_mem(ir_node *const block,
1573 ir_node *const src_val,
1574 ir_node *const src_mem,
1575 ir_node *const am_mem)
1577 if (is_NoMem(am_mem)) {
1578 return be_transform_node(src_mem);
1579 } else if (is_Proj(src_val) &&
1581 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1582 /* avoid memory loop */
1584 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1585 ir_node *const ptr_pred = get_Proj_pred(src_val);
1586 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync without the folded load's own memory Proj */
1591 NEW_ARR_A(ir_node*, ins, arity + 1);
1593 /* NOTE: This sometimes produces dead-code because the old sync in
1594 * src_mem might not be used anymore, we should detect this case
1595 * and kill the sync... */
1596 for (i = arity - 1; i >= 0; --i) {
1597 ir_node *const pred = get_Sync_pred(src_mem, i);
1599 /* avoid memory loop */
1600 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1603 ins[n++] = be_transform_node(pred);
1606 if (n==1 && ins[0] == am_mem) {
1608 /* creating a new Sync and relying on CSE may fail,
1609 * if am_mem is a ProjM, which does not yet verify. */
1613 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1617 ins[0] = be_transform_node(src_mem);
1619 return new_r_Sync(block, 2, ins);
1624 * Create a 32bit to 64bit signed extension.
1626 * @param dbgi debug info
1627 * @param block the block where node nodes should be placed
1628 * @param val the value to extend
1629 * @param orig the original node
1631 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1632 ir_node *val, const ir_node *orig)
/* either the short cltd/cdq (needs val in eax) or a generic sar by 31 */
1637 if (ia32_cg_config.use_short_sex_eax) {
1638 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1639 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1641 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1642 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1644 SET_IA32_ORIG_NODE(res, orig);
1649 * Generates an ia32 Div with additional infrastructure for the
1650 * register allocator if needed.
1652 static ir_node *create_Div(ir_node *node)
1654 dbg_info *dbgi = get_irn_dbg_info(node);
1655 ir_node *block = get_nodes_block(node);
1656 ir_node *new_block = be_transform_node(block);
1657 int throws_exception = ir_throws_exception(node);
1664 ir_node *sign_extension;
1665 ia32_address_mode_t am;
1666 ia32_address_t *addr = &am.addr;
1668 /* the upper bits have random contents for smaller modes */
/* handles both Div and Mod nodes (same instruction, different Proj) */
1669 switch (get_irn_opcode(node)) {
1671 op1 = get_Div_left(node);
1672 op2 = get_Div_right(node);
1673 mem = get_Div_mem(node);
1674 mode = get_Div_resmode(node);
1677 op1 = get_Mod_left(node);
1678 op2 = get_Mod_right(node);
1679 mem = get_Mod_mem(node);
1680 mode = get_Mod_resmode(node);
1683 panic("invalid divmod node %+F", node);
1686 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1688 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1689 is the memory of the consumed address. We can have only the second op as address
1690 in Div nodes, so check only op2. */
1691 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed: sign-extend dividend into edx; unsigned: zero the high part */
1693 if (mode_is_signed(mode)) {
1694 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1695 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1696 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1698 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1700 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1701 addr->index, new_mem, am.new_op2,
1702 am.new_op1, sign_extension);
1704 ir_set_throws_exception(new_node, throws_exception);
1706 set_irn_pinned(new_node, get_irn_pinned(node));
1708 set_am_attributes(new_node, &am);
1709 SET_IA32_ORIG_NODE(new_node, node);
1711 new_node = fix_mem_proj(new_node, &am);
1717 * Generates an ia32 Mod.
/* Mod shares the div instruction; callers pick the remainder Proj. */
1719 static ir_node *gen_Mod(ir_node *node)
1721 return create_Div(node);
1725 * Generates an ia32 Div.
1727 static ir_node *gen_Div(ir_node *node)
1729 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 divss/divsd or x87 fdiv */
1730 if (mode_is_float(mode)) {
1731 ir_node *op1 = get_Div_left(node);
1732 ir_node *op2 = get_Div_right(node);
1734 if (ia32_cg_config.use_sse2) {
1735 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1737 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* integer division shares its construction with Mod */
1741 return create_Div(node);
1745 * Creates an ia32 Shl.
1747 * @return The created ia32 Shl node
1749 static ir_node *gen_Shl(ir_node *node)
1751 ir_node *left = get_Shl_left(node);
1752 ir_node *right = get_Shl_right(node);
/* mode_neutral is fine: lower bits of a left shift are mode independent */
1754 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1755 match_mode_neutral | match_immediate);
1759 * Creates an ia32 Shr.
1761 * @return The created ia32 Shr node
1763 static ir_node *gen_Shr(ir_node *node)
1765 ir_node *left = get_Shr_left(node);
1766 ir_node *right = get_Shr_right(node);
/* no match_mode_neutral: a logical right shift depends on the upper bits */
1768 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1774 * Creates an ia32 Sar.
1776 * @return The created ia32 Shrs node
1778 static ir_node *gen_Shrs(ir_node *node)
1780 ir_node *left = get_Shrs_left(node);
1781 ir_node *right = get_Shrs_right(node);
/* Shrs by 31 produces just the sign bits -> use the sign-extension helper */
1783 if (is_Const(right)) {
1784 ir_tarval *tv = get_Const_tarval(right);
1785 long val = get_tarval_long(tv);
1787 /* this is a sign extension */
1788 dbg_info *dbgi = get_irn_dbg_info(node);
1789 ir_node *block = be_transform_node(get_nodes_block(node));
1790 ir_node *new_op = be_transform_node(left);
1792 return create_sex_32_64(dbgi, block, new_op, node);
1796 /* 8 or 16 bit sign extension? */
1797 if (is_Const(right) && is_Shl(left)) {
1798 ir_node *shl_left = get_Shl_left(left);
1799 ir_node *shl_right = get_Shl_right(left);
1800 if (is_Const(shl_right)) {
1801 ir_tarval *tv1 = get_Const_tarval(right);
1802 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x,C),C) with C==16/24 is a movsx from 16/8 bit */
1803 if (tv1 == tv2 && tarval_is_long(tv1)) {
1804 long val = get_tarval_long(tv1);
1805 if (val == 16 || val == 24) {
1806 dbg_info *dbgi = get_irn_dbg_info(node);
1807 ir_node *block = get_nodes_block(node);
1817 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic arithmetic right shift */
1826 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1832 * Creates an ia32 Rol.
1834 * @param op1 The first operator
1835 * @param op2 The second operator
1836 * @return The created ia32 RotL node
1838 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1840 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1846 * Creates an ia32 Ror.
1847 * NOTE: There is no RotR with immediate because this would always be a RotL
1848 * "imm-mode_size_bits" which can be pre-calculated.
1850 * @param op1 The first operator
1851 * @param op2 The second operator
1852 * @return The created ia32 RotR node
1854 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1856 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1862 * Creates an ia32 RotR or RotL (depending on the found pattern).
1864 * @return The created ia32 RotL or RotR node
1866 static ir_node *gen_Rotl(ir_node *node)
1868 ir_node *op1 = get_Rotl_left(node);
1869 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -k) is the same as Ror(x, k) */
1871 if (is_Minus(op2)) {
1872 return gen_Ror(node, op1, get_Minus_op(op2));
1875 return gen_Rol(node, op1, op2);
1881 * Transforms a Minus node.
1883 * @return The created ia32 Minus node
1885 static ir_node *gen_Minus(ir_node *node)
1887 ir_node *op = get_Minus_op(node);
1888 ir_node *block = be_transform_node(get_nodes_block(node));
1889 dbg_info *dbgi = get_irn_dbg_info(node);
1890 ir_mode *mode = get_irn_mode(node);
/* float negate: SSE2 xors the sign bit with a constant from memory;
 * x87 has a dedicated fchs instruction */
1895 if (mode_is_float(mode)) {
1896 ir_node *new_op = be_transform_node(op);
1897 if (ia32_cg_config.use_sse2) {
1898 /* TODO: non-optimal... if we have many xXors, then we should
1899 * rather create a load for the const and use that instead of
1900 * several AM nodes... */
1901 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1903 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1904 noreg_GP, nomem, new_op, noreg_xmm);
1906 size = get_mode_size_bits(mode);
1907 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1909 set_ia32_am_sc(new_node, ent);
1910 set_ia32_op_type(new_node, ia32_AddrModeS);
1911 set_ia32_ls_mode(new_node, mode);
1913 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1916 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1919 SET_IA32_ORIG_NODE(new_node, node);
1925 * Transforms a Not node.
1927 * @return The created ia32 Not node
1929 static ir_node *gen_Not(ir_node *node)
1931 ir_node *op = get_Not_op(node);
1933 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1934 assert (! mode_is_float(get_irn_mode(node)));
1936 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Creates a float Abs (optionally negated afterwards).
 * SSE2: And with a sign-mask constant from memory; x87: fabs (+ fchs when
 * negate is requested).
 */
1939 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1940 bool negate, ir_node *node)
1942 ir_node *new_block = be_transform_node(block);
1943 ir_mode *mode = get_irn_mode(op);
1944 ir_node *new_op = be_transform_node(op);
1949 assert(mode_is_float(mode));
1951 if (ia32_cg_config.use_sse2) {
1952 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1953 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1954 noreg_GP, nomem, new_op, noreg_fp);
1956 size = get_mode_size_bits(mode);
1957 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1959 set_ia32_am_sc(new_node, ent);
1961 SET_IA32_ORIG_NODE(new_node, node);
1963 set_ia32_op_type(new_node, ia32_AddrModeS);
1964 set_ia32_ls_mode(new_node, mode);
1966 /* TODO, implement -Abs case */
1969 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1970 SET_IA32_ORIG_NODE(new_node, node);
/* x87 -Abs: negate the fabs result with fchs */
1972 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1973 SET_IA32_ORIG_NODE(new_node, node);
1981 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1983 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1985 dbg_info *dbgi = get_irn_dbg_info(cmp);
1986 ir_node *block = get_nodes_block(cmp);
1987 ir_node *new_block = be_transform_node(block);
1988 ir_node *op1 = be_transform_node(x);
1989 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of op1 into the carry flag */
1991 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Maps a firm ir_relation to the ia32 condition code for float, signed-int
 * and unsigned-int comparisons. overflow_possible selects between the
 * full signed conditions (l/ge) and the sign-flag shortcuts (s/ns).
 */
1994 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1996 bool overflow_possible)
1998 if (mode_is_float(mode)) {
2000 case ir_relation_equal: return ia32_cc_float_equal;
2001 case ir_relation_less: return ia32_cc_float_below;
2002 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2003 case ir_relation_greater: return ia32_cc_float_above;
2004 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2005 case ir_relation_less_greater: return ia32_cc_not_equal;
2006 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2007 case ir_relation_unordered: return ia32_cc_parity;
2008 case ir_relation_unordered_equal: return ia32_cc_equal;
2009 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2010 case ir_relation_unordered_less_equal:
2011 return ia32_cc_float_unordered_below_equal;
2012 case ir_relation_unordered_greater:
2013 return ia32_cc_float_unordered_above;
2014 case ir_relation_unordered_greater_equal:
2015 return ia32_cc_float_unordered_above_equal;
2016 case ir_relation_unordered_less_greater:
2017 return ia32_cc_float_not_equal;
2018 case ir_relation_false:
2019 case ir_relation_true:
2020 /* should we introduce a jump always/jump never? */
2023 panic("Unexpected float pnc");
2024 } else if (mode_is_signed(mode)) {
2026 case ir_relation_unordered_equal:
2027 case ir_relation_equal: return ia32_cc_equal;
2028 case ir_relation_unordered_less:
2029 case ir_relation_less:
2030 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2031 case ir_relation_unordered_less_equal:
2032 case ir_relation_less_equal: return ia32_cc_less_equal;
2033 case ir_relation_unordered_greater:
2034 case ir_relation_greater: return ia32_cc_greater;
2035 case ir_relation_unordered_greater_equal:
2036 case ir_relation_greater_equal:
2037 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2038 case ir_relation_unordered_less_greater:
2039 case ir_relation_less_greater: return ia32_cc_not_equal;
2040 case ir_relation_less_equal_greater:
2041 case ir_relation_unordered:
2042 case ir_relation_false:
2043 case ir_relation_true:
2044 /* introduce jump always/jump never? */
2047 panic("Unexpected pnc");
/* unsigned integer comparison: above/below conditions */
2050 case ir_relation_unordered_equal:
2051 case ir_relation_equal: return ia32_cc_equal;
2052 case ir_relation_unordered_less:
2053 case ir_relation_less: return ia32_cc_below;
2054 case ir_relation_unordered_less_equal:
2055 case ir_relation_less_equal: return ia32_cc_below_equal;
2056 case ir_relation_unordered_greater:
2057 case ir_relation_greater: return ia32_cc_above;
2058 case ir_relation_unordered_greater_equal:
2059 case ir_relation_greater_equal: return ia32_cc_above_equal;
2060 case ir_relation_unordered_less_greater:
2061 case ir_relation_less_greater: return ia32_cc_not_equal;
2062 case ir_relation_less_equal_greater:
2063 case ir_relation_unordered:
2064 case ir_relation_false:
2065 case ir_relation_true:
2066 /* introduce jump always/jump never? */
2069 panic("Unexpected pnc");
/**
 * Produces the flags node for a Cmp and the condition code to test,
 * matching a bt instruction for (1 << n) & x patterns when enabled.
 */
2073 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2075 /* must have a Cmp as input */
2076 ir_relation relation = get_Cmp_relation(cmp);
2077 ir_node *l = get_Cmp_left(cmp);
2078 ir_node *r = get_Cmp_right(cmp);
2079 ir_mode *mode = get_irn_mode(l);
2080 bool overflow_possible;
2083 /* check for bit-test */
2084 if (ia32_cg_config.use_bt
2085 && (relation == ir_relation_equal
2086 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2087 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2089 ir_node *la = get_And_left(l);
2090 ir_node *ra = get_And_right(l);
2097 ir_node *c = get_Shl_left(la);
2098 if (is_Const_1(c) && is_Const_0(r)) {
2099 /* (1 << n) & ra) */
2100 ir_node *n = get_Shl_right(la);
2101 flags = gen_bt(cmp, ra, n);
2102 /* the bit is copied into the CF flag */
2103 if (relation & ir_relation_equal)
2104 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2106 *cc_out = ia32_cc_below; /* test for CF=1 */
2112 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2113 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2114 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2115 * a predecessor node). So add the < bit.
2116 * (Note that we do not want to produce <=> (which can happen for
2117 * unoptimized code), because no x86 flag can represent that */
2118 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2119 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparisons against 0 cannot overflow -> sign-flag shortcuts allowed */
2121 overflow_possible = true;
2122 if (is_Const(r) && is_Const_null(r))
2123 overflow_possible = false;
2125 /* just do a normal transformation of the Cmp */
2126 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2127 flags = be_transform_node(cmp);
2132 * Transforms a Load.
2134 * @return the created ia32 Load node
2136 static ir_node *gen_Load(ir_node *node)
2138 ir_node *old_block = get_nodes_block(node);
2139 ir_node *block = be_transform_node(old_block);
2140 ir_node *ptr = get_Load_ptr(node);
2141 ir_node *mem = get_Load_mem(node);
2142 ir_node *new_mem = be_transform_node(mem);
2143 dbg_info *dbgi = get_irn_dbg_info(node);
2144 ir_mode *mode = get_Load_mode(node);
2145 int throws_exception = ir_throws_exception(node);
2149 ia32_address_t addr;
2151 /* construct load address */
2152 memset(&addr, 0, sizeof(addr));
2153 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2160 base = be_transform_node(base);
2166 idx = be_transform_node(idx);
/* pick the load flavour: SSE2 xLoad, x87 vfld, or GP Load/Conv */
2169 if (mode_is_float(mode)) {
2170 if (ia32_cg_config.use_sse2) {
2171 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2174 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2178 assert(mode != mode_b);
2180 /* create a conv node with address mode for smaller modes */
2181 if (get_mode_size_bits(mode) < 32) {
2182 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2183 new_mem, noreg_GP, mode);
2185 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2188 ir_set_throws_exception(new_node, throws_exception);
2190 set_irn_pinned(new_node, get_irn_pinned(node));
2191 set_ia32_op_type(new_node, ia32_AddrModeS);
2192 set_ia32_ls_mode(new_node, mode);
2193 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialised by the spiller */
2195 if (get_irn_pinned(node) == op_pin_state_floats) {
2196 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2197 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2198 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2199 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2202 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Checks whether a Load feeding into a Store can be folded into a
 * destination-address-mode instruction (read-modify-write).
 */
2207 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2208 ir_node *ptr, ir_node *other)
2215 /* we only use address mode if we're the only user of the load */
2216 if (get_irn_n_edges(node) > 1)
2219 load = get_Proj_pred(node);
/* load must live in the same block as the store */
2222 if (get_nodes_block(load) != block)
2225 /* store should have the same pointer as the load */
2226 if (get_Load_ptr(load) != ptr)
2229 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2230 if (other != NULL &&
2231 get_nodes_block(other) == block &&
2232 heights_reachable_in_block(ia32_heights, other, load)) {
2236 if (ia32_prevents_AM(block, load, mem))
2238 /* Store should be attached to the load via mem */
2239 assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Constructs a destination-address-mode binop (e.g. add [mem], reg) for a
 * Store(binop(Load)) pattern. Returns NULL when the pattern cannot be used.
 */
2244 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2245 ir_node *mem, ir_node *ptr, ir_mode *mode,
2246 construct_binop_dest_func *func,
2247 construct_binop_dest_func *func8bit,
2248 match_flags_t flags)
2250 ir_node *src_block = get_nodes_block(node);
2258 ia32_address_mode_t am;
2259 ia32_address_t *addr = &am.addr;
2260 memset(&am, 0, sizeof(am));
2262 assert(flags & match_immediate); /* there is no destam node without... */
2263 commutative = (flags & match_commutative) != 0;
/* try either operand as the memory side (second only if commutative) */
2265 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2266 build_address(&am, op1, ia32_create_am_double_use);
2267 new_op = create_immediate_or_transform(op2, 0);
2268 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2269 build_address(&am, op2, ia32_create_am_double_use);
2270 new_op = create_immediate_or_transform(op1, 0);
2275 if (addr->base == NULL)
2276 addr->base = noreg_GP;
2277 if (addr->index == NULL)
2278 addr->index = noreg_GP;
2279 if (addr->mem == NULL)
2282 dbgi = get_irn_dbg_info(node);
2283 block = be_transform_node(src_block);
2284 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need their dedicated node variant */
2286 if (get_mode_size_bits(mode) == 8) {
2287 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2289 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2291 set_address(new_node, addr);
2292 set_ia32_op_type(new_node, ia32_AddrModeD);
2293 set_ia32_ls_mode(new_node, mode);
2294 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new node */
2296 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2297 mem_proj = be_transform_node(am.mem_proj);
2298 be_set_transformed_node(am.mem_proj, new_node);
2299 be_set_transformed_node(mem_proj, new_node);
/**
 * Constructs a destination-address-mode unop (e.g. not [mem]) for a
 * Store(unop(Load)) pattern. Returns NULL when the pattern cannot be used.
 */
2304 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2305 ir_node *ptr, ir_mode *mode,
2306 construct_unop_dest_func *func)
2308 ir_node *src_block = get_nodes_block(node);
2314 ia32_address_mode_t am;
2315 ia32_address_t *addr = &am.addr;
2317 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2320 memset(&am, 0, sizeof(am));
2321 build_address(&am, op, ia32_create_am_double_use);
2323 dbgi = get_irn_dbg_info(node);
2324 block = be_transform_node(src_block);
2325 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2326 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2327 set_address(new_node, addr);
2328 set_ia32_op_type(new_node, ia32_AddrModeD);
2329 set_ia32_ls_mode(new_node, mode);
2330 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new node */
2332 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2333 mem_proj = be_transform_node(am.mem_proj);
2334 be_set_transformed_node(am.mem_proj, new_node);
2335 be_set_transformed_node(mem_proj, new_node);
/**
 * Tries to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem writing the
 * condition result straight to memory. Returns NULL when not applicable.
 */
2340 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2342 ir_mode *mode = get_irn_mode(node);
2343 ir_node *mux_true = get_Mux_true(node);
2344 ir_node *mux_false = get_Mux_false(node);
2352 ia32_condition_code_t cc;
2353 ia32_address_t addr;
/* setcc only writes a single byte */
2355 if (get_mode_size_bits(mode) != 8)
2358 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2360 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2366 cond = get_Mux_sel(node);
2367 flags = get_flags_node(cond, &cc);
2368 /* we can't handle the float special cases with SetM */
2369 if (cc & ia32_cc_additional_float_cases)
/* Mux(c, 0, 1) needs the inverted condition */
2372 cc = ia32_negate_condition_code(cc);
2374 build_address_ptr(&addr, ptr, mem);
2376 dbgi = get_irn_dbg_info(node);
2377 block = get_nodes_block(node);
2378 new_block = be_transform_node(block);
2379 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2380 addr.index, addr.mem, flags, cc);
2381 set_address(new_node, &addr);
2382 set_ia32_op_type(new_node, ia32_AddrModeD);
2383 set_ia32_ls_mode(new_node, mode);
2384 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Tries to transform a Store of a computed value into a read-modify-write
 * destination-address-mode instruction (AddMem, SubMem, ShlMem, ...).
 * Returns the new node or NULL if the Store must be kept.
 */
2389 static ir_node *try_create_dest_am(ir_node *node)
2391 ir_node *val = get_Store_value(node);
2392 ir_node *mem = get_Store_mem(node);
2393 ir_node *ptr = get_Store_ptr(node);
2394 ir_mode *mode = get_irn_mode(val);
2395 unsigned bits = get_mode_size_bits(mode);
2400 /* handle only GP modes for now... */
2401 if (!ia32_mode_needs_gp_reg(mode))
2405 /* store must be the only user of the val node */
2406 if (get_irn_n_edges(val) > 1)
2408 /* skip pointless convs */
2410 ir_node *conv_op = get_Conv_op(val);
2411 ir_mode *pred_mode = get_irn_mode(conv_op);
2412 if (!ia32_mode_needs_gp_reg(pred_mode))
2414 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2422 /* value must be in the same block */
2423 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2426 switch (get_irn_opcode(val)) {
2428 op1 = get_Add_left(val);
2429 op2 = get_Add_right(val);
/* Add +-1 can use inc/dec on memory when configured */
2430 if (ia32_cg_config.use_incdec) {
2431 if (is_Const_1(op2)) {
2432 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2434 } else if (is_Const_Minus_1(op2)) {
2435 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2439 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2440 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2441 match_commutative | match_immediate);
2444 op1 = get_Sub_left(val);
2445 op2 = get_Sub_right(val);
2446 if (is_Const(op2)) {
2447 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2449 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2450 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2454 op1 = get_And_left(val);
2455 op2 = get_And_right(val);
2456 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2457 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2458 match_commutative | match_immediate);
2461 op1 = get_Or_left(val);
2462 op2 = get_Or_right(val);
2463 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2464 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2465 match_commutative | match_immediate);
2468 op1 = get_Eor_left(val);
2469 op2 = get_Eor_right(val);
2470 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2471 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2472 match_commutative | match_immediate);
2475 op1 = get_Shl_left(val);
2476 op2 = get_Shl_right(val);
2477 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2478 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2482 op1 = get_Shr_left(val);
2483 op2 = get_Shr_right(val);
2484 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2485 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2489 op1 = get_Shrs_left(val);
2490 op2 = get_Shrs_right(val);
2491 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2492 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2496 op1 = get_Rotl_left(val);
2497 op2 = get_Rotl_right(val);
2498 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2499 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2502 /* TODO: match ROR patterns... */
2504 new_node = try_create_SetMem(val, ptr, mem);
2508 op1 = get_Minus_op(val);
2509 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2512 /* should be lowered already */
2513 assert(mode != mode_b);
2514 op1 = get_Not_op(val);
2515 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* keep the original Store's pinned state on the replacement */
2521 if (new_node != NULL) {
2522 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2523 get_irn_pinned(node) == op_pin_state_pinned) {
2524 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether an integer mode may take part in x87 float<->int
 * transfers: it must be signed and exactly 16 or 32 bits wide.
 * Used by is_float_to_int_conv() and (per the caller's comment) by
 * gen_x87_gp_to_fp() to decide whether fild can use source AM.
 * (The return statements are elided in this excerpt.)
 */
2531 static bool possible_int_mode_for_fp(ir_mode *mode)
2535 	if (!mode_is_signed(mode))
2537 	size = get_mode_size_bits(mode);
2538 	if (size != 16 && size != 32)
/**
 * Returns non-zero if @p node converts a float value to an integer mode
 * accepted by possible_int_mode_for_fp(), i.e. a conversion that a fist
 * instruction can perform while storing.
 * NOTE(review): the is_Conv() check and the returns are presumably in
 * elided lines — confirm against the full file.
 */
2543 static int is_float_to_int_conv(const ir_node *node)
2545 	ir_mode  *mode = get_irn_mode(node);
2549 	if (!possible_int_mode_for_fp(mode))
2554 	conv_op   = get_Conv_op(node);
2555 	conv_mode = get_irn_mode(conv_op);
2557 	if (!mode_is_float(conv_mode))
2564 * Transform a Store(floatConst) into a sequence of
2567 * @return the created ia32 Store node
/**
 * Transform a Store of a float Const into a sequence of integer Stores
 * of the constant's raw bytes: 4-byte chunks first, then a 2-byte
 * remainder; any other remainder size panics.  When several stores were
 * emitted their memory Projs are combined with a Sync, otherwise the
 * single store is returned via its Proj's predecessor.
 * NOTE(review): the loop header and the declarations of ofs/size/delta/
 * i/ins are elided in this excerpt — verify before editing.
 */
2569 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2571 	ir_mode        *mode      = get_irn_mode(cns);
2572 	unsigned        size      = get_mode_size_bytes(mode);
2573 	ir_tarval      *tv        = get_Const_tarval(cns);
2574 	ir_node        *block     = get_nodes_block(node);
2575 	ir_node        *new_block = be_transform_node(block);
2576 	ir_node        *ptr       = get_Store_ptr(node);
2577 	ir_node        *mem       = get_Store_mem(node);
2578 	dbg_info       *dbgi      = get_irn_dbg_info(node);
2581 	int             throws_exception = ir_throws_exception(node);
2583 	ia32_address_t  addr;
2585 	build_address_ptr(&addr, ptr, mem);
		/* assemble a 32-bit little-endian chunk from the tarval's bytes */
2592 			val= get_tarval_sub_bits(tv, ofs)            |
2593 				(get_tarval_sub_bits(tv, ofs + 1) <<  8) |
2594 				(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2595 				(get_tarval_sub_bits(tv, ofs + 3) << 24);
2598 		} else if (size >= 2) {
2599 			val= get_tarval_sub_bits(tv, ofs)            |
2600 				(get_tarval_sub_bits(tv, ofs + 1) <<  8);
2604 			panic("invalid size of Store float to mem (%+F)", node);
2606 		ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2608 		ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2609 			addr.index, addr.mem, imm);
2610 		ir_node *new_mem  = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2612 		ir_set_throws_exception(new_node, throws_exception);
2613 		set_irn_pinned(new_node, get_irn_pinned(node));
2614 		set_ia32_op_type(new_node, ia32_AddrModeD);
2615 		set_ia32_ls_mode(new_node, mode);
2616 		set_address(new_node, &addr);
2617 		SET_IA32_ORIG_NODE(new_node, node);
		/* advance to the next chunk of the constant */
2624 		addr.offset += delta;
2625 	} while (size != 0);
	/* multiple partial stores -> Sync their memory outputs */
2628 		return new_rd_Sync(dbgi, new_block, i, ins);
2630 		return get_Proj_pred(ins[0]);
2635 * Generate a vfist or vfisttp instruction.
/**
 * Generate a vfist or (if ia32_cg_config.use_fisttp allows it) a
 * vfisttp instruction storing @p val to memory at base/index.
 * (The returns and part of the non-fisttp path are elided here.)
 */
2637 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2638 		ir_node *index, ir_node *mem,  ir_node *val)
2640 	if (ia32_cg_config.use_fisttp) {
2641 		/* Note: fisttp ALWAYS pops the tos. We have to ensure here that the value is copied
2642 		if other users exist */
2643 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2644 		ir_node *value   = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
		/* keep the result value alive so the popped tos is not lost */
2645 		be_new_Keep(block, 1, &value);
		/* classic fist needs the fpcw switched to truncation mode */
2649 		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2652 		ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2658 * Transforms a general (no special case) Store.
2660 * @return the created ia32 Store node
/**
 * Transform a general (no special case) Store: first tries destination
 * address mode, then builds the store address and emits xStore (SSE2),
 * vfst (x87), vfist (float->int store on x87) or a plain integer
 * Store/Store8Bit.
 * NOTE(review): several braces/returns are elided in this excerpt.
 */
2662 static ir_node *gen_general_Store(ir_node *node)
2664 	ir_node  *val       = get_Store_value(node);
2665 	ir_mode  *mode      = get_irn_mode(val);
2666 	ir_node  *block     = get_nodes_block(node);
2667 	ir_node  *new_block = be_transform_node(block);
2668 	ir_node  *ptr       = get_Store_ptr(node);
2669 	ir_node  *mem       = get_Store_mem(node);
2670 	dbg_info *dbgi      = get_irn_dbg_info(node);
2671 	int       throws_exception = ir_throws_exception(node);
2674 	ia32_address_t addr;
2676 	/* check for destination address mode */
2677 	new_node = try_create_dest_am(node);
2678 	if (new_node != NULL)
2681 	/* construct store address */
2682 	memset(&addr, 0, sizeof(addr));
2683 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2685 	if (addr.base == NULL) {
2686 		addr.base = noreg_GP;
2688 		addr.base = be_transform_node(addr.base);
2691 	if (addr.index == NULL) {
2692 		addr.index = noreg_GP;
2694 		addr.index = be_transform_node(addr.index);
2696 	addr.mem = be_transform_node(mem);
2698 	if (mode_is_float(mode)) {
2699 		/* Convs (and strict-Convs) before stores are unnecessary if the mode
2701 		while (is_Conv(val) && mode == get_irn_mode(val)) {
2702 			ir_node *op = get_Conv_op(val);
2703 			if (!mode_is_float(get_irn_mode(op)))
2707 		new_val = be_transform_node(val);
2708 		if (ia32_cg_config.use_sse2) {
2709 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2710 			                              addr.index, addr.mem, new_val);
2712 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2713 			                            addr.index, addr.mem, new_val, mode);
	/* x87 only: fold a float->int Conv into the store via fist */
2715 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2716 		val = get_Conv_op(val);
2718 		/* TODO: is this optimisation still necessary at all (middleend)? */
2719 		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2721 		while (is_Conv(val)) {
2722 			ir_node *op = get_Conv_op(val);
2723 			if (!mode_is_float(get_irn_mode(op)))
2725 			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2729 		new_val  = be_transform_node(val);
2730 		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
	/* integer store; the value may become an immediate operand */
2732 		new_val = create_immediate_or_transform(val, 0);
2733 		assert(mode != mode_b);
2735 		if (get_mode_size_bits(mode) == 8) {
2736 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2737 			                                 addr.index, addr.mem, new_val);
2739 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2740 			                             addr.index, addr.mem, new_val);
2743 	ir_set_throws_exception(new_node, throws_exception);
2745 	set_irn_pinned(new_node, get_irn_pinned(node));
2746 	set_ia32_op_type(new_node, ia32_AddrModeD);
2747 	set_ia32_ls_mode(new_node, mode);
2749 	set_address(new_node, &addr);
2750 	SET_IA32_ORIG_NODE(new_node, node);
2756 * Transforms a Store.
2758 * @return the created ia32 Store node
/* Dispatcher: float constants get the byte-wise integer-store
 * expansion, everything else the general path. */
2760 static ir_node *gen_Store(ir_node *node)
2762 	ir_node *val  = get_Store_value(node);
2763 	ir_mode *mode = get_irn_mode(val);
2765 	if (mode_is_float(mode) && is_Const(val)) {
2766 		/* We can transform every floating const store
2767 		   into a sequence of integer stores.
2768 		   If the constant is already in a register,
2769 		   it would be better to use it, but we don't
2770 		   have this information here. */
2771 		return gen_float_const_Store(node, val);
2773 	return gen_general_Store(node);
2777 * Transforms a Switch.
2779 * @return the created ia32 SwitchJmp node
/**
 * Transform a Switch into an ia32 SwitchJmp using a jump table stored
 * in a private constant entity (scale 2 = 4-byte entries).
 * The selector is widened to 32 bit first when necessary.
 */
2781 static ir_node *gen_Switch(ir_node *node)
2783 	dbg_info              *dbgi     = get_irn_dbg_info(node);
2784 	ir_graph              *irg      = get_irn_irg(node);
2785 	ir_node               *block    = be_transform_node(get_nodes_block(node));
2786 	ir_node               *sel      = get_Switch_selector(node);
2787 	ir_node               *new_sel  = be_transform_node(sel);
2788 	ir_mode               *sel_mode = get_irn_mode(sel);
2789 	const ir_switch_table *table    = get_Switch_table(node);
2790 	unsigned               n_outs   = get_Switch_n_outs(node);
2794 	assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2795 	if (get_mode_size_bits(sel_mode) != 32)
2796 		new_sel = create_upconv(new_sel, sel);
	/* jump table lives in a private, constant entity */
2798 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2799 	set_entity_visibility(entity, ir_visibility_private);
2800 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
	/* duplicate: the table is shared with the (still alive) old node */
2802 	table = ir_switch_table_duplicate(irg, table);
2804 	new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2805 	set_ia32_am_scale(new_node, 2);
2806 	set_ia32_am_sc(new_node, entity);
2807 	set_ia32_op_type(new_node, ia32_AddrModeS);
2808 	set_ia32_ls_mode(new_node, mode_Iu);
2809 	SET_IA32_ORIG_NODE(new_node, node);
2810 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2811 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2817 * Transform a Cond node.
/* Transform a Cond: materialize the selector as flags and emit a Jcc. */
2819 static ir_node *gen_Cond(ir_node *node)
2821 	ir_node              *block     = get_nodes_block(node);
2822 	ir_node              *new_block = be_transform_node(block);
2823 	dbg_info             *dbgi      = get_irn_dbg_info(node);
2824 	ir_node              *sel       = get_Cond_selector(node);
2825 	ir_node              *flags     = NULL;
2827 	ia32_condition_code_t cc;
2829 	/* we get flags from a Cmp */
2830 	flags = get_flags_node(sel, &cc);
2832 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2833 	SET_IA32_ORIG_NODE(new_node, node);
2839 * Transform a be_Copy.
/* Transform a be_Copy by duplicating it; GP-register values are
 * normalized to mode_Iu. */
2841 static ir_node *gen_be_Copy(ir_node *node)
2843 	ir_node *new_node = be_duplicate_node(node);
2844 	ir_mode *mode     = get_irn_mode(new_node);
2846 	if (ia32_mode_needs_gp_reg(mode)) {
2847 		set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node: vFucomi when available,
 * otherwise vFtstFnstsw (compare against 0) / vFucomFnstsw followed by
 * Sahf to move the FPU status word into the CPU flags.
 * (Some braces/returns are elided in this excerpt.)
 */
2853 static ir_node *create_Fucom(ir_node *node)
2855 	dbg_info *dbgi      = get_irn_dbg_info(node);
2856 	ir_node  *block     = get_nodes_block(node);
2857 	ir_node  *new_block = be_transform_node(block);
2858 	ir_node  *left      = get_Cmp_left(node);
2859 	ir_node  *new_left  = be_transform_node(left);
2860 	ir_node  *right     = get_Cmp_right(node);
2864 	if (ia32_cg_config.use_fucomi) {
2865 		new_right = be_transform_node(right);
2866 		new_node  = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2868 		set_ia32_commutative(new_node);
2869 		SET_IA32_ORIG_NODE(new_node, node);
		/* ftst is shorter when comparing against zero */
2871 		if (is_Const_0(right)) {
2872 			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2874 			new_right = be_transform_node(right);
2875 			new_node  = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2876 			set_ia32_commutative(new_node);
2879 		SET_IA32_ORIG_NODE(new_node, node);
		/* transfer fnstsw result (AH) into eflags */
2881 		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2882 		SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE2 Ucomi compare for a Cmp node, using address mode for
 * one operand where possible.
 */
2890 static ir_node *create_Ucomi(ir_node *node)
2892 	dbg_info *dbgi      = get_irn_dbg_info(node);
2891 	ir_node  *src_block = get_nodes_block(node);
2892 	ir_node  *new_block = be_transform_node(src_block);
2893 	ir_node  *left      = get_Cmp_left(node);
2894 	ir_node  *right     = get_Cmp_right(node);
2896 	ia32_address_mode_t  am;
2897 	ia32_address_t      *addr = &am.addr;
2899 	match_arguments(&am, src_block, left, right, NULL,
2900 	                match_commutative | match_am);
2902 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2903 	                             addr->mem, am.new_op1, am.new_op2,
2905 	set_am_attributes(new_node, &am);
2907 	SET_IA32_ORIG_NODE(new_node, node);
2909 	new_node = fix_mem_proj(new_node, &am);
2915 * returns true if it is assured, that the upper bits of a node are "clean"
2916 * which means for a 16 or 8 bit value, that the upper bits in the register
2917  * are 0 for unsigned and a copy of the most significant (sign) bit for signed
/**
 * See the doc comment above: decide whether the upper register bits of
 * a sub-32-bit value are already correct (zero for unsigned, sign
 * extension for signed), so a widening Conv can be omitted.
 * NOTE(review): case labels and some returns are elided in this excerpt.
 */
2920 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2922 	assert(ia32_mode_needs_gp_reg(mode));
	/* full-width (or wider) values trivially qualify */
2923 	if (get_mode_size_bits(mode) >= 32)
	/* look through Projs at the producing node */
2926 	if (is_Proj(transformed_node))
2927 		return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2929 	switch (get_ia32_irn_opcode(transformed_node)) {
2930 		case iro_ia32_Conv_I2I:
2931 		case iro_ia32_Conv_I2I8Bit: {
2932 			ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
			/* the conv must extend with the same signedness and from a
			 * mode no wider than the one we are asking about */
2933 			if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2935 			if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2942 			if (mode_is_signed(mode)) {
2943 				return false; /* TODO handle signed modes */
				/* Shr by a big enough constant guarantees zero upper bits */
2945 				ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2946 				if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2947 					const ia32_immediate_attr_t *attr
2948 						= get_ia32_immediate_attr_const(right);
2949 					if (attr->symconst == 0 &&
2950 					    (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2954 				return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2958 			/* TODO too conservative if shift amount is constant */
2959 			return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2962 			if (!mode_is_signed(mode)) {
				/* And: one clean operand suffices for an unsigned mode */
2964 					upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2965 					upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2967 			/* TODO if one is known to be zero extended, then || is sufficient */
			/* generic binary op: both operands must be clean */
2972 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2973 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2975 		case iro_ia32_Const:
2976 		case iro_ia32_Immediate: {
2977 			const ia32_immediate_attr_t *attr =
2978 				get_ia32_immediate_attr_const(transformed_node);
2979 			if (mode_is_signed(mode)) {
				/* all bits above the sign bit must match the sign bit */
2980 				long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2981 				return shifted == 0 || shifted == -1;
2983 				unsigned long shifted = (unsigned long)attr->offset;
2984 				shifted >>= get_mode_size_bits(mode)-1;
2986 				return shifted == 0;
2996 * Generate code for a Cmp.
/**
 * Generate code for a Cmp: floats go to Ucomi (SSE2) or Fucom (x87);
 * integers become Test/Test8Bit for "(x & y) ==/!= 0" patterns or a
 * plain Cmp/Cmp8Bit otherwise.
 */
2998 static ir_node *gen_Cmp(ir_node *node)
3000 	dbg_info *dbgi      = get_irn_dbg_info(node);
3001 	ir_node  *block     = get_nodes_block(node);
3002 	ir_node  *new_block = be_transform_node(block);
3003 	ir_node  *left      = get_Cmp_left(node);
3004 	ir_node  *right     = get_Cmp_right(node);
3005 	ir_mode  *cmp_mode  = get_irn_mode(left);
3007 	ia32_address_mode_t  am;
3008 	ia32_address_t      *addr = &am.addr;
3010 	if (mode_is_float(cmp_mode)) {
3011 		if (ia32_cg_config.use_sse2) {
3012 			return create_Ucomi(node);
3014 			return create_Fucom(node);
3018 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3020 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3021 	if (is_Const_0(right)          &&
3023 	    get_irn_n_edges(left) == 1) {
3024 		/* Test(and_left, and_right) */
3025 		ir_node *and_left  = get_And_left(left);
3026 		ir_node *and_right = get_And_right(left);
3028 		/* matze: code here used mode instead of cmp_mode, I think it is always
3029 		 * the same as cmp_mode, but I leave this here to see if this is really
3032 		assert(get_irn_mode(and_left) == cmp_mode);
3034 		match_arguments(&am, block, and_left, and_right, NULL,
3036 		                match_am | match_8bit_am | match_16bit_am |
3037 		                match_am_and_immediates | match_immediate);
3039 		/* use 32bit compare mode if possible since the opcode is smaller */
3040 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3041 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3042 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3045 		if (get_mode_size_bits(cmp_mode) == 8) {
3046 			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3047 			                                addr->index, addr->mem,
3048 			                                am.new_op1, am.new_op2,
3051 			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3052 			                            addr->index, addr->mem, am.new_op1,
3053 			                            am.new_op2, am.ins_permuted);
3056 		/* Cmp(left, right) */
3057 		match_arguments(&am, block, left, right, NULL,
3058 		                match_commutative | match_am | match_8bit_am |
3059 		                match_16bit_am | match_am_and_immediates |
3061 		/* use 32bit compare mode if possible since the opcode is smaller */
3062 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3063 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3064 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3067 		if (get_mode_size_bits(cmp_mode) == 8) {
3068 			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3069 			                               addr->index, addr->mem, am.new_op1,
3070 			                               am.new_op2, am.ins_permuted);
3072 			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3073 			                           addr->mem, am.new_op1, am.new_op2,
3077 	set_am_attributes(new_node, &am);
3078 	set_ia32_ls_mode(new_node, cmp_mode);
3080 	SET_IA32_ORIG_NODE(new_node, node);
3082 	new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMovcc for a Mux of two GP values; requires
 * ia32_cg_config.use_cmov.  @p flags is the untransformed flags
 * producer, @p new_flags its transformed counterpart.
 */
3087 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3088 		ia32_condition_code_t cc)
3090 	dbg_info *dbgi      = get_irn_dbg_info(node);
3091 	ir_node  *block     = get_nodes_block(node);
3092 	ir_node  *new_block = be_transform_node(block);
3093 	ir_node  *val_true  = get_Mux_true(node);
3094 	ir_node  *val_false = get_Mux_false(node);
3096 	ia32_address_mode_t  am;
3097 	ia32_address_t      *addr;
3099 	assert(ia32_cg_config.use_cmov);
3100 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3104 	match_arguments(&am, block, val_false, val_true, flags,
3105 			match_commutative | match_am | match_16bit_am | match_mode_neutral);
	/* if the matcher swapped the operands, invert the condition */
3107 	if (am.ins_permuted)
3108 		cc = ia32_negate_condition_code(cc);
3110 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3111 	                              addr->mem, am.new_op1, am.new_op2, new_flags,
3113 	set_am_attributes(new_node, &am);
3115 	SET_IA32_ORIG_NODE(new_node, node);
3117 	new_node = fix_mem_proj(new_node, &am);
3123 * Creates a ia32 Setcc instruction.
/**
 * Create an ia32 Setcc producing 0/1 from @p flags, widened with a
 * zero-extending Conv when the result mode is wider than 8 bit
 * (setcc only writes an 8-bit register).
 */
3125 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3126                                  ir_node *flags, ia32_condition_code_t cc,
3129 	ir_mode *mode  = get_irn_mode(orig_node);
3132 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3133 	SET_IA32_ORIG_NODE(new_node, orig_node);
3135 	/* we might need to conv the result up */
3136 	if (get_mode_size_bits(mode) > 8) {
3137 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3138 		                                    nomem, new_node, mode_Bu);
3139 		SET_IA32_ORIG_NODE(new_node, orig_node);
3146 * Create instruction for an unsigned Difference or Zero.
/**
 * Create an unsigned "difference or zero": a-b via Sub, then the carry
 * flag expanded to a mask with Sbb0+Not, and the mask And-ed onto the
 * difference (0 when the Sub borrowed).
 * NOTE(review): declarations of sub/eflags/sbb/notn/dbgi/block and the
 * return are elided in this excerpt.
 */
3148 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3150 	ir_mode *mode = get_irn_mode(psi);
3160 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3161 	                     match_mode_neutral | match_am | match_immediate | match_two_users);
3163 	block = get_nodes_block(new_node);
	/* gen_binop may already have produced a Proj of the Sub */
3165 	if (is_Proj(new_node)) {
3166 		sub = get_Proj_pred(new_node);
3169 		set_irn_mode(sub, mode_T);
3170 		new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3172 	assert(is_ia32_Sub(sub));
3173 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3175 	dbgi   = get_irn_dbg_info(psi);
3176 	sbb    = new_bd_ia32_Sbb0(dbgi, block, eflags);
3177 	set_ia32_ls_mode(sbb, mode_Iu);
3178 	notn   = new_bd_ia32_Not(dbgi, block, sbb);
3180 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3181 	set_ia32_ls_mode(new_node, mode_Iu);
3182 	set_ia32_commutative(new_node);
3187 * Create an const array of two float consts.
3189 * @param c0 the first constant
3190 * @param c1 the second constant
3191 * @param new_mode IN/OUT for the mode of the constants, if NULL
3192 * smallest possible mode will be used
/**
 * See the doc comment above: build a private constant global entity
 * holding the two float constants, narrowing both to the smallest mode
 * (float, then double) that represents them losslessly when *new_mode
 * was NULL.
 */
3194 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3197 	ir_mode          *mode = *new_mode;
3199 	ir_initializer_t *initializer;
3200 	ir_tarval        *tv0 = get_Const_tarval(c0);
3201 	ir_tarval        *tv1 = get_Const_tarval(c1);
3204 		/* detect the best mode for the constants */
3205 		mode = get_tarval_mode(tv0);
3207 		if (mode != mode_F) {
3208 			if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3209 			    tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3211 				tv0 = tarval_convert_to(tv0, mode);
3212 				tv1 = tarval_convert_to(tv1, mode);
3213 			} else if (mode != mode_D) {
3214 				if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3215 				    tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3217 					tv0 = tarval_convert_to(tv0, mode);
3218 					tv1 = tarval_convert_to(tv1, mode);
3225 	tp = ia32_get_prim_type(mode);
3226 	tp = ia32_create_float_array(tp);
3228 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3230 	set_entity_ld_ident(ent, get_entity_ident(ent));
3231 	set_entity_visibility(ent, ir_visibility_private);
3232 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3234 	initializer = create_initializer_compound(2);
3236 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3237 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3239 	set_entity_initializer(ent, initializer);
3246 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc result can be post-processed with
 * (enumerators elided in this excerpt). */
3248 enum setcc_transform_insn {
/* A recipe computed by find_const_transform(): the (possibly negated)
 * condition code plus a short sequence of transform steps. */
3261 typedef struct setcc_transform {
	/* condition code to use for the initial Setcc/Sbb0 */
3263 	ia32_condition_code_t cc;
3265 	enum setcc_transform_insn transform;
3269 } setcc_transform_t;
3272 * Setcc can only handle 0 and 1 result.
3273 * Find a transformation that creates 0 and 1 from
/**
 * See the doc comment above: given a condition and the two constant Mux
 * results t/f, compute a step sequence (ADD/LEA/SHL/NEG/NOT/AND/SET/...)
 * that produces them from a 0/1 Setcc result.  Normalizes so that f is
 * zero and t is the larger value, negating the condition as needed.
 * NOTE(review): many case labels, returns and the declarations of
 * step/v are elided in this excerpt — verify before editing.
 */
3276 static void find_const_transform(ia32_condition_code_t cc,
3277                                  ir_tarval *t, ir_tarval *f,
3278                                  setcc_transform_t *res)
	/* normalize: make t the non-zero / bigger value */
3284 	if (tarval_is_null(t)) {
3288 		cc = ia32_negate_condition_code(cc);
3289 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3290 		// now, t is the bigger one
3294 		cc = ia32_negate_condition_code(cc);
	/* reduce to the f == 0 case by subtracting f and adding it back later */
3298 	if (! tarval_is_null(f)) {
3299 		ir_tarval *t_sub = tarval_sub(t, f, NULL);
3302 		res->steps[step].transform = SETCC_TR_ADD;
3304 		if (t == tarval_bad)
3305 			panic("constant subtract failed");
3306 		if (! tarval_is_long(f))
3307 			panic("tarval is not long");
3309 		res->steps[step].val = get_tarval_long(f);
3311 		f = tarval_sub(f, f, NULL);
3312 		assert(tarval_is_null(f));
	/* t == 1: the Setcc result is already correct */
3315 	if (tarval_is_one(t)) {
3316 		res->steps[step].transform = SETCC_TR_SET;
3317 		res->num_steps           = ++step;
	/* t == -1: negate the 0/1 result */
3321 	if (tarval_is_minus_one(t)) {
3322 		res->steps[step].transform = SETCC_TR_NEG;
3324 		res->steps[step].transform = SETCC_TR_SET;
3325 		res->num_steps           = ++step;
3328 	if (tarval_is_long(t)) {
3329 		long v = get_tarval_long(t);
3331 		res->steps[step].val = 0;
		/* small multipliers map onto LEA scaled-index forms */
3334 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3336 			res->steps[step].transform = SETCC_TR_LEAxx;
3337 			res->steps[step].scale     = 3; /* (a << 3) + a */
3340 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3342 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3343 			res->steps[step].scale     = 3; /* (a << 3) */
3346 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3348 			res->steps[step].transform = SETCC_TR_LEAxx;
3349 			res->steps[step].scale     = 2; /* (a << 2) + a */
3352 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3354 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3355 			res->steps[step].scale     = 2; /* (a << 2) */
3358 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3360 			res->steps[step].transform = SETCC_TR_LEAxx;
3361 			res->steps[step].scale     = 1; /* (a << 1) + a */
3364 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3366 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3367 			res->steps[step].scale     = 1; /* (a << 1) */
3370 			res->num_steps = step;
		/* general constant: AND with the value, or shift a single bit */
3373 			if (! tarval_is_single_bit(t)) {
3374 				res->steps[step].transform = SETCC_TR_AND;
3375 				res->steps[step].val       = v;
3377 				res->steps[step].transform = SETCC_TR_NEG;
3379 				int val = get_tarval_lowest_bit(t);
3382 				res->steps[step].transform = SETCC_TR_SHL;
3383 				res->steps[step].scale     = val;
3387 		res->steps[step].transform = SETCC_TR_SET;
3388 		res->num_steps             = ++step;
3391 	panic("tarval is not long");
3395 * Transforms a Mux node into some code sequence.
3397 * @return The transformed node.
/**
 * See the doc comment above: transform a Mux.  Handles, in order:
 * float abs, SSE2 min/max patterns, float Mux of two constants via a
 * table load indexed by a Setcc, unsigned difference-or-zero, integer
 * Mux of two constants via find_const_transform(), and finally CMov.
 * NOTE(review): heavily elided excerpt (case labels, braces, returns,
 * several declarations missing) — verify before editing.
 */
3399 static ir_node *gen_Mux(ir_node *node)
3401 	dbg_info *dbgi      = get_irn_dbg_info(node);
3402 	ir_node  *block     = get_nodes_block(node);
3403 	ir_node  *new_block = be_transform_node(block);
3404 	ir_node  *mux_true  = get_Mux_true(node);
3405 	ir_node  *mux_false = get_Mux_false(node);
3406 	ir_node  *sel       = get_Mux_sel(node);
3407 	ir_mode  *mode      = get_irn_mode(node);
3411 	ia32_condition_code_t cc;
3413 	assert(get_irn_mode(sel) == mode_b);
3415 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3417 		if (ia32_mode_needs_gp_reg(mode)) {
3418 			ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3421 			ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3422 			return create_float_abs(dbgi, block, op, is_abs < 0, node);
3426 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3427 	if (mode_is_float(mode)) {
3428 		ir_node    *cmp_left  = get_Cmp_left(sel);
3429 		ir_node    *cmp_right = get_Cmp_right(sel);
3430 		ir_relation relation  = get_Cmp_relation(sel);
3432 		if (ia32_cg_config.use_sse2) {
3433 			if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3434 				if (cmp_left == mux_true && cmp_right == mux_false) {
3435 					/* Mux(a <= b, a, b) => MIN */
3436 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3437 					                 match_commutative | match_am | match_two_users);
3438 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3439 					/* Mux(a <= b, b, a) => MAX */
3440 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3441 					                 match_commutative | match_am | match_two_users);
3443 			} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3444 				if (cmp_left == mux_true && cmp_right == mux_false) {
3445 					/* Mux(a >= b, a, b) => MAX */
3446 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3447 					                 match_commutative | match_am | match_two_users);
3448 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3449 					/* Mux(a >= b, b, a) => MIN */
3450 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3451 					                 match_commutative | match_am | match_two_users);
		/* float Mux of two constants: load from a 2-element constant
		 * table, indexed by the Setcc result */
3456 		if (is_Const(mux_true) && is_Const(mux_false)) {
3457 			ia32_address_mode_t am;
3462 			flags    = get_flags_node(sel, &cc);
3463 			new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3465 			if (ia32_cg_config.use_sse2) {
3466 				/* cannot load from different mode on SSE */
3469 				/* x87 can load any mode */
3473 			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
			/* index scale by element size; 10-byte long double has no
			 * matching scale, so precompute index*3 with an extra Lea */
3475 			if (new_mode == mode_F) {
3477 			} else if (new_mode == mode_D) {
3479 			} else if (new_mode == ia32_mode_E) {
3480 				/* arg, shift 16 NOT supported */
3482 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3484 				panic("Unsupported constant size");
3487 			am.ls_mode            = new_mode;
3488 			am.addr.base          = get_symconst_base();
3489 			am.addr.index         = new_node;
3490 			am.addr.mem           = nomem;
3492 			am.addr.scale         = scale;
3493 			am.addr.use_frame     = 0;
3494 			am.addr.tls_segment   = false;
3495 			am.addr.frame_entity  = NULL;
3496 			am.addr.symconst_sign = 0;
3497 			am.mem_proj           = am.addr.mem;
3498 			am.op_type            = ia32_AddrModeS;
3501 			am.pinned             = op_pin_state_floats;
3503 			am.ins_permuted       = false;
3505 			if (ia32_cg_config.use_sse2)
3506 				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3508 				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3509 			set_am_attributes(load, &am);
3511 			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3513 		panic("cannot transform floating point Mux");
3516 		assert(ia32_mode_needs_gp_reg(mode));
3519 			ir_node    *cmp_left  = get_Cmp_left(sel);
3520 			ir_node    *cmp_right = get_Cmp_right(sel);
3521 			ir_relation relation  = get_Cmp_relation(sel);
3522 			ir_node    *val_true  = mux_true;
3523 			ir_node    *val_false = mux_false;
			/* normalize: put the zero constant on the false side */
3525 			if (is_Const(val_true) && is_Const_null(val_true)) {
3526 				ir_node *tmp = val_false;
3527 				val_false = val_true;
3529 				relation  = get_negated_relation(relation);
			/* Mux(a >/< b, a - b, 0) => difference-or-zero */
3531 			if (is_Const_0(val_false) && is_Sub(val_true)) {
3532 				if ((relation & ir_relation_greater)
3533 					&& get_Sub_left(val_true) == cmp_left
3534 					&& get_Sub_right(val_true) == cmp_right) {
3535 					return create_doz(node, cmp_left, cmp_right);
3537 				if ((relation & ir_relation_less)
3538 					&& get_Sub_left(val_true) == cmp_right
3539 					&& get_Sub_right(val_true) == cmp_left) {
3540 					return create_doz(node, cmp_right, cmp_left);
3545 		flags = get_flags_node(sel, &cc);
3547 		if (is_Const(mux_true) && is_Const(mux_false)) {
3548 			/* both are const, good */
3549 			ir_tarval *tv_true  = get_Const_tarval(mux_true);
3550 			ir_tarval *tv_false = get_Const_tarval(mux_false);
3551 			setcc_transform_t res;
3554 			find_const_transform(cc, tv_true, tv_false, &res);
			/* replay the recipe back-to-front, starting from the Setcc */
3556 			for (step = (int)res.num_steps - 1; step >= 0; --step) {
3559 				switch (res.steps[step].transform) {
3561 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3562 					add_ia32_am_offs_int(new_node, res.steps[step].val);
3564 				case SETCC_TR_ADDxx:
3565 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3568 					new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3569 					set_ia32_am_scale(new_node, res.steps[step].scale);
3570 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3572 				case SETCC_TR_LEAxx:
3573 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3574 					set_ia32_am_scale(new_node, res.steps[step].scale);
3575 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3578 					imm = ia32_immediate_from_long(res.steps[step].scale);
3579 					new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3582 					new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3585 					new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3588 					imm = ia32_immediate_from_long(res.steps[step].val);
3589 					new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3592 					new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3595 					new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3598 					panic("unknown setcc transform");
			/* non-constant results: fall back to a conditional move */
3602 			new_node = create_CMov(node, sel, flags, cc);
3609 * Create a conversion from x87 state register to general purpose.
/**
 * See the doc comment above: convert an x87 value to an integer by
 * storing it to the frame with fist(tp) and reloading it as an integer.
 * Unsigned 32-bit results are stored as a 64-bit signed integer and
 * only the lower 32 bits are loaded back.
 */
3611 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3613 	ir_node         *block  = be_transform_node(get_nodes_block(node));
3614 	ir_node         *op     = get_Conv_op(node);
3615 	ir_node         *new_op = be_transform_node(op);
3616 	ir_graph        *irg    = current_ir_graph;
3617 	dbg_info        *dbgi   = get_irn_dbg_info(node);
3618 	ir_mode         *mode   = get_irn_mode(node);
3619 	ir_node         *frame  = get_irg_frame(irg);
3620 	ir_node         *fist, *load, *mem;
3622 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3623 	set_irn_pinned(fist, op_pin_state_floats);
3624 	set_ia32_use_frame(fist);
3625 	set_ia32_op_type(fist, ia32_AddrModeD);
	/* vfist and vfisttp share the memory Proj number */
3627 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3628 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3630 	assert(get_mode_size_bits(mode) <= 32);
3631 	/* exception we can only store signed 32 bit integers, so for unsigned
3632 	   we store a 64bit (signed) integer and load the lower bits */
3633 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3634 		set_ia32_ls_mode(fist, mode_Ls);
3636 		set_ia32_ls_mode(fist, mode_Is);
3638 	SET_IA32_ORIG_NODE(fist, node);
	/* reload the (lower 32 bits of the) stored value as an integer */
3641 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3643 	set_irn_pinned(load, op_pin_state_floats);
3644 	set_ia32_use_frame(load);
3645 	set_ia32_op_type(load, ia32_AddrModeS);
3646 	set_ia32_ls_mode(load, mode_Is);
	/* tell spill-slot assignment how big the frame entity must be */
3647 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3648 		ia32_attr_t *attr = get_ia32_attr(load);
3649 		attr->data.need_64bit_stackent = 1;
3651 		ia32_attr_t *attr = get_ia32_attr(load);
3652 		attr->data.need_32bit_stackent = 1;
3654 	SET_IA32_ORIG_NODE(load, node);
3656 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3660 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * See the doc comment above: force rounding to @p tgt_mode precision by
 * storing the x87 value to the frame and loading it back.
 */
3662 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3664 	ir_node  *block    = get_nodes_block(node);
3665 	ir_graph *irg      = get_Block_irg(block);
3666 	dbg_info *dbgi     = get_irn_dbg_info(node);
3667 	ir_node  *frame    = get_irg_frame(irg);
3669 	ir_node  *store, *load;
3672 	store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3673 	set_ia32_use_frame(store);
3674 	set_ia32_op_type(store, ia32_AddrModeD);
3675 	SET_IA32_ORIG_NODE(store, node);
3677 	store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3679 	load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3680 	set_ia32_use_frame(load);
3681 	set_ia32_op_type(load, ia32_AddrModeS);
3682 	SET_IA32_ORIG_NODE(load, node);
3684 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer Conv, choosing the 8-bit constructor
 * for 8-bit modes and the generic one otherwise. */
3688 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3689 		ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3691 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3693 	func = get_mode_size_bits(mode) == 8 ?
3694 		new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3695 	return func(dbgi, block, base, index, mem, val, mode);
3699 * Create a conversion from general purpose to x87 register
/* Transform an int -> float Conv into x87 code: either a fild directly from
 * the source's address mode, or a spill of the GP value to the frame followed
 * by a fild from that slot.  32-bit unsigned sources get a 64-bit slot with a
 * zeroed high word so the (signed) fild reads a non-negative value. */
3701 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3703 ir_node *src_block = get_nodes_block(node);
3704 ir_node *block = be_transform_node(src_block);
3705 ir_graph *irg = get_Block_irg(block);
3706 dbg_info *dbgi = get_irn_dbg_info(node);
3707 ir_node *op = get_Conv_op(node);
3708 ir_node *new_op = NULL;
3710 ir_mode *store_mode;
/* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3717 if (possible_int_mode_for_fp(src_mode)) {
3718 ia32_address_mode_t am;
3720 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3721 if (am.op_type == ia32_AddrModeS) {
3722 ia32_address_t *addr = &am.addr;
/* operand is loadable from memory: emit fild with source address mode */
3724 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3725 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3727 set_am_attributes(fild, &am);
3728 SET_IA32_ORIG_NODE(fild, node);
3730 fix_mem_proj(fild, &am);
/* no AM match: transform the operand into a register value */
3735 if (new_op == NULL) {
3736 new_op = be_transform_node(op);
3739 mode = get_irn_mode(op);
3741 /* first convert to 32 bit signed if necessary */
3742 if (get_mode_size_bits(src_mode) < 32) {
3743 if (!upper_bits_clean(new_op, src_mode)) {
3744 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3745 SET_IA32_ORIG_NODE(new_op, node);
3750 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can pick it up */
3753 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3755 set_ia32_use_frame(store);
3756 set_ia32_op_type(store, ia32_AddrModeD);
3757 set_ia32_ls_mode(store, mode_Iu);
3759 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3761 /* exception for 32bit unsigned, do a 64bit spill+load */
3762 if (!mode_is_signed(mode)) {
3765 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* store a zero into the upper 4 bytes so the 64-bit fild sees a
 * non-negative (i.e. correctly-valued) signed integer */
3767 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3768 noreg_GP, nomem, zero_const);
3769 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3771 set_ia32_use_frame(zero_store);
3772 set_ia32_op_type(zero_store, ia32_AddrModeD);
3773 add_ia32_am_offs_int(zero_store, 4);
3774 set_ia32_ls_mode(zero_store, mode_Iu);
/* join both stores into one memory state for the fild */
3776 in[0] = zero_store_mem;
3779 store_mem = new_rd_Sync(dbgi, block, 2, in);
3780 store_mode = mode_Ls;
3782 store_mode = mode_Is;
/* load the spilled integer into an x87 register */
3786 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3788 set_ia32_use_frame(fild);
3789 set_ia32_op_type(fild, ia32_AddrModeS);
3790 set_ia32_ls_mode(fild, store_mode);
3792 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3798 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes.  Only the smaller of the
 * two modes matters: a widening conv sign/zero-extends from src_mode, a
 * narrowing conv truncates to tgt_mode.  Skips the conv entirely when the
 * operand's upper bits are already clean. */
3800 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3801 dbg_info *dbgi, ir_node *block, ir_node *op,
3804 ir_node *new_block = be_transform_node(block);
3806 ir_mode *smaller_mode;
3807 ia32_address_mode_t am;
3808 ia32_address_t *addr = &am.addr;
/* the conversion semantics are determined by the narrower of the two modes */
3811 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3812 smaller_mode = src_mode;
3814 smaller_mode = tgt_mode;
3817 #ifdef DEBUG_libfirm
/* a conv directly after a constant should have been folded earlier */
3819 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3824 match_arguments(&am, block, NULL, op, NULL,
3825 match_am | match_8bit_am | match_16bit_am);
3827 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3828 /* unnecessary conv. in theory it shouldn't have been AM */
3829 assert(is_ia32_NoReg_GP(addr->base));
3830 assert(is_ia32_NoReg_GP(addr->index));
3831 assert(is_NoMem(addr->mem));
3832 assert(am.addr.offset == 0);
3833 assert(am.addr.symconst_ent == NULL);
3837 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3838 addr->mem, am.new_op2, smaller_mode);
3839 set_am_attributes(new_node, &am);
3840 /* match_arguments assume that out-mode = in-mode, this isn't true here
3842 set_ia32_ls_mode(new_node, smaller_mode);
3843 SET_IA32_ORIG_NODE(new_node, node);
3844 new_node = fix_mem_proj(new_node, &am);
3849 * Transforms a Conv node.
3851 * @return The created ia32 Conv node
/* Transform a firm Conv node into ia32 code.  Dispatches on the four
 * float/int source/target combinations, handling SSE2 vs. x87 separately,
 * and elides conversions that are no-ops (same mode, same bit width,
 * target mode_b). */
3853 static ir_node *gen_Conv(ir_node *node)
3855 ir_node *block = get_nodes_block(node);
3856 ir_node *new_block = be_transform_node(block);
3857 ir_node *op = get_Conv_op(node);
3858 ir_node *new_op = NULL;
3859 dbg_info *dbgi = get_irn_dbg_info(node);
3860 ir_mode *src_mode = get_irn_mode(op);
3861 ir_mode *tgt_mode = get_irn_mode(node);
3862 int src_bits = get_mode_size_bits(src_mode);
3863 int tgt_bits = get_mode_size_bits(tgt_mode);
3864 ir_node *res = NULL;
/* 64-bit integer ops must have been lowered to 32-bit pairs already */
3866 assert(!mode_is_int(src_mode) || src_bits <= 32);
3867 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3869 /* modeB -> X should already be lowered by the lower_mode_b pass */
3870 if (src_mode == mode_b) {
3871 panic("ConvB not lowered %+F", node);
/* same-mode conv: only strict x87 convs have an effect (rounding) */
3874 if (src_mode == tgt_mode) {
3875 if (get_Conv_strict(node)) {
3876 if (ia32_cg_config.use_sse2) {
3877 /* when we are in SSE mode, we can kill all strict no-op conversion */
3878 return be_transform_node(op);
3881 /* this should be optimized already, but who knows... */
3882 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3883 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3884 return be_transform_node(op);
3888 if (mode_is_float(src_mode)) {
3889 new_op = be_transform_node(op);
3890 /* we convert from float ... */
3891 if (mode_is_float(tgt_mode)) {
/* float -> float */
3893 if (ia32_cg_config.use_sse2) {
3894 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3895 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3897 set_ia32_ls_mode(res, tgt_mode);
3899 if (get_Conv_strict(node)) {
3900 /* if fp_no_float_fold is not set then we assume that we
3901 * don't have any float operations in a non
3902 * mode_float_arithmetic mode and can skip strict upconvs */
3903 if (src_bits < tgt_bits) {
3904 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict downconv: force rounding via a spill/reload */
3907 res = gen_x87_strict_conv(tgt_mode, new_op);
3908 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* non-strict x87 float convs are no-ops (registers are 80 bit anyway) */
3912 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3917 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3918 if (ia32_cg_config.use_sse2) {
3919 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3921 set_ia32_ls_mode(res, src_mode);
3923 return gen_x87_fp_to_gp(node);
3927 /* we convert from int ... */
3928 if (mode_is_float(tgt_mode)) {
/* int -> float */
3930 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3931 if (ia32_cg_config.use_sse2) {
3932 new_op = be_transform_node(op);
3933 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3935 set_ia32_ls_mode(res, tgt_mode);
3937 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3938 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3939 res = gen_x87_gp_to_fp(node, src_mode);
3941 /* we need a strict-Conv, if the int mode has more bits than the
3943 if (float_mantissa < int_mantissa) {
3944 res = gen_x87_strict_conv(tgt_mode, res);
3945 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3949 } else if (tgt_mode == mode_b) {
3950 /* mode_b lowering already took care that we only have 0/1 values */
3951 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3952 src_mode, tgt_mode));
3953 return be_transform_node(op);
/* int -> int */
3956 if (src_bits == tgt_bits) {
3957 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3958 src_mode, tgt_mode));
3959 return be_transform_node(op);
3962 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 immediate (subject to the given asm-style
 * constraint letter); fall back to normal node transformation otherwise. */
3970 static ir_node *create_immediate_or_transform(ir_node *node,
3971 char immediate_constraint_type)
3973 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3974 if (new_node == NULL) {
3975 new_node = be_transform_node(node);
3981 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr (address of a frame entity) into an ia32 Lea
 * that is marked as frame-relative; the concrete offset is filled in once
 * the stack frame is laid out. */
3983 static ir_node *gen_be_FrameAddr(ir_node *node)
3985 ir_node *block = be_transform_node(get_nodes_block(node));
3986 ir_node *op = be_get_FrameAddr_frame(node);
3987 ir_node *new_op = be_transform_node(op);
3988 dbg_info *dbgi = get_irn_dbg_info(node);
3991 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3992 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3993 set_ia32_use_frame(new_node);
3995 SET_IA32_ORIG_NODE(new_node, node);
4001 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return.  In SSE2 mode a float return value lives in xmm0
 * but the ABI expects it on the x87 stack, so it is copied via a frame slot
 * (xmm store + x87 load) before building the new Return.  All other cases
 * just duplicate the node. */
4003 static ir_node *gen_be_Return(ir_node *node)
4005 ir_graph *irg = current_ir_graph;
4006 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
4007 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
4008 ir_node *new_ret_val = be_transform_node(ret_val);
4009 ir_node *new_ret_mem = be_transform_node(ret_mem);
4010 ir_entity *ent = get_irg_entity(irg);
4011 ir_type *tp = get_entity_type(ent);
4012 dbg_info *dbgi = get_irn_dbg_info(node);
4013 ir_node *block = be_transform_node(get_nodes_block(node));
4027 assert(ret_val != NULL);
/* the xmm0 -> x87 fixup is only needed for SSE2 float returns */
4028 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4029 return be_duplicate_node(node);
4032 res_type = get_method_res_type(tp, 0);
4034 if (! is_Primitive_type(res_type)) {
4035 return be_duplicate_node(node);
4038 mode = get_type_mode(res_type);
4039 if (! mode_is_float(mode)) {
4040 return be_duplicate_node(node);
4043 assert(get_method_n_ress(tp) == 1);
4045 frame = get_irg_frame(irg);
4047 /* store xmm0 onto stack */
4048 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4049 new_ret_mem, new_ret_val);
4050 set_ia32_ls_mode(sse_store, mode);
4051 set_ia32_op_type(sse_store, ia32_AddrModeD);
4052 set_ia32_use_frame(sse_store);
4053 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4055 /* load into x87 register */
4056 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4057 set_ia32_op_type(fld, ia32_AddrModeS);
4058 set_ia32_use_frame(fld);
4060 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4061 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4063 /* create a new return */
4064 arity = get_irn_arity(node);
4065 in = ALLOCAN(ir_node*, arity);
4066 pop = be_Return_get_pop(node);
/* rebuild the operand list, substituting the fixed-up value and memory */
4067 for (i = 0; i < arity; ++i) {
4068 ir_node *op = get_irn_n(node, i);
4069 if (op == ret_val) {
4071 } else if (op == ret_mem) {
4074 in[i] = be_transform_node(op);
4077 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4078 copy_node_attr(irg, node, new_node);
4084 * Transform a be_AddSP into an ia32_SubSP.
/* Transform a be_AddSP into an ia32 SubSP: growing the (downward-growing)
 * stack by 'size' means subtracting from esp.  The stack output is pinned
 * to the esp register. */
4086 static ir_node *gen_be_AddSP(ir_node *node)
4088 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4089 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4091 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4092 match_am | match_immediate);
4093 assert(is_ia32_SubSP(new_node));
4094 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4095 &ia32_registers[REG_ESP]);
4100 * Transform a be_SubSP into an ia32_AddSP
/* Transform a be_SubSP into an ia32 AddSP: shrinking the stack means adding
 * to esp (mirror image of gen_be_AddSP).  The stack output is pinned to esp. */
4102 static ir_node *gen_be_SubSP(ir_node *node)
4104 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4105 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4107 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4108 match_am | match_immediate);
4109 assert(is_ia32_AddSP(new_node));
4110 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4111 &ia32_registers[REG_ESP]);
4116 * Change some phi modes
/* Transform a Phi: keep the node but attach the register-class requirement
 * matching its mode (gp for integers/pointers, xmm or vfp for floats).
 * Phi inputs may form loops, so the old (untransformed) arguments are kept
 * and fixed up in a later pass. */
4118 static ir_node *gen_Phi(ir_node *node)
4120 const arch_register_req_t *req;
4121 ir_node *block = be_transform_node(get_nodes_block(node));
4122 ir_graph *irg = current_ir_graph;
4123 dbg_info *dbgi = get_irn_dbg_info(node);
4124 ir_mode *mode = get_irn_mode(node);
4127 if (ia32_mode_needs_gp_reg(mode)) {
4128 /* we shouldn't have any 64bit stuff around anymore */
4129 assert(get_mode_size_bits(mode) <= 32);
4130 /* all integer operations are on 32bit registers now */
4132 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4133 } else if (mode_is_float(mode)) {
4134 if (ia32_cg_config.use_sse2) {
4136 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4139 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* non-data modes (e.g. memory) carry no register requirement */
4142 req = arch_no_register_req;
4145 /* phi nodes allow loops, so we use the old arguments for now
4146 * and fix this later */
4147 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4148 get_irn_in(node) + 1);
4149 copy_node_attr(irg, node, phi);
4150 be_duplicate_deps(node, phi);
4152 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the predecessors get transformed even though we kept old inputs */
4154 be_enqueue_preds(node);
/* Transform an unconditional Jmp into its ia32 counterpart. */
4159 static ir_node *gen_Jmp(ir_node *node)
4161 ir_node *block = get_nodes_block(node);
4162 ir_node *new_block = be_transform_node(block);
4163 dbg_info *dbgi = get_irn_dbg_info(node);
4166 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4167 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp).  The target address may be matched as
 * an address-mode operand or immediate instead of a plain register value. */
4175 static ir_node *gen_IJmp(ir_node *node)
4177 ir_node *block = get_nodes_block(node);
4178 ir_node *new_block = be_transform_node(block);
4179 dbg_info *dbgi = get_irn_dbg_info(node);
4180 ir_node *op = get_IJmp_target(node);
4182 ia32_address_mode_t am;
4183 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4185 assert(get_irn_mode(op) == mode_P);
4187 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4189 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4190 addr->mem, am.new_op2);
4191 set_am_attributes(new_node, &am);
4192 SET_IA32_ORIG_NODE(new_node, node);
4194 new_node = fix_mem_proj(new_node, &am);
/* Transform a lowered l_Add (from 64-bit lowering) into a real ia32 Add in
 * mode_T, so the flags output remains available to the paired Adc. */
4199 static ir_node *gen_ia32_l_Add(ir_node *node)
4201 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4202 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4203 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4204 match_commutative | match_am | match_immediate |
4205 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add node itself */
4207 if (is_Proj(lowered)) {
4208 lowered = get_Proj_pred(lowered);
4210 assert(is_ia32_Add(lowered));
4211 set_irn_mode(lowered, mode_T);
/* Transform a lowered l_Adc (add with carry, upper half of a 64-bit add)
 * into an ia32 Adc consuming the flags produced by the matching Add. */
4217 static ir_node *gen_ia32_l_Adc(ir_node *node)
4219 return gen_binop_flags(node, new_bd_ia32_Adc,
4220 match_commutative | match_am | match_immediate |
4221 match_mode_neutral);
4225 * Transforms a l_MulS into a "real" MulS node.
4227 * @return the created ia32 Mul node
/* Transform a lowered l_Mul into a real ia32 Mul (unsigned widening
 * multiply; produces low and high result halves). */
4229 static ir_node *gen_ia32_l_Mul(ir_node *node)
4231 ir_node *left = get_binop_left(node);
4232 ir_node *right = get_binop_right(node);
4234 return gen_binop(node, left, right, new_bd_ia32_Mul,
4235 match_commutative | match_am | match_mode_neutral);
4239 * Transforms a l_IMulS into a "real" IMul1OPS node.
4241 * @return the created ia32 IMul1OP node
/* Transform a lowered l_IMul into an ia32 IMul1OP (one-operand signed
 * widening multiply). */
4243 static ir_node *gen_ia32_l_IMul(ir_node *node)
4245 ir_node *left = get_binop_left(node);
4246 ir_node *right = get_binop_right(node);
4248 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4249 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered l_Sub into a real ia32 Sub in mode_T, keeping the
 * flags output available for the paired Sbb (64-bit subtraction). */
4252 static ir_node *gen_ia32_l_Sub(ir_node *node)
4254 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4255 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4256 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4257 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Sub node itself */
4259 if (is_Proj(lowered)) {
4260 lowered = get_Proj_pred(lowered);
4262 assert(is_ia32_Sub(lowered));
4263 set_irn_mode(lowered, mode_T);
/* Transform a lowered l_Sbb (subtract with borrow, upper half of a 64-bit
 * subtraction) into an ia32 Sbb consuming the flags of the matching Sub. */
4269 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4271 return gen_binop_flags(node, new_bd_ia32_Sbb,
4272 match_am | match_immediate | match_mode_neutral);
/* Transform a lowered 64-bit-int -> float conversion (l_LLtoFloat): spill
 * both 32-bit halves into a 64-bit frame slot and fild them as one signed
 * 64-bit value.  For unsigned sources whose sign bit was set, the result is
 * corrected by adding the 2^64 bias constant (ULLBIAS), selected via an
 * address computed from the source's sign bit. */
4275 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4277 ir_node *src_block = get_nodes_block(node);
4278 ir_node *block = be_transform_node(src_block);
4279 ir_graph *irg = current_ir_graph;
4280 dbg_info *dbgi = get_irn_dbg_info(node);
4281 ir_node *frame = get_irg_frame(irg);
4282 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4283 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4284 ir_node *new_val_low = be_transform_node(val_low);
4285 ir_node *new_val_high = be_transform_node(val_high);
4287 ir_node *sync, *fild, *res;
4289 ir_node *store_high;
4293 if (ia32_cg_config.use_sse2) {
4294 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves to adjacent frame-slot words */
4298 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4300 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4302 SET_IA32_ORIG_NODE(store_low, node);
4303 SET_IA32_ORIG_NODE(store_high, node);
4305 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4306 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4308 set_ia32_use_frame(store_low);
4309 set_ia32_use_frame(store_high);
4310 set_ia32_op_type(store_low, ia32_AddrModeD);
4311 set_ia32_op_type(store_high, ia32_AddrModeD);
4312 set_ia32_ls_mode(store_low, mode_Iu);
4313 set_ia32_ls_mode(store_high, mode_Is);
/* the high word lives 4 bytes above the low word */
4314 add_ia32_am_offs_int(store_high, 4);
4318 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the combined 64-bit value onto the x87 stack */
4321 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4323 set_ia32_use_frame(fild);
4324 set_ia32_op_type(fild, ia32_AddrModeS);
4325 set_ia32_ls_mode(fild, mode_Ls);
4327 SET_IA32_ORIG_NODE(fild, node);
4329 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4331 if (! mode_is_signed(get_irn_mode(val_high))) {
/* unsigned source: fild interpreted the value as signed, so when the
 * sign bit was set the result is off by 2^64.  Add a bias constant
 * selected by the sign bit (high word >> 31 indexes the const table). */
4332 ia32_address_mode_t am;
4334 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4337 am.addr.base = get_symconst_base();
4338 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4339 am.addr.mem = nomem;
4342 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4343 am.addr.tls_segment = false;
4344 am.addr.use_frame = 0;
4345 am.addr.frame_entity = NULL;
4346 am.addr.symconst_sign = 0;
4347 am.ls_mode = mode_F;
4348 am.mem_proj = nomem;
4349 am.op_type = ia32_AddrModeS;
4351 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4352 am.pinned = op_pin_state_floats;
4354 am.ins_permuted = false;
4356 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4357 am.new_op1, am.new_op2, get_fpcw());
4358 set_am_attributes(fadd, &am);
4360 set_irn_mode(fadd, mode_T);
4361 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Transform a lowered float -> 64-bit-int conversion (l_FloattoLL): fist
 * the x87 value as a 64-bit integer into a frame slot.  The two 32-bit
 * result halves are read back later by gen_Proj_l_FloattoLL; this function
 * only returns the memory state of the store. */
4366 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4368 ir_node *src_block = get_nodes_block(node);
4369 ir_node *block = be_transform_node(src_block);
4370 ir_graph *irg = get_Block_irg(block);
4371 dbg_info *dbgi = get_irn_dbg_info(node);
4372 ir_node *frame = get_irg_frame(irg);
4373 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4374 ir_node *new_val = be_transform_node(val);
4377 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4378 SET_IA32_ORIG_NODE(fist, node);
4379 set_ia32_use_frame(fist);
4380 set_ia32_op_type(fist, ia32_AddrModeD);
4381 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may create either a vfist or a vfisttp; their M proj numbers
 * must coincide for the Proj below to be valid for both */
4383 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4384 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Transform a Proj of l_FloattoLL: load one 32-bit half of the 64-bit
 * integer that the fist wrote into the frame slot (offset 4 for the high
 * half, offset 0 for the low half). */
4387 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4389 ir_node *block = be_transform_node(get_nodes_block(node));
4390 ir_graph *irg = get_Block_irg(block);
4391 ir_node *pred = get_Proj_pred(node);
4392 ir_node *new_pred = be_transform_node(pred);
4393 ir_node *frame = get_irg_frame(irg);
4394 dbg_info *dbgi = get_irn_dbg_info(node);
4395 long pn = get_Proj_proj(node);
4400 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4401 SET_IA32_ORIG_NODE(load, node);
4402 set_ia32_use_frame(load);
4403 set_ia32_op_type(load, ia32_AddrModeS);
4404 set_ia32_ls_mode(load, mode_Iu);
4405 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4406 * 32 bit from it with this particular load */
4407 attr = get_ia32_attr(load);
4408 attr->data.need_64bit_stackent = 1;
4410 if (pn == pn_ia32_l_FloattoLL_res_high) {
4411 add_ia32_am_offs_int(load, 4);
4413 assert(pn == pn_ia32_l_FloattoLL_res_low);
4416 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4422 * Transform the Projs of an AddSP.
/* Transform Projs of a be_AddSP, renumbering them to the ia32 SubSP node
 * that gen_be_AddSP produced (stack pointer, allocated address, memory). */
4424 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4426 ir_node *pred = get_Proj_pred(node);
4427 ir_node *new_pred = be_transform_node(pred);
4428 dbg_info *dbgi = get_irn_dbg_info(node);
4429 long proj = get_Proj_proj(node);
4431 if (proj == pn_be_AddSP_sp) {
/* the new stack pointer must stay in esp */
4432 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4433 pn_ia32_SubSP_stack);
4434 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4436 } else if (proj == pn_be_AddSP_res) {
4437 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4438 pn_ia32_SubSP_addr);
4439 } else if (proj == pn_be_AddSP_M) {
4440 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4443 panic("No idea how to transform proj->AddSP");
4447 * Transform the Projs of a SubSP.
/* Transform Projs of a be_SubSP, renumbering them to the ia32 AddSP node
 * that gen_be_SubSP produced (stack pointer, memory). */
4449 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4451 ir_node *pred = get_Proj_pred(node);
4452 ir_node *new_pred = be_transform_node(pred);
4453 dbg_info *dbgi = get_irn_dbg_info(node);
4454 long proj = get_Proj_proj(node);
4456 if (proj == pn_be_SubSP_sp) {
/* the new stack pointer must stay in esp */
4457 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4458 pn_ia32_AddSP_stack);
4459 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4461 } else if (proj == pn_be_SubSP_M) {
4462 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4465 panic("No idea how to transform proj->SubSP");
4469 * Transform and renumber the Projs from a Load.
/* Transform and renumber Projs of a Load.  The transformed predecessor may
 * be a plain ia32 Load, a Conv that absorbed the load via source address
 * mode, an SSE xLoad, or an x87 vfld; each has its own proj numbering.
 * Memory Projs of multi-user loads are deferred because the load might
 * still be folded into another node's address mode. */
4471 static ir_node *gen_Proj_Load(ir_node *node)
4474 ir_node *pred = get_Proj_pred(node);
4475 dbg_info *dbgi = get_irn_dbg_info(node);
4476 long proj = get_Proj_proj(node);
4478 /* loads might be part of source address mode matches, so we don't
4479 * transform the ProjMs yet (with the exception of loads whose result is
4482 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4485 /* this is needed, because sometimes we have loops that are only
4486 reachable through the ProjM */
4487 be_enqueue_preds(node);
4488 /* do it in 2 steps, to silence firm verifier */
4489 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4490 set_Proj_proj(res, pn_ia32_mem);
4494 /* renumber the proj */
4495 new_pred = be_transform_node(pred);
4496 if (is_ia32_Load(new_pred)) {
4497 switch ((pn_Load)proj) {
4499 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4501 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4502 case pn_Load_X_except:
4503 /* This Load might raise an exception. Mark it. */
4504 set_ia32_exc_label(new_pred, 1);
4505 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4506 case pn_Load_X_regular:
4507 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4509 } else if (is_ia32_Conv_I2I(new_pred) ||
4510 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a conversion via source address mode */
4511 set_irn_mode(new_pred, mode_T);
4512 switch ((pn_Load)proj) {
4514 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4516 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4517 case pn_Load_X_except:
4518 /* This Load might raise an exception. Mark it. */
4519 set_ia32_exc_label(new_pred, 1);
4520 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4521 case pn_Load_X_regular:
4522 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4524 } else if (is_ia32_xLoad(new_pred)) {
4525 switch ((pn_Load)proj) {
4527 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4529 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4530 case pn_Load_X_except:
4531 /* This Load might raise an exception. Mark it. */
4532 set_ia32_exc_label(new_pred, 1);
4533 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4534 case pn_Load_X_regular:
4535 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4537 } else if (is_ia32_vfld(new_pred)) {
4538 switch ((pn_Load)proj) {
4540 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4542 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4543 case pn_Load_X_except:
4544 /* This Load might raise an exception. Mark it. */
4545 set_ia32_exc_label(new_pred, 1);
4546 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4547 case pn_Load_X_regular:
4548 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4551 /* can happen for ProJMs when source address mode happened for the
4554 /* however it should not be the result proj, as that would mean the
4555 load had multiple users and should not have been used for
4557 if (proj != pn_Load_M) {
4558 panic("internal error: transformed node not a Load");
4560 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4563 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber Projs of a Store.  The transformed predecessor can
 * be a GP Store, an x87 vfist/vfisttp or vfst, an SSE xStore, a Sync (from
 * gen_float_const_Store), or a destination-address-mode node; each case has
 * its own proj numbering.  Exception control flow is only supported for the
 * plain store variants. */
4566 static ir_node *gen_Proj_Store(ir_node *node)
4568 ir_node *pred = get_Proj_pred(node);
4569 ir_node *new_pred = be_transform_node(pred);
4570 dbg_info *dbgi = get_irn_dbg_info(node);
4571 long pn = get_Proj_proj(node);
4573 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4574 switch ((pn_Store)pn) {
4576 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4577 case pn_Store_X_except:
4578 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4579 case pn_Store_X_regular:
4580 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4582 } else if (is_ia32_vfist(new_pred)) {
4583 switch ((pn_Store)pn) {
4585 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4586 case pn_Store_X_except:
4587 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4588 case pn_Store_X_regular:
4589 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4591 } else if (is_ia32_vfisttp(new_pred)) {
4592 switch ((pn_Store)pn) {
4594 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4595 case pn_Store_X_except:
4596 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4597 case pn_Store_X_regular:
4598 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4600 } else if (is_ia32_vfst(new_pred)) {
4601 switch ((pn_Store)pn) {
4603 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4604 case pn_Store_X_except:
4605 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4606 case pn_Store_X_regular:
4607 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4609 } else if (is_ia32_xStore(new_pred)) {
4610 switch ((pn_Store)pn) {
4612 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4613 case pn_Store_X_except:
4614 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4615 case pn_Store_X_regular:
4616 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4618 } else if (is_Sync(new_pred)) {
4619 /* hack for the case that gen_float_const_Store produced a Sync */
4620 if (pn == pn_Store_M) {
4623 panic("exception control flow for gen_float_const_Store not implemented yet");
4624 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4625 /* destination address mode */
4626 if (pn == pn_Store_M) {
4629 panic("exception control flow for destination AM not implemented yet");
4632 panic("No idea how to transform Proj(Store) %+F", node);
4636 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber Projs of a Div.  The transformed predecessor may
 * be an integer Div/IDiv, an SSE xDiv, or an x87 vfdiv; memory and result
 * Projs are dispatched per variant. */
4638 static ir_node *gen_Proj_Div(ir_node *node)
4640 ir_node *pred = get_Proj_pred(node);
4641 ir_node *new_pred = be_transform_node(pred);
4642 dbg_info *dbgi = get_irn_dbg_info(node);
4643 long proj = get_Proj_proj(node);
/* Div and IDiv share proj numbers, so one case handles both */
4645 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4646 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4648 switch ((pn_Div)proj) {
4650 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4651 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4652 } else if (is_ia32_xDiv(new_pred)) {
4653 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4654 } else if (is_ia32_vfdiv(new_pred)) {
4655 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4657 panic("Div transformed to unexpected thing %+F", new_pred);
4660 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4661 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4662 } else if (is_ia32_xDiv(new_pred)) {
4663 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4664 } else if (is_ia32_vfdiv(new_pred)) {
4665 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4667 panic("Div transformed to unexpected thing %+F", new_pred);
4669 case pn_Div_X_except:
/* integer division can trap (divide by zero) */
4670 set_ia32_exc_label(new_pred, 1);
4671 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4672 case pn_Div_X_regular:
4673 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4676 panic("No idea how to transform proj->Div");
4680 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber Projs of a Mod.  Mod is always implemented by the
 * integer Div/IDiv instruction; the remainder is its mod_res output. */
4682 static ir_node *gen_Proj_Mod(ir_node *node)
4684 ir_node *pred = get_Proj_pred(node);
4685 ir_node *new_pred = be_transform_node(pred);
4686 dbg_info *dbgi = get_irn_dbg_info(node);
4687 long proj = get_Proj_proj(node);
/* Div and IDiv share proj numbers, so one case handles both */
4689 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4690 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4691 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4693 switch ((pn_Mod)proj) {
4695 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4697 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4698 case pn_Mod_X_except:
/* integer division can trap (divide by zero) */
4699 set_ia32_exc_label(new_pred, 1);
4700 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4701 case pn_Mod_X_regular:
4702 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4704 panic("No idea how to transform proj->Mod");
4708 * Transform and renumber the Projs from a CopyB.
/* Transform and renumber Projs of a CopyB.  The transformed predecessor is
 * either the immediate-size variant (CopyB_i) or the general one (CopyB),
 * which use distinct proj numberings. */
4710 static ir_node *gen_Proj_CopyB(ir_node *node)
4712 ir_node *pred = get_Proj_pred(node);
4713 ir_node *new_pred = be_transform_node(pred);
4714 dbg_info *dbgi = get_irn_dbg_info(node);
4715 long proj = get_Proj_proj(node);
4717 switch ((pn_CopyB)proj) {
4719 if (is_ia32_CopyB_i(new_pred)) {
4720 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4721 } else if (is_ia32_CopyB(new_pred)) {
4722 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4725 case pn_CopyB_X_regular:
4726 if (is_ia32_CopyB_i(new_pred)) {
4727 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4728 } else if (is_ia32_CopyB(new_pred)) {
4729 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4732 case pn_CopyB_X_except:
4733 if (is_ia32_CopyB_i(new_pred)) {
4734 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4735 } else if (is_ia32_CopyB(new_pred)) {
4736 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4741 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call.  Matches the call target as an
 * address-mode operand or immediate, maps the limited register parameters
 * onto eax/ecx/edx inputs, and flags the graph for x87 simulation when the
 * call returns a float.  SSE2 calls are additionally recorded in call_list
 * for a later fixup pass. */
4744 static ir_node *gen_be_Call(ir_node *node)
4746 dbg_info *const dbgi = get_irn_dbg_info(node);
4747 ir_node *const src_block = get_nodes_block(node);
4748 ir_node *const block = be_transform_node(src_block);
4749 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4750 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4751 ir_node *const sp = be_transform_node(src_sp);
4752 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4753 ia32_address_mode_t am;
4754 ia32_address_t *const addr = &am.addr;
4759 ir_node * eax = noreg_GP;
4760 ir_node * ecx = noreg_GP;
4761 ir_node * edx = noreg_GP;
4762 unsigned const pop = be_Call_get_pop(node);
4763 ir_type *const call_tp = be_Call_get_type(node);
4764 int old_no_pic_adjust;
4765 int throws_exception = ir_throws_exception(node);
4767 /* Run the x87 simulator if the call returns a float value */
4768 if (get_method_n_ress(call_tp) > 0) {
4769 ir_type *const res_type = get_method_res_type(call_tp, 0);
4770 ir_mode *const res_mode = get_type_mode(res_type);
4772 if (res_mode != NULL && mode_is_float(res_mode)) {
4773 ir_graph *irg = current_ir_graph;
4774 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4775 irg_data->do_x87_sim = 1;
4779 /* We do not want be_Call direct calls */
4780 assert(be_Call_get_entity(node) == NULL);
4782 /* special case for PIC trampoline calls */
4783 old_no_pic_adjust = ia32_no_pic_adjust;
4784 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4786 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4787 match_am | match_immediate);
4789 ia32_no_pic_adjust = old_no_pic_adjust;
/* walk the register arguments backwards: the last input is the fpcw,
 * the ones before it are the limited GP register parameters */
4791 i = get_irn_arity(node) - 1;
4792 fpcw = be_transform_node(get_irn_n(node, i--));
4793 for (; i >= n_be_Call_first_arg; --i) {
4794 arch_register_req_t const *const req
4795 = arch_get_irn_register_req_in(node, i);
4796 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4798 assert(req->type == arch_register_req_type_limited);
4799 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4801 switch (*req->limited) {
4802 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4803 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4804 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4805 default: panic("Invalid GP register for register parameter");
4809 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4810 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4811 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4812 ir_set_throws_exception(call, throws_exception);
4813 set_am_attributes(call, &am);
4814 call = fix_mem_proj(call, &am);
4816 if (get_irn_pinned(node) == op_pin_state_pinned)
4817 set_irn_pinned(call, op_pin_state_pinned);
4819 SET_IA32_ORIG_NODE(call, node);
4821 if (ia32_cg_config.use_sse2) {
4822 /* remember this call for post-processing */
4823 ARR_APP1(ir_node *, call_list, call);
4824 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/**
 * Transform Builtin trap: lowered to the ia32 UD2 node.
 */
4833 static ir_node *gen_trap(ir_node *node)
4835 	dbg_info *dbgi  = get_irn_dbg_info(node);
4836 	ir_node *block  = be_transform_node(get_nodes_block(node));
4837 	ir_node *mem    = be_transform_node(get_Builtin_mem(node));
4839 	return new_bd_ia32_UD2(dbgi, block, mem);
/**
 * Transform Builtin debugbreak: lowered to the ia32 Breakpoint node.
 */
4845 static ir_node *gen_debugbreak(ir_node *node)
4847 	dbg_info *dbgi  = get_irn_dbg_info(node);
4848 	ir_node *block  = be_transform_node(get_nodes_block(node));
4849 	ir_node *mem    = be_transform_node(get_Builtin_mem(node));
4851 	return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform Builtin return_address.
 *
 * Walks up 'value' frames via ClimbFrame (when requested) and then loads
 * the return address of that frame through the special return-address
 * frame entity.  Floating (unpinned) loads are marked rematerializable.
 */
4857 static ir_node *gen_return_address(ir_node *node)
4859 	ir_node   *param    = get_Builtin_param(node, 0);
4860 	ir_node   *frame    = get_Builtin_param(node, 1);
4861 	dbg_info  *dbgi     = get_irn_dbg_info(node);
4862 	ir_tarval *tv       = get_Const_tarval(param);
4863 	ir_graph  *irg      = get_irn_irg(node);
4864 	unsigned long value = get_tarval_long(tv);
4866 	ir_node *block  = be_transform_node(get_nodes_block(node));
4867 	ir_node *ptr    = be_transform_node(frame);
4871 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4872 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4873 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4876 	/* load the return address from this frame */
4877 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4879 	set_irn_pinned(load, get_irn_pinned(node));
4880 	set_ia32_op_type(load, ia32_AddrModeS);
4881 	set_ia32_ls_mode(load, mode_Iu);
4883 	set_ia32_am_offs_int(load, 0);
4884 	set_ia32_use_frame(load);
4885 	set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4887 	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* the Proj numbers of all load variants must agree for this trick */
4888 		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4889 				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4890 				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
4891 		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4894 	SET_IA32_ORIG_NODE(load, node);
4895 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin frame_address.
 *
 * Analogous to gen_return_address(): climbs 'value' frames and loads the
 * frame address via the frame-address entity of the graph.
 */
4901 static ir_node *gen_frame_address(ir_node *node)
4903 	ir_node   *param    = get_Builtin_param(node, 0);
4904 	ir_node   *frame    = get_Builtin_param(node, 1);
4905 	dbg_info  *dbgi     = get_irn_dbg_info(node);
4906 	ir_tarval *tv       = get_Const_tarval(param);
4907 	ir_graph  *irg      = get_irn_irg(node);
4908 	unsigned long value = get_tarval_long(tv);
4910 	ir_node *block  = be_transform_node(get_nodes_block(node));
4911 	ir_node *ptr    = be_transform_node(frame);
4916 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4917 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4918 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4921 	/* load the frame address from this frame */
4922 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4924 	set_irn_pinned(load, get_irn_pinned(node));
4925 	set_ia32_op_type(load, ia32_AddrModeS);
4926 	set_ia32_ls_mode(load, mode_Iu);
4928 	ent = ia32_get_frame_address_entity(irg);
4930 		set_ia32_am_offs_int(load, 0);
4931 		set_ia32_use_frame(load);
4932 		set_ia32_frame_ent(load, ent);
4934 		/* will fail anyway, but gcc does this: */
4935 		set_ia32_am_offs_int(load, 0);
4938 	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* the Proj numbers of all load variants must agree for this trick */
4939 		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4940 				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4941 				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
4942 		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4945 	SET_IA32_ORIG_NODE(load, node);
4946 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform Builtin prefetch.
 *
 * Selects between 3DNow! PrefetchW, the SSE Prefetch0/1/2/NTA family
 * (chosen by the locality hint parameter) and the plain 3DNow! Prefetch.
 * If the target supports no prefetch at all, only the memory edge is
 * routed through.
 */
4952 static ir_node *gen_prefetch(ir_node *node)
4955 	ir_node        *ptr, *block, *mem, *base, *idx;
4956 	ir_node        *param,  *new_node;
4959 	ia32_address_t  addr;
4961 	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4962 		/* no prefetch at all, route memory */
4963 		return be_transform_node(get_Builtin_mem(node));
	/* second builtin parameter: read(0)/write(1) intent */
4966 	param = get_Builtin_param(node, 1);
4967 	tv    = get_Const_tarval(param);
4968 	rw    = get_tarval_long(tv);
4970 	/* construct load address */
4971 	memset(&addr, 0, sizeof(addr));
4972 	ptr = get_Builtin_param(node, 0);
4973 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4980 	base = be_transform_node(base);
4986 	idx = be_transform_node(idx);
4989 	dbgi  = get_irn_dbg_info(node);
4990 	block = be_transform_node(get_nodes_block(node));
4991 	mem   = be_transform_node(get_Builtin_mem(node));
4993 	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4994 		/* we have 3DNow!, this was already checked above */
4995 		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4996 	} else if (ia32_cg_config.use_sse_prefetch) {
4997 		/* note: rw == 1 is IGNORED in that case */
4998 		param    = get_Builtin_param(node, 2);
4999 		tv       = get_Const_tarval(param);
5000 		locality = get_tarval_long(tv);
5002 		/* SSE style prefetch */
5005 			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5008 			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5011 			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5014 			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5018 		assert(ia32_cg_config.use_3dnow_prefetch);
5019 		/* 3DNow! style prefetch */
5020 		new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5023 	set_irn_pinned(new_node, get_irn_pinned(node));
5024 	set_ia32_op_type(new_node, ia32_AddrModeS);
5025 	set_ia32_ls_mode(new_node, mode_Bu);
5026 	set_address(new_node, &addr);
5028 	SET_IA32_ORIG_NODE(new_node, node);
5030 	return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Transform a bsf-like builtin (single source operand) using the given
 * node constructor, allowing the operand to be folded into an address
 * mode load.
 */
5036 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5038 	ir_node  *param     = get_Builtin_param(node, 0);
5039 	dbg_info *dbgi      = get_irn_dbg_info(node);
5041 	ir_node  *block     = get_nodes_block(node);
5042 	ir_node  *new_block = be_transform_node(block);
5044 	ia32_address_mode_t  am;
5045 	ia32_address_t      *addr = &am.addr;
5048 	match_arguments(&am, block, NULL, param, NULL, match_am);
5050 	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5051 	set_am_attributes(cnt, &am);
5052 	set_ia32_ls_mode(cnt, get_irn_mode(param));
5054 	SET_IA32_ORIG_NODE(cnt, node);
5055 	return fix_mem_proj(cnt, &am);
/**
 * Transform builtin ffs (find first set).
 *
 * Uses Bsf and then patches the "input was zero" case: Setcc on the zero
 * flag yields 1 for input 0, this is negated to an all-ones mask and ORed
 * into the Bsf result, so the final Lea (+1) produces 0 for input 0 and
 * bit-index+1 otherwise.
 */
5061 static ir_node *gen_ffs(ir_node *node)
5063 	ir_node  *bsf   = gen_unop_AM(node, new_bd_ia32_Bsf);
5064 	ir_node  *real  = skip_Proj(bsf);
5065 	dbg_info *dbgi  = get_irn_dbg_info(real);
5066 	ir_node  *block = get_nodes_block(real);
5067 	ir_node  *flag, *set, *conv, *neg, *orn, *add;
	/* ensure the Bsf is in mode_T so we can grab the flags result */
5070 	if (get_irn_mode(real) != mode_T) {
5071 		set_irn_mode(real, mode_T);
5072 		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5075 	flag = new_r_Proj(real, mode_b, pn_ia32_flags);
5078 	set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5079 	SET_IA32_ORIG_NODE(set, node);
5082 	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5083 	SET_IA32_ORIG_NODE(conv, node);
5086 	neg = new_bd_ia32_Neg(dbgi, block, conv);
5089 	orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5090 	set_ia32_ls_mode(orn, mode_Iu);
5091 	set_ia32_commutative(orn);
5094 	add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5095 	add_ia32_am_offs_int(add, 1);
/**
 * Transform builtin clz (count leading zeros): Bsr yields the index of
 * the highest set bit, XOR with 31 converts that to the leading-zero
 * count for a 32 bit value.
 */
5102 static ir_node *gen_clz(ir_node *node)
5104 	ir_node  *bsr   = gen_unop_AM(node, new_bd_ia32_Bsr);
5105 	ir_node  *real  = skip_Proj(bsr);
5106 	dbg_info *dbgi  = get_irn_dbg_info(real);
5107 	ir_node  *block = get_nodes_block(real);
5108 	ir_node  *imm   = ia32_create_Immediate(NULL, 0, 31);
5110 	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/**
 * Transform builtin ctz (count trailing zeros): maps directly to Bsf.
 */
5116 static ir_node *gen_ctz(ir_node *node)
5118 	return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity.
 *
 * The x86 parity flag only covers the lowest byte of a result, so the
 * 32 bit value is folded down first: xor of high and low half-words,
 * then a XorHighLow to fold the remaining two bytes, before testing the
 * (not-)parity flag via Setcc.
 */
5124 static ir_node *gen_parity(ir_node *node)
5126 	dbg_info *dbgi      = get_irn_dbg_info(node);
5127 	ir_node  *block     = get_nodes_block(node);
5128 	ir_node  *new_block = be_transform_node(block);
5129 	ir_node  *param     = get_Builtin_param(node, 0);
5130 	ir_node  *new_param = be_transform_node(param);
5133 	/* the x86 parity bit is stupid: it only looks at the lowest byte,
5134 	 * so we have to do complicated xoring first.
5135 	 * (we should also better lower this before the backend so we still have a
5136 	 * chance for CSE, constant folding and other goodies for some of these
5139 	ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5140 	ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5141 	ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5143 	ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5146 	set_ia32_ls_mode(xor, mode_Iu);
5147 	set_ia32_commutative(xor);
5149 	set_irn_mode(xor2, mode_T);
5150 	flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5153 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5154 	SET_IA32_ORIG_NODE(new_node, node);
	/* widen the 1-bit Setcc result to a full register */
5157 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5158 	                                    nomem, new_node, mode_Bu);
5159 	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform builtin popcount.
 *
 * Uses the hardware popcnt instruction when available (SSE4.2/SSE4a),
 * otherwise falls back to the classic bit-twiddling reduction (pairwise
 * add of 1/2/4/8/16-bit groups, cf. Hacker's Delight).
 */
5166 static ir_node *gen_popcount(ir_node *node)
5168 	ir_node *param = get_Builtin_param(node, 0);
5169 	dbg_info *dbgi = get_irn_dbg_info(node);
5171 	ir_node *block     = get_nodes_block(node);
5172 	ir_node *new_block = be_transform_node(block);
5175 	ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5177 	/* check for SSE4.2 or SSE4a and use the popcnt instruction */
5178 	if (ia32_cg_config.use_popcnt) {
5179 		ia32_address_mode_t am;
5180 		ia32_address_t      *addr = &am.addr;
5183 		match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5185 		cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5186 		set_am_attributes(cnt, &am);
5187 		set_ia32_ls_mode(cnt, get_irn_mode(param));
5189 		SET_IA32_ORIG_NODE(cnt, node);
5190 		return fix_mem_proj(cnt, &am);
5193 	new_param = be_transform_node(param);
5195 	/* do the standard popcount algo */
5196 	/* TODO: This is stupid, we should transform this before the backend,
5197 	 * to get CSE, localopts, etc. for the operations
5198 	 * TODO: This is also not the optimal algorithm (it is just the starting
5199 	 * example in hackers delight, they optimize it more on the following page)
5200 	 * But I'm too lazy to fix this now, as the code should get lowered before
5201 	 * the backend anyway.
5204 	/* m1 = x & 0x55555555 */
5205 	imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5206 	m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
	/* s1 = x >> 1 */
5209 	simm = ia32_create_Immediate(NULL, 0, 1);
5210 	s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5212 	/* m2 = s1 & 0x55555555 */
5213 	m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
	/* m3 = m1 + m2 (Lea used as add) */
5216 	m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5218 	/* m4 = m3 & 0x33333333 */
5219 	imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5220 	m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
	/* s2 = m3 >> 2 */
5223 	simm = ia32_create_Immediate(NULL, 0, 2);
5224 	s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5226 	/* m5 = s2 & 0x33333333 */
5227 	m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
	/* m6 = m4 + m5 */
5230 	m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5232 	/* m7 = m6 & 0x0F0F0F0F */
5233 	imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5234 	m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
	/* s3 = m6 >> 4 */
5237 	simm = ia32_create_Immediate(NULL, 0, 4);
5238 	s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5240 	/* m8 = s3 & 0x0F0F0F0F */
5241 	m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
	/* m9 = m7 + m8 */
5244 	m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5246 	/* m10 = m9 & 0x00FF00FF */
5247 	imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5248 	m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
	/* s4 = m9 >> 8 */
5251 	simm = ia32_create_Immediate(NULL, 0, 8);
5252 	s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5254 	/* m11 = s4 & 0x00FF00FF */
5255 	m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5257 	/* m12 = m10 + m11 */
5258 	m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5260 	/* m13 = m12 & 0x0000FFFF */
5261 	imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5262 	m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5264 	/* s5 = m12 >> 16 */
5265 	simm = ia32_create_Immediate(NULL, 0, 16);
5266 	s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5268 	/* res = m13 + s5 */
5269 	return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
/**
 * Transform builtin byte swap.
 *
 * 32 bit: Bswap if available, otherwise emulated via three rotates
 * (rol16-low, rol32, rol16-low).  16 bit: Bswap16 (always available).
 * Other sizes are rejected with a panic.
 */
5275 static ir_node *gen_bswap(ir_node *node)
5277 	ir_node *param     = be_transform_node(get_Builtin_param(node, 0));
5278 	dbg_info *dbgi     = get_irn_dbg_info(node);
5280 	ir_node *block     = get_nodes_block(node);
5281 	ir_node *new_block = be_transform_node(block);
5282 	ir_mode *mode      = get_irn_mode(param);
5283 	unsigned size      = get_mode_size_bits(mode);
5287 		if (ia32_cg_config.use_bswap) {
5288 			/* swap available */
5289 			return new_bd_ia32_Bswap(dbgi, new_block, param);
		/* no bswap instruction: rotate the half-words into place */
5291 			ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5292 			ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5293 			ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5294 			ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5295 			ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5296 			set_ia32_ls_mode(rol1, mode_Hu);
5297 			set_ia32_ls_mode(rol2, mode_Iu);
5298 			set_ia32_ls_mode(rol3, mode_Hu);
5303 		/* swap16 always available */
5304 		return new_bd_ia32_Bswap16(dbgi, new_block, param);
5307 		panic("Invalid bswap size (%d)", size);
/**
 * Transform builtin outport: emits an ia32 Outport node writing 'value'
 * (in the mode of the original operand) to the given I/O port.
 */
5314 static ir_node *gen_outport(ir_node *node)
5316 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5317 	ir_node *oldv  = get_Builtin_param(node, 1);
5318 	ir_mode *mode  = get_irn_mode(oldv);
5319 	ir_node *value = be_transform_node(oldv);
5320 	ir_node *block = be_transform_node(get_nodes_block(node));
5321 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5322 	dbg_info *dbgi = get_irn_dbg_info(node);
5324 	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5325 	set_ia32_ls_mode(res, mode);
/**
 * Transform builtin inport: emits an ia32 Inport node reading from the
 * given I/O port in the mode of the builtin's result type.
 */
5332 static ir_node *gen_inport(ir_node *node)
5334 	ir_type *tp    = get_Builtin_type(node);
5335 	ir_type *rstp  = get_method_res_type(tp, 0);
5336 	ir_mode *mode  = get_type_mode(rstp);
5337 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5338 	ir_node *block = be_transform_node(get_nodes_block(node));
5339 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5340 	dbg_info *dbgi = get_irn_dbg_info(node);
5342 	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5343 	set_ia32_ls_mode(res, mode);
5345 	/* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Writes executable trampoline code to *ptr:
 *   mov ecx, <env>      (opcode byte 0xB9 followed by the 32 bit env)
 *   jmp rel <callee>    (opcode byte 0xE9 followed by a 32 bit relative
 *                        displacement = callee - end_of_trampoline)
 * The relative offset is either folded into a Const (SymConst callee,
 * the -10 accounts for the trampoline length) or computed at runtime
 * with a Lea/Sub.  Returns a Tuple of (memory, trampoline address).
 */
5352 static ir_node *gen_inner_trampoline(ir_node *node)
5354 	ir_node  *ptr       = get_Builtin_param(node, 0);
5355 	ir_node  *callee    = get_Builtin_param(node, 1);
5356 	ir_node  *env       = be_transform_node(get_Builtin_param(node, 2));
5357 	ir_node  *mem       = get_Builtin_mem(node);
5358 	ir_node  *block     = get_nodes_block(node);
5359 	ir_node  *new_block = be_transform_node(block);
5363 	ir_node  *trampoline;
5365 	dbg_info *dbgi      = get_irn_dbg_info(node);
5366 	ia32_address_t addr;
5368 	/* construct store address */
5369 	memset(&addr, 0, sizeof(addr));
5370 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5372 	if (addr.base == NULL) {
5373 		addr.base = noreg_GP;
5375 		addr.base = be_transform_node(addr.base);
5378 	if (addr.index == NULL) {
5379 		addr.index = noreg_GP;
5381 		addr.index = be_transform_node(addr.index);
5383 	addr.mem = be_transform_node(mem);
5385 	/* mov  ecx, <env> */
5386 	val   = ia32_create_Immediate(NULL, 0, 0xB9);
5387 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5388 	                              addr.index, addr.mem, val);
5389 	set_irn_pinned(store, get_irn_pinned(node));
5390 	set_ia32_op_type(store, ia32_AddrModeD);
5391 	set_ia32_ls_mode(store, mode_Bu);
5392 	set_address(store, &addr);
	/* store the 32 bit env value after the opcode byte */
5396 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5397 	                          addr.index, addr.mem, env);
5398 	set_irn_pinned(store, get_irn_pinned(node));
5399 	set_ia32_op_type(store, ia32_AddrModeD);
5400 	set_ia32_ls_mode(store, mode_Iu);
5401 	set_address(store, &addr);
5405 	/* jmp rel <callee> */
5406 	val   = ia32_create_Immediate(NULL, 0, 0xE9);
5407 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5408 	                              addr.index, addr.mem, val);
5409 	set_irn_pinned(store, get_irn_pinned(node));
5410 	set_ia32_op_type(store, ia32_AddrModeD);
5411 	set_ia32_ls_mode(store, mode_Bu);
5412 	set_address(store, &addr);
5416 	trampoline = be_transform_node(ptr);
5418 	/* the callee is typically an immediate */
5419 	if (is_SymConst(callee)) {
5420 		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5422 		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5423 		add_ia32_am_offs_int(rel, -10);
	/* displacement is relative to the trampoline address */
5425 	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5427 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5428 	                          addr.index, addr.mem, rel);
5429 	set_irn_pinned(store, get_irn_pinned(node));
5430 	set_ia32_op_type(store, ia32_AddrModeD);
5431 	set_ia32_ls_mode(store, mode_Iu);
5432 	set_address(store, &addr);
5437 	return new_r_Tuple(new_block, 2, in);
/**
 * Transform a Builtin node: dispatch on the builtin kind to the
 * specialized gen_* functions above.
 */
5443 static ir_node *gen_Builtin(ir_node *node)
5445 	ir_builtin_kind kind = get_Builtin_kind(node);
5449 		return gen_trap(node);
5450 	case ir_bk_debugbreak:
5451 		return gen_debugbreak(node);
5452 	case ir_bk_return_address:
5453 		return gen_return_address(node);
5454 	case ir_bk_frame_address:
5455 		return gen_frame_address(node);
5456 	case ir_bk_prefetch:
5457 		return gen_prefetch(node);
5459 		return gen_ffs(node);
5461 		return gen_clz(node);
5463 		return gen_ctz(node);
5465 		return gen_parity(node);
5466 	case ir_bk_popcount:
5467 		return gen_popcount(node);
5469 		return gen_bswap(node);
5471 		return gen_outport(node);
5473 		return gen_inport(node);
5474 	case ir_bk_inner_trampoline:
5475 		return gen_inner_trampoline(node);
5477 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform a Proj(Builtin) node: map the generic Builtin Proj numbers
 * onto the Projs/Tuple entries of the already-transformed ia32 node.
 */
5483 static ir_node *gen_Proj_Builtin(ir_node *proj)
5485 	ir_node         *node     = get_Proj_pred(proj);
5486 	ir_node         *new_node = be_transform_node(node);
5487 	ir_builtin_kind kind      = get_Builtin_kind(node);
5490 	case ir_bk_return_address:
5491 	case ir_bk_frame_address:
5496 	case ir_bk_popcount:
	/* value-producing builtins: only the result Proj is expected */
5498 		assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5501 	case ir_bk_debugbreak:
5502 	case ir_bk_prefetch:
	/* memory-only builtins: only the M Proj is expected */
5504 		assert(get_Proj_proj(proj) == pn_Builtin_M);
5507 		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5508 			return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5510 			assert(get_Proj_proj(proj) == pn_Builtin_M);
5511 			return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5513 	case ir_bk_inner_trampoline:
5514 		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5515 			return get_Tuple_pred(new_node, 1);
5517 			assert(get_Proj_proj(proj) == pn_Builtin_M);
5518 			return get_Tuple_pred(new_node, 0);
5521 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform be_IncSP: duplicated as-is, but marked as flag-modifying
 * because stack-pointer adjustment is implemented with add/sub.
 */
5524 static ir_node *gen_be_IncSP(ir_node *node)
5526 	ir_node *res = be_duplicate_node(node);
5527 	arch_add_irn_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call.
 *
 * Maps be_Call Proj numbers (sp, M, X_except, X_regular) to ia32_Call
 * numbers; for result Projs it searches the ia32_Call outputs for the
 * one with a matching limited register requirement.  Pins the register
 * on the stack-pointer and fpcw results.
 */
5535 static ir_node *gen_Proj_be_Call(ir_node *node)
5537 	ir_node  *call     = get_Proj_pred(node);
5538 	ir_node  *new_call = be_transform_node(call);
5539 	dbg_info *dbgi     = get_irn_dbg_info(node);
5540 	long      proj     = get_Proj_proj(node);
5541 	ir_mode  *mode     = get_irn_mode(node);
5544 	if (proj == pn_be_Call_M) {
5545 		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5547 	/* transform call modes */
5548 	if (mode_is_data(mode)) {
5549 		const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5553 	/* Map from be_Call to ia32_Call proj number */
5554 	if (proj == pn_be_Call_sp) {
5555 		proj = pn_ia32_Call_stack;
5556 	} else if (proj == pn_be_Call_M) {
5557 		proj = pn_ia32_Call_M;
5558 	} else if (proj == pn_be_Call_X_except) {
5559 		proj = pn_ia32_Call_X_except;
5560 	} else if (proj == pn_be_Call_X_regular) {
5561 		proj = pn_ia32_Call_X_regular;
5563 		arch_register_req_t const *const req    = arch_get_irn_register_req(node);
5564 		int                        const n_outs = arch_get_irn_n_outs(new_call);
5567 		assert(proj      >= pn_be_Call_first_res);
5568 		assert(req->type & arch_register_req_type_limited);
		/* find the ia32_Call output constrained to the same register */
5570 		for (i = 0; i < n_outs; ++i) {
5571 			arch_register_req_t const *const new_req
5572 				= arch_get_irn_register_req_out(new_call, i);
5574 			if (!(new_req->type & arch_register_req_type_limited) ||
5575 			    new_req->cls      != req->cls                     ||
5576 			    *new_req->limited != *req->limited)
5585 	res = new_rd_Proj(dbgi, new_call, mode, proj);
5587 	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5589 	case pn_ia32_Call_stack:
5590 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5593 	case pn_ia32_Call_fpcw:
5594 		arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/**
 * Transform the Projs from a Cmp: must never be reached, since mode_b
 * values are lowered before the backend runs.
 */
5604 static ir_node *gen_Proj_Cmp(ir_node *node)
5606 	/* this probably means not all mode_b nodes were lowered... */
5607 	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform a Proj of an ASM node: the memory output is always the last
 * output of the transformed ASM node; value outputs keep their position.
 */
5611 static ir_node *gen_Proj_ASM(ir_node *node)
5613 	ir_mode *mode      = get_irn_mode(node);
5614 	ir_node *pred      = get_Proj_pred(node);
5615 	ir_node *new_pred  = be_transform_node(pred);
5616 	long     pos       = get_Proj_proj(node);
5618 	if (mode == mode_M) {
5619 		pos = arch_get_irn_n_outs(new_pred)-1;
5620 	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
5622 	} else if (mode_is_float(mode)) {
5625 		panic("unexpected proj mode at ASM");
5628 	return new_r_Proj(new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes: dispatch on the opcode
 * of the predecessor to the specialized gen_Proj_* functions; Start's
 * X_initial_exec Proj is replaced by a plain Jmp.
 */
5634 static ir_node *gen_Proj(ir_node *node)
5636 	ir_node *pred = get_Proj_pred(node);
5639 	switch (get_irn_opcode(pred)) {
5641 		return gen_Proj_Load(node);
5643 		return gen_Proj_Store(node);
5645 		return gen_Proj_ASM(node);
5647 		return gen_Proj_Builtin(node);
5649 		return gen_Proj_Div(node);
5651 		return gen_Proj_Mod(node);
5653 		return gen_Proj_CopyB(node);
5655 		return gen_Proj_be_SubSP(node);
5657 		return gen_Proj_be_AddSP(node);
5659 		return gen_Proj_be_Call(node);
5661 		return gen_Proj_Cmp(node);
5663 		proj = get_Proj_proj(node);
5665 		case pn_Start_X_initial_exec: {
5666 			ir_node  *block     = get_nodes_block(pred);
5667 			ir_node  *new_block = be_transform_node(block);
5668 			dbg_info *dbgi      = get_irn_dbg_info(node);
5669 			/* we exchange the ProjX with a jump */
5670 			ir_node  *jump      = new_rd_Jmp(dbgi, new_block);
5678 		if (is_ia32_l_FloattoLL(pred)) {
5679 			return gen_Proj_l_FloattoLL(node);
5681 		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5685 			ir_mode *mode = get_irn_mode(node);
5686 			if (ia32_mode_needs_gp_reg(mode)) {
5687 				ir_node *new_pred = be_transform_node(pred);
5688 				ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5689 				                               get_Proj_proj(node));
				/* keep the original node number for debugging */
5690 				new_proj->node_nr = node->node_nr;
5695 	return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic operation pointers,
 * so be_transform_graph() can dispatch per opcode.
 */
5701 static void register_transformers(void)
5703 	/* first clear the generic function pointer for all ops */
5704 	be_start_transform_setup();
5706 	be_set_transform_function(op_Add,              gen_Add);
5707 	be_set_transform_function(op_And,              gen_And);
5708 	be_set_transform_function(op_ASM,              ia32_gen_ASM);
5709 	be_set_transform_function(op_be_AddSP,         gen_be_AddSP);
5710 	be_set_transform_function(op_be_Call,          gen_be_Call);
5711 	be_set_transform_function(op_be_Copy,          gen_be_Copy);
5712 	be_set_transform_function(op_be_FrameAddr,     gen_be_FrameAddr);
5713 	be_set_transform_function(op_be_IncSP,         gen_be_IncSP);
5714 	be_set_transform_function(op_be_Return,        gen_be_Return);
5715 	be_set_transform_function(op_be_SubSP,         gen_be_SubSP);
5716 	be_set_transform_function(op_Builtin,          gen_Builtin);
5717 	be_set_transform_function(op_Cmp,              gen_Cmp);
5718 	be_set_transform_function(op_Cond,             gen_Cond);
5719 	be_set_transform_function(op_Const,            gen_Const);
5720 	be_set_transform_function(op_Conv,             gen_Conv);
5721 	be_set_transform_function(op_CopyB,            ia32_gen_CopyB);
5722 	be_set_transform_function(op_Div,              gen_Div);
5723 	be_set_transform_function(op_Eor,              gen_Eor);
5724 	be_set_transform_function(op_ia32_l_Adc,       gen_ia32_l_Adc);
5725 	be_set_transform_function(op_ia32_l_Add,       gen_ia32_l_Add);
5726 	be_set_transform_function(op_ia32_Leave,       be_duplicate_node);
5727 	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5728 	be_set_transform_function(op_ia32_l_IMul,      gen_ia32_l_IMul);
5729 	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5730 	be_set_transform_function(op_ia32_l_Mul,       gen_ia32_l_Mul);
5731 	be_set_transform_function(op_ia32_l_Sbb,       gen_ia32_l_Sbb);
5732 	be_set_transform_function(op_ia32_l_Sub,       gen_ia32_l_Sub);
5733 	be_set_transform_function(op_ia32_GetEIP,      be_duplicate_node);
5734 	be_set_transform_function(op_ia32_Minus64Bit,  be_duplicate_node);
5735 	be_set_transform_function(op_ia32_NoReg_GP,    be_duplicate_node);
5736 	be_set_transform_function(op_ia32_NoReg_VFP,   be_duplicate_node);
5737 	be_set_transform_function(op_ia32_NoReg_XMM,   be_duplicate_node);
5738 	be_set_transform_function(op_ia32_PopEbp,      be_duplicate_node);
5739 	be_set_transform_function(op_ia32_Push,        be_duplicate_node);
5740 	be_set_transform_function(op_IJmp,             gen_IJmp);
5741 	be_set_transform_function(op_Jmp,              gen_Jmp);
5742 	be_set_transform_function(op_Load,             gen_Load);
5743 	be_set_transform_function(op_Minus,            gen_Minus);
5744 	be_set_transform_function(op_Mod,              gen_Mod);
5745 	be_set_transform_function(op_Mul,              gen_Mul);
5746 	be_set_transform_function(op_Mulh,             gen_Mulh);
5747 	be_set_transform_function(op_Mux,              gen_Mux);
5748 	be_set_transform_function(op_Not,              gen_Not);
5749 	be_set_transform_function(op_Or,               gen_Or);
5750 	be_set_transform_function(op_Phi,              gen_Phi);
5751 	be_set_transform_function(op_Proj,             gen_Proj);
5752 	be_set_transform_function(op_Rotl,             gen_Rotl);
5753 	be_set_transform_function(op_Shl,              gen_Shl);
5754 	be_set_transform_function(op_Shr,              gen_Shr);
5755 	be_set_transform_function(op_Shrs,             gen_Shrs);
5756 	be_set_transform_function(op_Store,            gen_Store);
5757 	be_set_transform_function(op_Sub,              gen_Sub);
5758 	be_set_transform_function(op_Switch,           gen_Switch);
5759 	be_set_transform_function(op_SymConst,         gen_SymConst);
5760 	be_set_transform_function(op_Unknown,          ia32_gen_Unknown);
/**
 * Pre-transform all unknown and noreg nodes, and cache the graph's
 * NoMem / NoReg-GP nodes in the file-local nomem/noreg_GP variables.
 */
5766 static void ia32_pretransform_node(void)
5768 	ir_graph        *irg      = current_ir_graph;
5769 	ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5771 	irg_data->noreg_gp       = be_pre_transform_node(irg_data->noreg_gp);
5772 	irg_data->noreg_vfp      = be_pre_transform_node(irg_data->noreg_vfp);
5773 	irg_data->noreg_xmm      = be_pre_transform_node(irg_data->noreg_xmm);
5774 	irg_data->get_eip        = be_pre_transform_node(irg_data->get_eip);
5775 	irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5777 	nomem    = get_irg_no_mem(irg);
5778 	noreg_GP = ia32_new_NoReg_gp(irg);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires that floating point results are returned in st(0);
 * copy them to an xmm register via a stack slot (vfst followed by xLoad),
 * or — when the only user is an xStore — patch the store into a vfst.
 */
5786 static void postprocess_fp_call_results(void)
5790 	for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5791 		ir_node *call = call_list[i];
5792 		ir_type *mtp  = call_types[i];
5795 		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5796 			ir_type *res_tp = get_method_res_type(mtp, j);
5797 			ir_node *res, *new_res;
5798 			const ir_edge_t *edge, *next;
5801 			if (! is_atomic_type(res_tp)) {
5802 				/* no floating point return */
5805 			res_mode = get_type_mode(res_tp);
5806 			if (! mode_is_float(res_mode)) {
5807 				/* no floating point return */
5811 			res     = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5814 			/* now patch the users */
5815 			foreach_out_edge_safe(res, edge, next) {
5816 				ir_node *succ = get_edge_src_irn(edge);
5819 				if (be_is_Keep(succ))
5822 				if (is_ia32_xStore(succ)) {
5823 					/* an xStore can be patched into an vfst */
5824 					dbg_info *db    = get_irn_dbg_info(succ);
5825 					ir_node  *block = get_nodes_block(succ);
5826 					ir_node  *base  = get_irn_n(succ, n_ia32_xStore_base);
5827 					ir_node  *idx   = get_irn_n(succ, n_ia32_xStore_index);
5828 					ir_node  *mem   = get_irn_n(succ, n_ia32_xStore_mem);
5829 					ir_node  *value = get_irn_n(succ, n_ia32_xStore_val);
5830 					ir_mode  *mode  = get_ia32_ls_mode(succ);
5832 					ir_node  *st    = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5833 					//ir_node  *mem   = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5834 					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5835 					if (is_ia32_use_frame(succ))
5836 						set_ia32_use_frame(st);
5837 					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5838 					set_irn_pinned(st, get_irn_pinned(succ));
5839 					set_ia32_op_type(st, ia32_AddrModeD);
				/* the two store variants must share Proj numbers for exchange */
5841 					assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5842 					assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5843 					assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
5850 				if (new_res == NULL) {
5851 					dbg_info *db       = get_irn_dbg_info(call);
5852 					ir_node  *block    = get_nodes_block(call);
5853 					ir_node  *frame    = get_irg_frame(current_ir_graph);
5854 					ir_node  *old_mem  = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5855 					ir_node  *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5856 					ir_node  *vfst, *xld, *new_mem;
5859 					/* store st(0) on stack */
5860 					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5862 					set_ia32_op_type(vfst, ia32_AddrModeD);
5863 					set_ia32_use_frame(vfst);
5865 					vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5867 					/* load into SSE register */
5868 					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5870 					set_ia32_op_type(xld, ia32_AddrModeS);
5871 					set_ia32_use_frame(xld);
5873 					new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5874 					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5876 					if (old_mem != NULL) {
5877 						edges_reroute(old_mem, new_mem);
5881 				set_irn_n(succ, get_edge_src_pos(edge), new_res);
/**
 * Do the transformation: register the transformers, set up heights and
 * non-address-mode analysis, disable CSE for the duration (node
 * attributes are set after creation), run be_transform_graph() and the
 * SSE result post-processing, then tear everything down again.
 */
5888 void ia32_transform_graph(ir_graph *irg)
5892 	register_transformers();
5893 	initial_fpcw       = NULL;
5894 	ia32_no_pic_adjust = 0;
5896 	old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5898 	be_timer_push(T_HEIGHTS);
5899 	ia32_heights = heights_new(irg);
5900 	be_timer_pop(T_HEIGHTS);
5901 	ia32_calculate_non_address_mode_nodes(irg);
5903 	/* the transform phase is not safe for CSE (yet) because several nodes get
5904 	 * attributes set after their creation */
5905 	cse_last = get_opt_cse();
5908 	call_list  = NEW_ARR_F(ir_node *, 0);
5909 	call_types = NEW_ARR_F(ir_type *, 0);
5910 	be_transform_graph(irg, ia32_pretransform_node);
5912 	if (ia32_cg_config.use_sse2)
5913 		postprocess_fp_call_results();
5914 	DEL_ARR_F(call_types);
5915 	DEL_ARR_F(call_list);
5917 	set_opt_cse(cse_last);
5919 	ia32_free_non_address_mode_nodes();
5920 	heights_free(ia32_heights);
5921 	ia32_heights = NULL;
5924 void ia32_init_transform(void)
5926 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");