2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
/* It's enough to create these helper nodes once per transformation run. */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
/* call types; presumably parallel to call_list — verify against the
 * call-postprocessing code */
static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
/**
 * Returns true if the constant can be created with a simple x87 float
 * command: the visible test accepts 0.0 and 1.0 (fldz/fld1 candidates).
 * NOTE(review): this excerpt is missing lines (braces and return
 * statements are not visible) — do not change logic from this view alone.
 */
static bool is_simple_x87_Const(ir_node *node)
ir_tarval *tv = get_Const_tarval(node);
if (tarval_is_null(tv) || tarval_is_one(tv))
/* TODO: match all the other float constants */
/**
 * Returns true if the constant can be created with a simple SSE command.
 * Zero is tested directly; with CONSTRUCT_SSE_CONST defined, doubles whose
 * lower 32 bits are zero are apparently also considered simple.
 * NOTE(review): excerpt is missing lines (braces/returns not visible).
 */
static bool is_simple_sse_Const(ir_node *node)
ir_tarval *tv = get_Const_tarval(node);
ir_mode *mode = get_tarval_mode(tv);
if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
if (mode == mode_D) {
	/* assemble the low 32 bits of the double from its byte view */
	unsigned val = get_tarval_sub_bits(tv, 0) |
		(get_tarval_sub_bits(tv, 1) << 8) |
		(get_tarval_sub_bits(tv, 2) << 16) |
		(get_tarval_sub_bits(tv, 3) << 24);
	/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
/* TODO: match all the other float constants */
/**
 * Return NoReg or the PIC base node in case of position independent code.
 * This is necessary as base address for newly created symbols.
 * NOTE(review): the non-PIC return path is not visible in this excerpt.
 */
static ir_node *get_symconst_base(void)
ir_graph *irg = current_ir_graph;
if (be_get_irg_options(irg)->pic) {
	/* delegate PIC base construction to the architecture backend */
	const arch_env_t *arch_env = be_get_irg_arch_env(irg);
	return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a Const node into an ia32 constant:
 * - SSE2 floats: xZero/xMovd tricks for cheap values, otherwise an xLoad
 *   from a float constant-pool entity;
 * - x87 floats: vfldz/vfld1 for 0.0/1.0, otherwise a vfld from an entity;
 * - integer modes: a plain ia32 Const immediate.
 * NOTE(review): this excerpt is missing many lines (declarations, braces,
 * else-branches, returns) — treat control flow below as incomplete.
 */
static ir_node *gen_Const(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
ir_tarval *tv = get_Const_tarval(node);
assert(is_Const(node));
if (mode_is_float(mode)) {
	ir_graph *irg = get_irn_irg(node);
	const arch_env_t *arch_env = be_get_irg_arch_env(irg);
	ia32_isa_t *isa = (ia32_isa_t*) arch_env;
	if (ia32_cg_config.use_sse2) {
		if (tarval_is_null(tv)) {
			/* 0.0 is produced register-only via pxor-style xZero */
			load = new_bd_ia32_xZero(dbgi, block);
			set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
		} else if (tarval_is_one(tv)) {
			/* construct 1.0 by shifting an all-ones pattern */
			int cnst = mode == mode_F ? 26 : 55;
			ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
			ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
			ir_node *pslld, *psrld;
			load = new_bd_ia32_xAllOnes(dbgi, block);
			set_ia32_ls_mode(load, mode);
			pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
			set_ia32_ls_mode(pslld, mode);
			psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
			set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
		} else if (mode == mode_F) {
			/* we can place any 32bit constant by using a movd gp, sse */
			unsigned val = get_tarval_sub_bits(tv, 0) |
				(get_tarval_sub_bits(tv, 1) << 8) |
				(get_tarval_sub_bits(tv, 2) << 16) |
				(get_tarval_sub_bits(tv, 3) << 24);
			ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
			load = new_bd_ia32_xMovd(dbgi, block, cnst);
			set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
				ir_node *cnst, *psllq;
				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
					(get_tarval_sub_bits(tv, 5) << 8) |
					(get_tarval_sub_bits(tv, 6) << 16) |
					(get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general case: load from a float constant-pool entity;
			 * rematerializable so the spiller can reload it for free */
			floatent = ia32_create_float_const_entity(isa, tv, NULL);
			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path */
		if (tarval_is_null(tv)) {
			load = new_bd_ia32_vfldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (tarval_is_one(tv)) {
			load = new_bd_ia32_vfld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
			floatent = ia32_create_float_const_entity(isa, tv, NULL);
			/* create_float_const_ent is smart and sometimes creates */
			ls_mode = get_type_mode(get_entity_type(floatent));
			base = get_symconst_base();
			load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
	SET_IA32_ORIG_NODE(load, node);
} else { /* non-float mode */
	/* normalize to 32bit unsigned before extracting the long value */
	tv = tarval_convert_to(tv, mode_Iu);
	if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
		panic("couldn't convert constant tarval (%+F)", node);
	val = get_tarval_long(tv);
	cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
	SET_IA32_ORIG_NODE(cnst, node);
/**
 * Transforms a SymConst: floats become an entity load (xLoad/vfld),
 * TLS entities become LdTls + Lea, everything else a Const immediate
 * carrying the entity. Only symconst_addr_ent is supported.
 * NOTE(review): excerpt is missing lines (else-branches, returns).
 */
static ir_node *gen_SymConst(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
	if (ia32_cg_config.use_sse2)
		cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
	cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
	set_ia32_am_sc(cnst, get_SymConst_entity(node));
	set_ia32_use_frame(cnst);
if (get_SymConst_kind(node) != symconst_addr_ent) {
	panic("backend only support symconst_addr_ent (at %+F)", node);
entity = get_SymConst_entity(node);
if (get_entity_owner(entity) == get_tls_type()) {
	/* thread-local: address = TLS base + entity offset via Lea */
	ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
	ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
	set_ia32_am_sc(lea, entity);
cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
SET_IA32_ORIG_NODE(cnst, node);
/**
 * Create a fixed-layout array type with two elements of @p tp, preserving
 * the element alignment (used for the float[2] constant arrays below).
 * NOTE(review): excerpt is missing lines (the body of the alignment `if`
 * and the return are not visible).
 */
static ir_type *make_array_type(ir_type *tp)
unsigned alignment = get_type_alignment_bytes(tp);
unsigned size = get_type_size_bytes(tp);
ir_type *res = new_type_array(1, tp);
set_type_alignment_bytes(res, alignment);
set_array_bounds_int(res, 0, 0, 2);
if (alignment > size)
set_type_size_bytes(res, 2 * size);
set_type_state(res, layout_fixed);
/**
 * Create a float[2] array type for the given atomic type, caching one
 * array type per float mode (F, D, extended) in function-local statics.
 *
 * @param tp the atomic type
 * NOTE(review): excerpt is missing lines (cache-hit early returns and the
 * final return are not visible).
 */
static ir_type *ia32_create_float_array(ir_type *tp)
ir_mode *mode = get_type_mode(tp);
if (mode == mode_F) {
	static ir_type *float_F;
	arr = float_F = make_array_type(tp);
} else if (mode == mode_D) {
	static ir_type *float_D;
	arr = float_D = make_array_type(tp);
	static ir_type *float_E;
	arr = float_E = make_array_type(tp);
/* Generates an entity for a known FP const (used for FP Neg + Abs).
 * The sign/abs masks are emitted as integer bit patterns; ULLBIAS becomes
 * a float[2] = {0.0f, 2^64} compound used when converting unsigned 64bit
 * integers to float. Results are cached in ent_cache per constant kind.
 * NOTE(review): excerpt is missing lines (declarations, cache-hit check,
 * closing braces). */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
static const struct {
	const char *cnst_str;
} names [ia32_known_const_max] = {
	{ "C_sfp_sign", "0x80000000", 0 },
	{ "C_dfp_sign", "0x8000000000000000", 1 },
	{ "C_sfp_abs", "0x7FFFFFFF", 0 },
	{ "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
	{ "C_ull_bias", "0x10000000000000000", 2 }
static ir_entity *ent_cache[ia32_known_const_max];
ir_entity *ent = ent_cache[kct];
ir_graph *irg = current_ir_graph;
const arch_env_t *arch_env = be_get_irg_arch_env(irg);
ia32_isa_t *isa = (ia32_isa_t*) arch_env;
const char *cnst_str = names[kct].cnst_str;
ident *name = new_id_from_str(names[kct].name);
/* pick the tarval mode matching the constant's bit width */
switch (names[kct].mode) {
case 0: mode = mode_Iu; break;
case 1: mode = mode_Lu; break;
case 2: mode = mode_F; break;
default: panic("internal compiler error (ia32_gen_fp_known_const)");
tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
if (kct == ia32_ULLBIAS) {
	/* ULLBIAS needs a {0.0, bias} pair so it can be indexed */
	ir_type *type = ia32_get_prim_type(mode_F);
	ir_type *atype = ia32_create_float_array(type);
	ir_initializer_t *initializer;
	ent = new_entity(get_glob_type(), name, atype);
	set_entity_ld_ident(ent, name);
	set_entity_visibility(ent, ir_visibility_private);
	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
	initializer = create_initializer_compound(2);
	set_initializer_compound_value(initializer, 0,
		create_initializer_tarval(get_mode_null(mode)));
	set_initializer_compound_value(initializer, 1,
		create_initializer_tarval(tv));
	set_entity_initializer(ent, initializer);
	ent = ia32_create_float_const_entity(isa, tv, name);
/* cache the entry */
ent_cache[kct] = ent;
return ent_cache[kct];
/**
 * Return true if the node is a Proj(Load) and could be used in source
 * address mode for another node. Will return only true if the @p other
 * node is not dependent on the memory of the Load (for binary operations
 * use the other input here, for unary operations use NULL).
 * NOTE(review): excerpt is missing lines (declarations, early returns,
 * the final return) — the visible conditions are the rejection filters.
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2,
/* float constants are always available */
if (is_Const(node)) {
	mode = get_irn_mode(node);
	if (mode_is_float(mode)) {
		ir_tarval *tv = get_Const_tarval(node);
		if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
		if (ia32_cg_config.use_sse2) {
			if (is_simple_sse_Const(node))
		if (is_simple_x87_Const(node))
	if (get_irn_n_edges(node) > 1)
load = get_Proj_pred(node);
pn = get_Proj_proj(node);
if (!is_Load(load) || pn != pn_Load_res)
if (get_nodes_block(load) != block)
mode = get_irn_mode(node);
/* we can't fold mode_E AM */
if (mode == ia32_mode_E)
/* we only use address mode if we're the only user of the load */
if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
/* in some edge cases with address mode we might reach the load normally
 * and through some AM sequence, if it is already materialized then we
 * can't create an AM node from it */
if (be_is_transformed(node))
/* don't do AM if other node inputs depend on the load (via mem-proj) */
if (other != NULL && ia32_prevents_AM(block, load, other))
if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Bundle describing how a matched operation addresses its operands.
 * NOTE(review): several fields (addr, ls_mode, pinned, mem_proj, new_op1,
 * new_op2) are referenced elsewhere in this file but their declarations
 * are missing from this excerpt. */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
	ia32_op_type_t op_type;       /* Normal vs. source address mode */
	unsigned commutative : 1;     /* operands may be swapped */
	unsigned ins_permuted : 1;    /* operands were swapped during matching */
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/**
 * Fill the address-mode struct for a matched operand: float Const operands
 * become a load from a constant-pool entity, Proj(Load) operands reuse the
 * Load's address, mode and pinned state.
 * NOTE(review): excerpt is missing lines (declarations, returns, the
 * branch structure between the Const and Load cases).
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
ia32_address_t *addr = &am->addr;
/* floating point immediates */
if (is_Const(node)) {
	ir_graph *irg = get_irn_irg(node);
	const arch_env_t *arch_env = be_get_irg_arch_env(irg);
	ia32_isa_t *isa = (ia32_isa_t*) arch_env;
	ir_tarval *tv = get_Const_tarval(node);
	ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
	addr->base = get_symconst_base();
	addr->index = noreg_GP;
	addr->symconst_ent = entity;
	addr->tls_segment = false;
	am->ls_mode = get_type_mode(get_entity_type(entity));
	/* constant-pool loads may float freely */
	am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address, mode and pinned state from the Load */
load = get_Proj_pred(node);
ptr = get_Load_ptr(load);
mem = get_Load_mem(load);
new_mem = be_transform_node(mem);
am->pinned = get_irn_pinned(load);
am->ls_mode = get_Load_mode(load);
am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
/* construct load address */
ia32_create_address_mode(addr, ptr, flags);
addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/**
 * Copy the fields of an ia32_address_t onto an ia32 node's attributes
 * (scale, symconst, offset, tls segment, frame entity).
 * NOTE(review): excerpt is missing lines — the condition guarding the
 * use_frame/frame_ent assignments is not visible.
 */
static void set_address(ir_node *node, const ia32_address_t *addr)
set_ia32_am_scale(node, addr->scale);
set_ia32_am_sc(node, addr->symconst_ent);
set_ia32_am_offs_int(node, addr->offset);
set_ia32_am_tls_segment(node, addr->tls_segment);
if (addr->symconst_sign)
	set_ia32_am_sc_sign(node);
set_ia32_use_frame(node);
set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node: address fields,
 * op type, load/store mode, pinned state and commutativity.
 * NOTE(review): excerpt is missing lines — the condition guarding
 * set_ia32_commutative is not visible.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
set_address(node, &am->addr);
set_ia32_op_type(node, am->op_type);
set_ia32_ls_mode(node, am->ls_mode);
if (am->pinned == op_pin_state_pinned) {
	/* beware: some nodes are already pinned and did not allow to change the state */
	if (get_irn_pinned(node) != op_pin_state_pinned)
		set_irn_pinned(node, op_pin_state_pinned);
set_ia32_commutative(node);
/**
 * Check if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer (or equal) bits. Moreover, we return
 * only true if the node has not more than 1 user.
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 * NOTE(review): excerpt is missing lines (the is_Conv check, declarations
 * and the return keyword are not visible).
 */
static int is_downconv(const ir_node *node)
/* we only want to skip the conv when we're the only user
 * (because this test is used in the context of address-mode selection
 * and we don't want to use address mode for multiple users) */
if (get_irn_n_edges(node) > 1)
src_mode = get_irn_mode(get_Conv_op(node));
dest_mode = get_irn_mode(node);
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
679 /** Skip all Down-Conv's on a given node and return the resulting node. */
680 ir_node *ia32_skip_downconv(ir_node *node)
682 while (is_downconv(node))
683 node = get_Conv_op(node);
/**
 * Check if a node is a Conv between gp modes of equal bit size (i.e. a
 * pure signedness change), with at most one user.
 * NOTE(review): excerpt is missing lines (the is_Conv check, declarations
 * and the return keyword are not visible).
 */
static bool is_sameconv(ir_node *node)
/* we only want to skip the conv when we're the only user
 * (because this test is used in the context of address-mode selection
 * and we don't want to use address mode for multiple users) */
if (get_irn_n_edges(node) > 1)
src_mode = get_irn_mode(get_Conv_op(node));
dest_mode = get_irn_mode(node);
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
710 /** Skip all signedness convs */
711 static ir_node *ia32_skip_sameconv(ir_node *node)
713 while (is_sameconv(node))
714 node = get_Conv_op(node);
/**
 * Widen @p node to 32 bit by creating an I2I Conv; the target mode
 * presumably depends on the signedness of the source mode (the tgt_mode
 * assignments are not visible in this excerpt — verify upstream).
 */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
ir_mode *mode = get_irn_mode(node);
if (mode_is_signed(mode)) {
block = get_nodes_block(node);
dbgi = get_irn_dbg_info(node);
return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches operands of a node into ia32 addressing/operand modes. This
 * covers usage of source address mode, immediates, operations with non
 * 32-bit modes. The resulting data is filled into the @p am struct.
 * block is the block of the node whose arguments are matched. op1, op2
 * are the first and second input that are matched (op1 may be NULL).
 * other_op is another unrelated input that is not matched! but which is
 * needed sometimes to check if AM for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 * NOTE(review): this excerpt is missing many lines (closing braces,
 * else-branches, early returns) — the branch structure is incomplete.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
ia32_address_t *addr = &am->addr;
ir_mode *mode = get_irn_mode(op2);
int mode_bits = get_mode_size_bits(mode);
ir_node *new_op1, *new_op2;
unsigned commutative;
int use_am_and_immediates;
memset(am, 0, sizeof(am[0]));
/* decode the supported matching modes from the flags */
commutative = (flags & match_commutative) != 0;
use_am_and_immediates = (flags & match_am_and_immediates) != 0;
use_am = (flags & match_am) != 0;
use_immediate = (flags & match_immediate) != 0;
assert(!use_am_and_immediates || use_immediate);
assert(!commutative || op1 != NULL);
assert(use_am || !(flags & match_8bit_am));
assert(use_am || !(flags & match_16bit_am));
if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
	(mode_bits == 16 && !(flags & match_16bit_am))) {
/* we can simply skip downconvs for mode neutral nodes: the upper bits
 * can be random for these operations */
if (flags & match_mode_neutral) {
	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);
	op2 = ia32_skip_sameconv(op2);
	op1 = ia32_skip_sameconv(op1);
/* match immediates. firm nodes are normalized: constants are always on the */
if (!(flags & match_try_am) && use_immediate) {
	new_op2 = ia32_try_create_Immediate(op2, 0);
if (new_op2 == NULL &&
	use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
	/* op2 folded as source address mode operand */
	build_address(am, op2, ia32_create_am_normal);
	new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
	if (mode_is_float(mode)) {
		new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
	am->op_type = ia32_AddrModeS;
} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
	ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
	/* commutative: fold op1 instead and mark inputs as permuted */
	build_address(am, op1, ia32_create_am_normal);
	if (mode_is_float(mode)) {
		noreg = ia32_new_NoReg_vfp(current_ir_graph);
	if (new_op2 != NULL) {
		new_op1 = be_transform_node(op2);
	am->ins_permuted = true;
	am->op_type = ia32_AddrModeS;
	am->op_type = ia32_Normal;
	if (flags & match_try_am) {
	mode = get_irn_mode(op2);
	if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
		new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
		new_op2 = create_upconv(op2, NULL);
		am->ls_mode = mode_Iu;
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		new_op2 = be_transform_node(op2);
		am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* default the address parts that were not filled in */
if (addr->base == NULL)
	addr->base = noreg_GP;
if (addr->index == NULL)
	addr->index = noreg_GP;
if (addr->mem == NULL)
am->new_op1 = new_op1;
am->new_op2 = new_op2;
am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used, otherwise
 *         (per the visible early test) the node itself — verify upstream.
 * NOTE(review): excerpt is missing lines (returns, closing braces).
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
if (am->mem_proj == NULL)
/* we have to create a mode_T so the old MemProj can attach to us */
mode = get_irn_mode(node);
load = get_Proj_pred(am->mem_proj);
be_set_transformed_node(load, node);
if (mode != mode_T) {
	set_irn_mode(node, mode_T);
	return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node  The original node for which the binop is created
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): excerpt is missing lines (the final return is not
 * visible).
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_func *func, match_flags_t flags)
ir_node *block, *new_block, *new_node;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
block = get_nodes_block(node);
match_arguments(&am, block, op1, op2, NULL, flags);
dbgi = get_irn_dbg_info(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	am.new_op1, am.new_op2);
set_am_attributes(new_node, &am);
/* we can't use source address mode anymore when using immediates */
if (!(flags & match_am_and_immediates) &&
	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
	set_ia32_am_support(new_node, ia32_am_none);
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op; the asserts below
 * pin them to the generated Adc/Sbb input positions so gen_binop_flags
 * can read inputs generically.
 */
	n_ia32_l_binop_left,   /**< ia32 left input */
	n_ia32_l_binop_right,  /**< ia32 right input */
	n_ia32_l_binop_eflags  /**< ia32 eflags input */
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node  The node to transform
 * @param func  The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 * NOTE(review): excerpt is missing lines (the final return is not
 * visible).
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
ir_node *src_block = get_nodes_block(node);
ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
ir_node *block, *new_node, *new_eflags;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
match_arguments(&am, src_block, op1, op2, eflags, flags);
dbgi = get_irn_dbg_info(node);
block = be_transform_node(src_block);
new_eflags = be_transform_node(eflags);
new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
	am.new_op1, am.new_op2, new_eflags);
set_am_attributes(new_node, &am);
/* we can't use source address mode anymore when using immediates */
if (!(flags & match_am_and_immediates) &&
	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
	set_ia32_am_support(new_node, ia32_am_none);
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
986 static ir_node *get_fpcw(void)
988 if (initial_fpcw != NULL)
991 initial_fpcw = be_transform_node(old_initial_fpcw);
/**
 * Skip float Convs that only widen the value (source mode not larger than
 * the result mode), but only while the Conv has a single user.
 * NOTE(review): excerpt is missing lines (loop-body advance and the final
 * return are not visible).
 */
static ir_node *skip_float_upconv(ir_node *node)
ir_mode *mode = get_irn_mode(node);
assert(mode_is_float(mode));
while (is_Conv(node)) {
	ir_node *pred = get_Conv_op(node);
	ir_mode *pred_mode = get_irn_mode(pred);
	/*
	 * suboptimal, but without this check the address mode matcher
	 * can incorrectly think that something has only 1 user
	 */
	if (get_irn_n_edges(node) > 1)
	if (!mode_is_float(pred_mode)
		|| get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
/**
 * Construct a standard x87 binary operation, set AM and immediate if
 * required. The fpcw is appended as extra input; ins_permuted is copied
 * into the x87 attributes so emission can pick the reverse instruction.
 *
 * @param op1  The first operand
 * @param op2  The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): excerpt is missing lines (declarations, final return).
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_float_func *func)
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
ia32_x87_attr_t *attr;
/* All operations are considered commutative, because there are reverse */
match_flags_t flags = match_commutative | match_am;
op1 = skip_float_upconv(op1);
op2 = skip_float_upconv(op2);
block = get_nodes_block(node);
match_arguments(&am, block, op1, op2, NULL, flags);
dbgi = get_irn_dbg_info(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	am.new_op1, am.new_op2, get_fpcw());
set_am_attributes(new_node, &am);
attr = get_ia32_x87_attr(new_node);
attr->attr.data.ins_permuted = am.ins_permuted;
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if
 * required.
 *
 * @param op1  The first operand
 * @param op2  The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): excerpt is missing lines (loop-body advance, final
 * return).
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_shift_func *func,
		match_flags_t flags)
ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
ir_mode *mode = get_irn_mode(node);
assert(! mode_is_float(mode));
assert(flags & match_immediate);
assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
if (get_mode_modulo_shift(mode) != 32)
	panic("modulo shift!=32 not supported by ia32 backend");
if (flags & match_mode_neutral) {
	op1 = ia32_skip_downconv(op1);
	new_op1 = be_transform_node(op1);
} else if (get_mode_size_bits(mode) != 32) {
	new_op1 = create_upconv(op1, node);
	new_op1 = be_transform_node(op1);
/* the shift amount can be any mode that is bigger than 5 bits, since all
 * other bits are ignored anyway */
while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
	ir_node *const op = get_Conv_op(op2);
	if (mode_is_float(get_irn_mode(op)))
	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
new_op2 = create_immediate_or_transform(op2, 0);
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, new_op1, new_op2);
SET_IA32_ORIG_NODE(new_node, node);
/* lowered shift instruction may have a dependency operand, handle it here */
if (get_irn_arity(node) == 3) {
	/* we have a dependency */
	ir_node* dep = get_irn_n(node, 2);
	if (get_irn_n_edges(dep) > 1) {
		/* ... which has at least one user other than 'node' */
		ir_node *new_dep = be_transform_node(dep);
		add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op   The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): excerpt is missing lines (the final return is not
 * visible).
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
		match_flags_t flags)
ir_node *block, *new_block, *new_op, *new_node;
assert(flags == 0 || flags == match_mode_neutral);
if (flags & match_mode_neutral) {
	op = ia32_skip_downconv(op);
new_op = be_transform_node(op);
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, new_op);
SET_IA32_ORIG_NODE(new_node, node);
/**
 * Build an ia32 Lea from an ia32_address_t. Since segment overrides do not
 * apply to Lea, a TLS-segment address is rewritten into an extra Lea over
 * the LdTls base first.
 * NOTE(review): excerpt is missing lines (declarations of base/idx/res,
 * null checks, the final return).
 */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
		ia32_address_t *addr)
base = be_transform_node(base);
idx = be_transform_node(idx);
/* segment overrides are ineffective for Leas :-( so we have to patch */
if (addr->tls_segment) {
	ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
	assert(addr->symconst_ent != NULL);
	if (base == noreg_GP)
	base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
	addr->tls_segment = false;
res = new_bd_ia32_Lea(dbgi, block, base, idx);
set_address(res, addr);
1197 * Returns non-zero if a given address mode has a symbolic or
1198 * numerical offset != 0.
1200 static int am_has_immediates(const ia32_address_t *addr)
1202 return addr->offset != 0 || addr->symconst_ent != NULL
1203 || addr->frame_entity || addr->use_frame;
/* Constructor signature shared by the ShlD/ShrD builders (trailing count
 * parameter not visible in this excerpt). */
typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
		ir_node *high, ir_node *low,
/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data
 * inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * Only op3 (the count) can be an immediate.
 * NOTE(review): excerpt is missing lines (the final return is not
 * visible).
 */
static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
		ir_node *high, ir_node *low, ir_node *count,
		new_shiftd_func func)
ir_node *new_block = be_transform_node(block);
ir_node *new_high = be_transform_node(high);
ir_node *new_low = be_transform_node(low);
/* the shift amount can be any mode that is bigger than 5 bits, since all
 * other bits are ignored anyway */
while (is_Conv(count) &&
	get_irn_n_edges(count) == 1 &&
	mode_is_int(get_irn_mode(count))) {
	assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
	count = get_Conv_op(count);
new_count = create_immediate_or_transform(count, 0);
new_node = func(dbgi, new_block, new_high, new_low, new_count);
1242 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1245 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1247 if (is_Const(value1) && is_Const(value2)) {
1248 ir_tarval *tv1 = get_Const_tarval(value1);
1249 ir_tarval *tv2 = get_Const_tarval(value2);
1250 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1251 long v1 = get_tarval_long(tv1);
1252 long v2 = get_tarval_long(tv2);
/* v1 <= v2 also rejects negative/overflowing pairs cheaply */
1253 return v1 <= v2 && v2 == 32-v1;
/**
 * Try to recognise an Or/Add of a Shl and a Shr as a 64bit double-shift
 * (ShlD/ShrD). Handles both the constant-complementary-shift form and the
 * Not(c)-based patterns emitted by lower_dw. Returns the created node, or
 * (presumably) NULL when no pattern matches -- the fallthrough is elided
 * from this listing.
 */
1259 static ir_node *match_64bit_shift(ir_node *node)
1261 ir_node *op1 = get_binop_left(node);
1262 ir_node *op2 = get_binop_right(node);
1263 assert(is_Or(node) || is_Add(node));
1271 /* match ShlD operation */
1272 if (is_Shl(op1) && is_Shr(op2)) {
1273 ir_node *shl_right = get_Shl_right(op1);
1274 ir_node *shl_left = get_Shl_left(op1);
1275 ir_node *shr_right = get_Shr_right(op2);
1276 ir_node *shr_left = get_Shr_left(op2);
1277 /* constant ShlD operation */
1278 if (is_complementary_shifts(shl_right, shr_right)) {
1279 dbg_info *dbgi = get_irn_dbg_info(node);
1280 ir_node *block = get_nodes_block(node);
1281 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1284 /* constant ShrD operation */
1285 if (is_complementary_shifts(shr_right, shl_right)) {
1286 dbg_info *dbgi = get_irn_dbg_info(node);
1287 ir_node *block = get_nodes_block(node);
1288 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1291 /* lower_dw produces the following for ShlD:
1292 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1293 if (is_Shr(shr_left) && is_Not(shr_right)
1294 && is_Const_1(get_Shr_right(shr_left))
1295 && get_Not_op(shr_right) == shl_right) {
1296 dbg_info *dbgi = get_irn_dbg_info(node);
1297 ir_node *block = get_nodes_block(node);
1298 ir_node *val_h = get_Shr_left(shr_left);
1299 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1302 /* lower_dw produces the following for ShrD:
1303 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1304 if (is_Shl(shl_left) && is_Not(shl_right)
1305 && is_Const_1(get_Shl_right(shl_left))
1306 && get_Not_op(shl_right) == shr_right) {
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_node *block = get_nodes_block(node);
1309 ir_node *val_h = get_Shl_left(shl_left);
1310 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1319 * Creates an ia32 Add.
1321 * @return the created ia32 Add node
1323 static ir_node *gen_Add(ir_node *node)
1325 ir_mode *mode = get_irn_mode(node);
1326 ir_node *op1 = get_Add_left(node);
1327 ir_node *op2 = get_Add_right(node);
1329 ir_node *block, *new_block, *new_node, *add_immediate_op;
1330 ia32_address_t addr;
1331 ia32_address_mode_t am;
/* first see if this Add is really half of a 64bit double-shift */
1333 new_node = match_64bit_shift(node);
1334 if (new_node != NULL)
1337 if (mode_is_float(mode)) {
1338 if (ia32_cg_config.use_sse2)
1339 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1340 match_commutative | match_am);
1342 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1345 ia32_mark_non_am(node);
1347 op2 = ia32_skip_downconv(op2);
1348 op1 = ia32_skip_downconv(op1);
/* transformation strategy, tried in this order: */
1352 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1353 * 1. Add with immediate -> Lea
1354 * 2. Add with possible source address mode -> Add
1355 * 3. Otherwise -> Lea
1357 memset(&addr, 0, sizeof(addr));
1358 ia32_create_address_mode(&addr, node, ia32_create_am_force)<!---->;
1359 add_immediate_op = NULL;
1361 dbgi = get_irn_dbg_info(node);
1362 block = get_nodes_block(node);
1363 new_block = be_transform_node(block);
/* case 0: the whole Add folded into pure immediates -> single Const */
1366 if (addr.base == NULL && addr.index == NULL) {
1367 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1368 addr.symconst_sign, 0, addr.offset);
1369 SET_IA32_ORIG_NODE(new_node, node);
1372 /* add with immediate? */
1373 if (addr.index == NULL) {
1374 add_immediate_op = addr.base;
1375 } else if (addr.base == NULL && addr.scale == 0) {
1376 add_immediate_op = addr.index;
1379 if (add_immediate_op != NULL) {
/* Add x,0 -- nothing to add at all, just use the operand */
1380 if (!am_has_immediates(&addr)) {
1381 #ifdef DEBUG_libfirm
1382 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1385 return be_transform_node(add_immediate_op);
/* case 1: value + immediate becomes a Lea */
1388 new_node = create_lea_from_address(dbgi, new_block, &addr);
1389 SET_IA32_ORIG_NODE(new_node, node);
1393 /* test if we can use source address mode */
1394 match_arguments(&am, block, op1, op2, NULL, match_commutative
1395 | match_mode_neutral | match_am | match_immediate | match_try_am);
1397 /* construct an Add with source address mode */
1398 if (am.op_type == ia32_AddrModeS) {
1399 ia32_address_t *am_addr = &am.addr;
1400 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1401 am_addr->index, am_addr->mem, am.new_op1,
1403 set_am_attributes(new_node, &am);
1404 SET_IA32_ORIG_NODE(new_node, node);
1406 new_node = fix_mem_proj(new_node, &am);
1411 /* otherwise construct a lea */
1412 new_node = create_lea_from_address(dbgi, new_block, &addr);
1413 SET_IA32_ORIG_NODE(new_node, node);
1418 * Creates an ia32 Mul.
1420 * @return the created ia32 Mul node
1422 static ir_node *gen_Mul(ir_node *node)
1424 ir_node *op1 = get_Mul_left(node);
1425 ir_node *op2 = get_Mul_right(node);
1426 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul if available, otherwise x87 vfmul */
1428 if (mode_is_float(mode)) {
1429 if (ia32_cg_config.use_sse2)
1430 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1431 match_commutative | match_am);
1433 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports AM, immediates and mode-neutral ops */
1435 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1436 match_commutative | match_am | match_mode_neutral |
1437 match_immediate | match_am_and_immediates);
1441 * Creates an ia32 Mulh.
1442 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1443 * this result while Mul returns the lower 32 bit.
1445 * @return the created ia32 Mulh node
1447 static ir_node *gen_Mulh(ir_node *node)
1449 dbg_info *dbgi = get_irn_dbg_info(node);
1450 ir_node *op1 = get_Mulh_left(node);
1451 ir_node *op2 = get_Mulh_right(node);
1452 ir_mode *mode = get_irn_mode(node);
1454 ir_node *proj_res_high;
/* the x86 widening multiply only exists for 32x32 -> 64 bit */
1456 if (get_mode_size_bits(mode) != 32) {
1457 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> IMul1OP, unsigned -> Mul; project out the high result half */
1460 if (mode_is_signed(mode)) {
1461 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1462 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1464 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1465 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1467 return proj_res_high;
1471 * Creates an ia32 And.
1473 * @return The created ia32 And node
1475 static ir_node *gen_And(ir_node *node)
1477 ir_node *op1 = get_And_left(node);
1478 ir_node *op2 = get_And_right(node);
1479 assert(! mode_is_float(get_irn_mode(node)));
1481 /* is it a zero extension? */
1482 if (is_Const(op2)) {
1483 ir_tarval *tv = get_Const_tarval(op2);
1484 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero-extension from 8/16 bit -- emit a
 * (potentially cheaper) I2I conversion instead of a real And */
1486 if (v == 0xFF || v == 0xFFFF) {
1487 dbg_info *dbgi = get_irn_dbg_info(node);
1488 ir_node *block = get_nodes_block(node);
1495 assert(v == 0xFFFF);
1498 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: a plain ia32 And */
1503 return gen_binop(node, op1, op2, new_bd_ia32_And,
1504 match_commutative | match_mode_neutral | match_am | match_immediate);
1508 * Creates an ia32 Or.
1510 * @return The created ia32 Or node
1512 static ir_node *gen_Or(ir_node *node)
1514 ir_node *op1 = get_Or_left(node);
1515 ir_node *op2 = get_Or_right(node);
/* Or is the canonical combiner for lowered 64bit shifts -- try that first */
1518 res = match_64bit_shift(node);
1522 assert (! mode_is_float(get_irn_mode(node)));
1523 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1524 | match_mode_neutral | match_am | match_immediate);
1530 * Creates an ia32 Eor.
1532 * @return The created ia32 Eor node
1534 static ir_node *gen_Eor(ir_node *node)
1536 ir_node *op1 = get_Eor_left(node);
1537 ir_node *op2 = get_Eor_right(node);
/* firm Eor == x86 Xor; floats are handled elsewhere */
1539 assert(! mode_is_float(get_irn_mode(node)));
1540 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1541 | match_mode_neutral | match_am | match_immediate);
1546 * Creates an ia32 Sub.
1548 * @return The created ia32 Sub node
1550 static ir_node *gen_Sub(ir_node *node)
1552 ir_node *op1 = get_Sub_left(node);
1553 ir_node *op2 = get_Sub_right(node);
1554 ir_mode *mode = get_irn_mode(node);
1556 if (mode_is_float(mode)) {
1557 if (ia32_cg_config.use_sse2)
1558 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1560 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middle-end;
 * warn (but still handle it) if it wasn't */
1563 if (is_Const(op2)) {
1564 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: no match_commutative -- subtraction is not commutative */
1568 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1569 | match_am | match_immediate);
/**
 * Compute the memory input for a node that consumed a Load via address mode:
 * combine the node's original memory with the AM memory while avoiding a
 * memory self-loop through the consumed Load. May rebuild a Sync without the
 * offending predecessor.
 * NOTE(review): listing is elided; early-return/Sync-append paths are only
 * partially visible.
 */
1572 static ir_node *transform_AM_mem(ir_node *const block,
1573 ir_node *const src_val,
1574 ir_node *const src_mem,
1575 ir_node *const am_mem)
1577 if (is_NoMem(am_mem)) {
1578 return be_transform_node(src_mem);
1579 } else if (is_Proj(src_val) &&
1581 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1582 /* avoid memory loop */
1584 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1585 ir_node *const ptr_pred = get_Proj_pred(src_val);
1586 int const arity = get_Sync_n_preds(src_mem);
/* worst case: all old Sync preds plus the AM memory */
1591 NEW_ARR_A(ir_node*, ins, arity + 1);
1593 /* NOTE: This sometimes produces dead-code because the old sync in
1594 * src_mem might not be used anymore, we should detect this case
1595 * and kill the sync... */
1596 for (i = arity - 1; i >= 0; --i) {
1597 ir_node *const pred = get_Sync_pred(src_mem, i);
1599 /* avoid memory loop */
1600 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1603 ins[n++] = be_transform_node(pred);
1606 if (n==1 && ins[0] == am_mem) {
1608 /* creating a new Sync and relying on CSE may fail,
1609 * if am_mem is a ProjM, which does not yet verify. */
1613 return new_r_Sync(block, n, ins);
/* fallback: Sync of the transformed source memory and the AM memory */
1617 ins[0] = be_transform_node(src_mem);
1619 return new_r_Sync(block, 2, ins);
1624 * Create a 32bit to 64bit signed extension.
1626 * @param dbgi debug info
1627 * @param block the block where node nodes should be placed
1628 * @param val the value to extend
1629 * @param orig the original node
1631 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1632 ir_node *val, const ir_node *orig)
/* two strategies: cltd (cdq) is shorter but ties eax/edx, Sar x,31 is more
 * flexible for the register allocator */
1637 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the allocator a fake eax def that cltd can consume */
1638 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1639 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1641 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1642 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1644 SET_IA32_ORIG_NODE(res, orig);
1649 * Generates an ia32 Div with additional infrastructure for the
1650 * register allocator if needed.
1652 static ir_node *create_Div(ir_node *node)
1654 dbg_info *dbgi = get_irn_dbg_info(node);
1655 ir_node *block = get_nodes_block(node);
1656 ir_node *new_block = be_transform_node(block);
1657 int throws_exception = ir_throws_exception(node);
1664 ir_node *sign_extension;
1665 ia32_address_mode_t am;
1666 ia32_address_t *addr = &am.addr;
1668 /* the upper bits have random contents for smaller modes */
/* shared transformer for Div and Mod -- pick operands per opcode */
1669 switch (get_irn_opcode(node)) {
1671 op1 = get_Div_left(node);
1672 op2 = get_Div_right(node);
1673 mem = get_Div_mem(node);
1674 mode = get_Div_resmode(node);
1677 op1 = get_Mod_left(node);
1678 op2 = get_Mod_right(node);
1679 mem = get_Mod_mem(node);
1680 mode = get_Mod_resmode(node);
1683 panic("invalid divmod node %+F", node);
1686 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1688 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1689 is the memory of the consumed address. We can have only the second op as address
1690 in Div nodes, so check only op2. */
1691 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs the dividend sign-extended into edx (IDiv);
 * unsigned division needs edx zeroed (Div) */
1693 if (mode_is_signed(mode)) {
1694 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1695 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1696 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1698 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1700 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1701 addr->index, new_mem, am.new_op2,
1702 am.new_op1, sign_extension);
1704 ir_set_throws_exception(new_node, throws_exception);
1706 set_irn_pinned(new_node, get_irn_pinned(node));
1708 set_am_attributes(new_node, &am);
1709 SET_IA32_ORIG_NODE(new_node, node);
1711 new_node = fix_mem_proj(new_node, &am);
1717 * Generates an ia32 Mod.
/* Mod shares the divmod machinery; callers project out the mod result. */
1719 static ir_node *gen_Mod(ir_node *node)
1721 return create_Div(node);
1725 * Generates an ia32 Div.
1727 static ir_node *gen_Div(ir_node *node)
1729 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv or x87 vfdiv; integer goes to create_Div() */
1730 if (mode_is_float(mode)) {
1731 ir_node *op1 = get_Div_left(node);
1732 ir_node *op2 = get_Div_right(node);
1734 if (ia32_cg_config.use_sse2) {
1735 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1737 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1741 return create_Div(node);
1745 * Creates an ia32 Shl.
1747 * @return The created ia32 Shl node
1749 static ir_node *gen_Shl(ir_node *node)
1751 ir_node *left = get_Shl_left(node);
1752 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: low bits are unaffected by upper garbage */
1754 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1755 match_mode_neutral | match_immediate);
1759 * Creates an ia32 Shr.
1761 * @return The created ia32 Shr node
1763 static ir_node *gen_Shr(ir_node *node)
1765 ir_node *left = get_Shr_left(node);
1766 ir_node *right = get_Shr_right(node);
/* no match_mode_neutral: a logical right shift depends on the upper bits */
1768 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1774 * Creates an ia32 Sar.
1776 * @return The created ia32 Shrs node
1778 static ir_node *gen_Shrs(ir_node *node)
1780 ir_node *left = get_Shrs_left(node);
1781 ir_node *right = get_Shrs_right(node);
/* Shrs x,31 on a 32bit value yields just the sign -- reuse the
 * sign-extension helper instead of a plain Sar */
1783 if (is_Const(right)) {
1784 ir_tarval *tv = get_Const_tarval(right);
1785 long val = get_tarval_long(tv);
1787 /* this is a sign extension */
1788 dbg_info *dbgi = get_irn_dbg_info(node);
1789 ir_node *block = be_transform_node(get_nodes_block(node));
1790 ir_node *new_op = be_transform_node(left);
1792 return create_sex_32_64(dbgi, block, new_op, node);
1796 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16, 24} is a sign-extension from
 * 16/8 bit -- emit an I2I conversion instead of two shifts */
1797 if (is_Const(right) && is_Shl(left)) {
1798 ir_node *shl_left = get_Shl_left(left);
1799 ir_node *shl_right = get_Shl_right(left);
1800 if (is_Const(shl_right)) {
1801 ir_tarval *tv1 = get_Const_tarval(right);
1802 ir_tarval *tv2 = get_Const_tarval(shl_right);
1803 if (tv1 == tv2 && tarval_is_long(tv1)) {
1804 long val = get_tarval_long(tv1);
1805 if (val == 16 || val == 24) {
1806 dbg_info *dbgi = get_irn_dbg_info(node);
1807 ir_node *block = get_nodes_block(node);
1817 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic right shift */
1826 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1832 * Creates an ia32 Rol.
1834 * @param op1 The first operator
1835 * @param op2 The second operator
1836 * @return The created ia32 RotL node
1838 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1840 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1846 * Creates an ia32 Ror.
1847 * NOTE: There is no RotR with immediate because this would always be a RotL
1848 * "imm-mode_size_bits" which can be pre-calculated.
1850 * @param op1 The first operator
1851 * @param op2 The second operator
1852 * @return The created ia32 RotR node
1854 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1856 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1862 * Creates an ia32 RotR or RotL (depending on the found pattern).
1864 * @return The created ia32 RotL or RotR node
1866 static ir_node *gen_Rotl(ir_node *node)
1868 ir_node *op1 = get_Rotl_left(node);
1869 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is the same as Rotr(x, y) -- strip the Minus */
1871 if (is_Minus(op2)) {
1872 return gen_Ror(node, op1, get_Minus_op(op2));
1875 return gen_Rol(node, op1, op2);
1881 * Transforms a Minus node.
1883 * @return The created ia32 Minus node
1885 static ir_node *gen_Minus(ir_node *node)
1887 ir_node *op = get_Minus_op(node);
1888 ir_node *block = be_transform_node(get_nodes_block(node));
1889 dbg_info *dbgi = get_irn_dbg_info(node);
1890 ir_mode *mode = get_irn_mode(node);
1895 if (mode_is_float(mode)) {
1896 ir_node *new_op = be_transform_node(op);
1897 if (ia32_cg_config.use_sse2) {
1898 /* TODO: non-optimal... if we have many xXors, then we should
1899 * rather create a load for the const and use that instead of
1900 * several AM nodes... */
/* SSE negate: XOR with a sign-bit-mask constant loaded via AM */
1901 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1903 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1904 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the single/double precision sign mask per operand size */
1906 size = get_mode_size_bits(mode);
1907 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1909 set_ia32_am_sc(new_node, ent);
1910 set_ia32_op_type(new_node, ia32_AddrModeS);
1911 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction */
1913 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1916 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1919 SET_IA32_ORIG_NODE(new_node, node);
1925 * Transforms a Not node.
1927 * @return The created ia32 Not node
1929 static ir_node *gen_Not(ir_node *node)
1931 ir_node *op = get_Not_op(node);
1933 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1934 assert (! mode_is_float(get_irn_mode(node)));
1936 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Create a float absolute-value (optionally negated) node.
 * SSE2: And with a sign-bit-clearing mask constant; x87: vfabs, followed by
 * vfchs when negate is requested.
 */
1939 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1940 bool negate, ir_node *node)
1942 ir_node *new_block = be_transform_node(block);
1943 ir_mode *mode = get_irn_mode(op);
1944 ir_node *new_op = be_transform_node(op);
1949 assert(mode_is_float(mode));
1951 if (ia32_cg_config.use_sse2) {
1952 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1953 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1954 noreg_GP, nomem, new_op, noreg_fp);
/* abs-mask (all bits but the sign bit), per operand precision */
1956 size = get_mode_size_bits(mode);
1957 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1959 set_ia32_am_sc(new_node, ent);
1961 SET_IA32_ORIG_NODE(new_node, node);
1963 set_ia32_op_type(new_node, ia32_AddrModeS);
1964 set_ia32_ls_mode(new_node, mode);
1966 /* TODO, implement -Abs case */
1969 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1970 SET_IA32_ORIG_NODE(new_node, node);
/* x87 -abs(x): take abs first, then flip the sign */
1972 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1973 SET_IA32_ORIG_NODE(new_node, node);
1981 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1983 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1985 dbg_info *dbgi = get_irn_dbg_info(cmp);
1986 ir_node *block = get_nodes_block(cmp);
1987 ir_node *new_block = be_transform_node(block);
1988 ir_node *op1 = be_transform_node(x);
1989 ir_node *op2 = be_transform_node(n);
/* bt copies the selected bit into the carry flag */
1991 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Map a firm ir_relation onto an ia32 condition code, taking the compared
 * mode into account: float relations map onto the parity/carry-based FP
 * codes, signed integers onto less/greater, unsigned onto below/above.
 * When no overflow is possible, "< 0" / ">= 0" degrade to a cheaper sign
 * test. ir_relation_false/true fall through to panic (no jump-always code).
 */
1994 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1996 bool overflow_possible)
1998 if (mode_is_float(mode)) {
2000 case ir_relation_equal: return ia32_cc_float_equal;
2001 case ir_relation_less: return ia32_cc_float_below;
2002 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2003 case ir_relation_greater: return ia32_cc_float_above;
2004 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2005 case ir_relation_less_greater: return ia32_cc_not_equal;
2006 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2007 case ir_relation_unordered: return ia32_cc_parity;
2008 case ir_relation_unordered_equal: return ia32_cc_equal;
2009 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2010 case ir_relation_unordered_less_equal:
2011 return ia32_cc_float_unordered_below_equal;
2012 case ir_relation_unordered_greater:
2013 return ia32_cc_float_unordered_above;
2014 case ir_relation_unordered_greater_equal:
2015 return ia32_cc_float_unordered_above_equal;
2016 case ir_relation_unordered_less_greater:
2017 return ia32_cc_float_not_equal;
2018 case ir_relation_false:
2019 case ir_relation_true:
2020 /* should we introduce a jump always/jump never? */
2023 panic("Unexpected float pnc");
2024 } else if (mode_is_signed(mode)) {
2026 case ir_relation_unordered_equal:
2027 case ir_relation_equal: return ia32_cc_equal;
2028 case ir_relation_unordered_less:
2029 case ir_relation_less:
/* x < 0 without overflow: just test the sign flag */
2030 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2031 case ir_relation_unordered_less_equal:
2032 case ir_relation_less_equal: return ia32_cc_less_equal;
2033 case ir_relation_unordered_greater:
2034 case ir_relation_greater: return ia32_cc_greater;
2035 case ir_relation_unordered_greater_equal:
2036 case ir_relation_greater_equal:
2037 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2038 case ir_relation_unordered_less_greater:
2039 case ir_relation_less_greater: return ia32_cc_not_equal;
2040 case ir_relation_less_equal_greater:
2041 case ir_relation_unordered:
2042 case ir_relation_false:
2043 case ir_relation_true:
2044 /* introduce jump always/jump never? */
2047 panic("Unexpected pnc");
/* unsigned integer compare */
2050 case ir_relation_unordered_equal:
2051 case ir_relation_equal: return ia32_cc_equal;
2052 case ir_relation_unordered_less:
2053 case ir_relation_less: return ia32_cc_below;
2054 case ir_relation_unordered_less_equal:
2055 case ir_relation_less_equal: return ia32_cc_below_equal;
2056 case ir_relation_unordered_greater:
2057 case ir_relation_greater: return ia32_cc_above;
2058 case ir_relation_unordered_greater_equal:
2059 case ir_relation_greater_equal: return ia32_cc_above_equal;
2060 case ir_relation_unordered_less_greater:
2061 case ir_relation_less_greater: return ia32_cc_not_equal;
2062 case ir_relation_less_equal_greater:
2063 case ir_relation_unordered:
2064 case ir_relation_false:
2065 case ir_relation_true:
2066 /* introduce jump always/jump never? */
2069 panic("Unexpected pnc");
/**
 * Produce the flags-producing node for a Cmp and the condition code that a
 * consumer must test. Recognises bit-test patterns (bt) when enabled,
 * widens relations that the middle-end narrowed, and otherwise transforms
 * the Cmp normally.
 * @param cmp the Cmp node (must be a Cmp)
 * @param cc_out out-parameter receiving the ia32 condition code
 */
2073 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2075 /* must have a Cmp as input */
2076 ir_relation relation = get_Cmp_relation(cmp);
2077 ir_node *l = get_Cmp_left(cmp);
2078 ir_node *r = get_Cmp_right(cmp);
2079 ir_mode *mode = get_irn_mode(l);
2080 bool overflow_possible;
2081 ir_relation possible;
2084 /* check for bit-test */
2085 if (ia32_cg_config.use_bt
2086 && (relation == ir_relation_equal
2087 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2088 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2090 ir_node *la = get_And_left(l);
2091 ir_node *ra = get_And_right(l);
2098 ir_node *c = get_Shl_left(la);
2099 if (is_Const_1(c) && is_Const_0(r)) {
2100 /* (1 << n) & ra) */
2101 ir_node *n = get_Shl_right(la);
2102 flags = gen_bt(cmp, ra, n);
2103 /* the bit is copied into the CF flag */
2104 if (relation & ir_relation_equal)
2105 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2107 *cc_out = ia32_cc_below; /* test for CF=1 */
2113 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2114 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2115 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2116 * a predecessor node). So add the < bit */
2117 possible = ir_get_possible_cmp_relations(l, r);
2118 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2119 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2120 relation |= ir_relation_less_greater;
/* comparisons against literal 0 cannot overflow in the subtract */
2122 overflow_possible = true;
2123 if (is_Const(r) && is_Const_null(r))
2124 overflow_possible = false;
2126 /* just do a normal transformation of the Cmp */
2127 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2128 flags = be_transform_node(cmp);
2133 * Transforms a Load.
2135 * @return the created ia32 Load node
2137 static ir_node *gen_Load(ir_node *node)
2139 ir_node *old_block = get_nodes_block(node);
2140 ir_node *block = be_transform_node(old_block);
2141 ir_node *ptr = get_Load_ptr(node);
2142 ir_node *mem = get_Load_mem(node);
2143 ir_node *new_mem = be_transform_node(mem);
2144 dbg_info *dbgi = get_irn_dbg_info(node);
2145 ir_mode *mode = get_Load_mode(node);
2146 int throws_exception = ir_throws_exception(node);
2150 ia32_address_t addr;
2152 /* construct load address */
2153 memset(&addr, 0, sizeof(addr));
2154 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2161 base = be_transform_node(base);
2167 idx = be_transform_node(idx);
/* choose load flavour: SSE2 xLoad / x87 vfld for floats, Conv_I2I for
 * sub-32bit integers (load + extend in one), plain Load otherwise */
2170 if (mode_is_float(mode)) {
2171 if (ia32_cg_config.use_sse2) {
2172 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2175 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2179 assert(mode != mode_b);
2181 /* create a conv node with address mode for smaller modes */
2182 if (get_mode_size_bits(mode) < 32) {
2183 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2184 new_mem, noreg_GP, mode);
2186 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2189 ir_set_throws_exception(new_node, throws_exception);
2191 set_irn_pinned(new_node, get_irn_pinned(node));
2192 set_ia32_op_type(new_node, ia32_AddrModeS);
2193 set_ia32_ls_mode(new_node, mode);
2194 set_address(new_node, &addr);
/* floating (unpinned) loads can be re-executed freely -> rematerializable */
2196 if (get_irn_pinned(node) == op_pin_state_floats) {
2197 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2198 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2199 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2200 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2203 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Decide whether a value (a Load result Proj) may be folded into a
 * destination-address-mode operation writing back through ptr/mem.
 * Rejects multi-user loads, cross-block loads, pointer mismatches, and
 * cases where folding would create a dependency cycle.
 */
2208 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2209 ir_node *ptr, ir_node *other)
2216 /* we only use address mode if we're the only user of the load */
2217 if (get_irn_n_edges(node) > 1)
2220 load = get_Proj_pred(node);
2223 if (get_nodes_block(load) != block)
2226 /* store should have the same pointer as the load */
2227 if (get_Load_ptr(load) != ptr)
2230 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2231 if (other != NULL &&
2232 get_nodes_block(other) == block &&
2233 heights_reachable_in_block(ia32_heights, other, load)) {
2237 if (ia32_prevents_AM(block, load, mem))
2239 /* Store should be attached to the load via mem */
2240 assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Build a destination-address-mode binop (e.g. AddMem: "op [mem], reg"):
 * folds Load+op+Store into a single read-modify-write instruction.
 * Tries op1 as the memory operand first, then op2 when the operation is
 * commutative. func8bit is used for 8bit operand modes.
 * NOTE(review): listing elided -- failure/early-return paths are not visible.
 */
2245 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2246 ir_node *mem, ir_node *ptr, ir_mode *mode,
2247 construct_binop_dest_func *func,
2248 construct_binop_dest_func *func8bit,
2249 match_flags_t flags)
2251 ir_node *src_block = get_nodes_block(node);
2259 ia32_address_mode_t am;
2260 ia32_address_t *addr = &am.addr;
2261 memset(&am, 0, sizeof(am));
2263 assert(flags & match_immediate); /* there is no destam node without... */
2264 commutative = (flags & match_commutative) != 0;
2266 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2267 build_address(&am, op1, ia32_create_am_double_use);
2268 new_op = create_immediate_or_transform(op2, 0);
2269 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2270 build_address(&am, op2, ia32_create_am_double_use);
2271 new_op = create_immediate_or_transform(op1, 0);
/* normalize unset address parts to the no-register placeholder */
2276 if (addr->base == NULL)
2277 addr->base = noreg_GP;
2278 if (addr->index == NULL)
2279 addr->index = noreg_GP;
2280 if (addr->mem == NULL)
2283 dbgi = get_irn_dbg_info(node);
2284 block = be_transform_node(src_block);
2285 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2287 if (get_mode_size_bits(mode) == 8) {
2288 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2290 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2292 set_address(new_node, addr);
2293 set_ia32_op_type(new_node, ia32_AddrModeD);
2294 set_ia32_ls_mode(new_node, mode);
2295 SET_IA32_ORIG_NODE(new_node, node);
/* the consumed Load and its mem-Proj are now represented by new_node */
2297 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2298 mem_proj = be_transform_node(am.mem_proj);
2299 be_set_transformed_node(am.mem_proj, new_node);
2300 be_set_transformed_node(mem_proj, new_node);
/**
 * Build a destination-address-mode unop (e.g. NotMem, NegMem, IncMem):
 * folds Load+unop+Store into a single read-modify-write instruction.
 * Returns NULL (elided here) when destination AM is not applicable.
 */
2305 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2306 ir_node *ptr, ir_mode *mode,
2307 construct_unop_dest_func *func)
2309 ir_node *src_block = get_nodes_block(node);
2315 ia32_address_mode_t am;
2316 ia32_address_t *addr = &am.addr;
2318 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2321 memset(&am, 0, sizeof(am));
2322 build_address(&am, op, ia32_create_am_double_use);
2324 dbgi = get_irn_dbg_info(node);
2325 block = be_transform_node(src_block);
2326 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2327 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2328 set_address(new_node, addr);
2329 set_ia32_op_type(new_node, ia32_AddrModeD);
2330 set_ia32_ls_mode(new_node, mode);
2331 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed Load and its mem-Proj to the new RMW node */
2333 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2334 mem_proj = be_transform_node(am.mem_proj);
2335 be_set_transformed_node(am.mem_proj, new_node);
2336 be_set_transformed_node(mem_proj, new_node);
/**
 * Try to turn Store(Mux(cond, 1, 0)) into a SetccMem that writes the
 * condition byte directly to memory. Only applies to 8bit stores with
 * constant 0/1 Mux arms and non-float condition codes; returns NULL
 * (elided here) otherwise.
 */
2342 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2343 ir_mode *mode = get_irn_mode(node);
2344 ir_node *mux_true = get_Mux_true(node);
2345 ir_node *mux_false = get_Mux_false(node);
2353 ia32_condition_code_t cc;
2354 ia32_address_t addr;
2356 if (get_mode_size_bits(mode) != 8)
/* accept (1,0) directly; (0,1) works too with the condition negated */
2359 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2361 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2367 cond = get_Mux_sel(node);
2368 flags = get_flags_node(cond, &cc);
2369 /* we can't handle the float special cases with SetM */
2370 if (cc & ia32_cc_additional_float_cases)
2373 cc = ia32_negate_condition_code(cc);
2375 build_address_ptr(&addr, ptr, mem);
2377 dbgi = get_irn_dbg_info(node);
2378 block = get_nodes_block(node);
2379 new_block = be_transform_node(block);
2380 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2381 addr.index, addr.mem, flags, cc);
2382 set_address(new_node, &addr);
2383 set_ia32_op_type(new_node, ia32_AddrModeD);
2384 set_ia32_ls_mode(new_node, mode);
2385 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to fold a Store's value computation into a destination-address-mode
 * instruction (read-modify-write). Dispatches on the stored value's opcode
 * to the matching *Mem node constructor; Inc/Dec are preferred for +-1 when
 * enabled. Returns NULL (elided here) when no pattern applies.
 */
2390 static ir_node *try_create_dest_am(ir_node *node)
2392 ir_node *val = get_Store_value(node);
2393 ir_node *mem = get_Store_mem(node);
2394 ir_node *ptr = get_Store_ptr(node);
2395 ir_mode *mode = get_irn_mode(val);
2396 unsigned bits = get_mode_size_bits(mode);
2401 /* handle only GP modes for now... */
2402 if (!ia32_mode_needs_gp_reg(mode))
2406 /* store must be the only user of the val node */
2407 if (get_irn_n_edges(val) > 1)
2409 /* skip pointless convs */
2411 ir_node *conv_op = get_Conv_op(val);
2412 ir_mode *pred_mode = get_irn_mode(conv_op);
2413 if (!ia32_mode_needs_gp_reg(pred_mode))
/* the Conv only narrows (or is from mode_b): irrelevant for the store */
2415 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2423 /* value must be in the same block */
2424 if (get_nodes_block(node) != get_nodes_block(val))
2427 switch (get_irn_opcode(val)) {
2429 op1 = get_Add_left(val);
2430 op2 = get_Add_right(val);
/* x+1 / x-1 can use the shorter inc/dec encodings */
2431 if (ia32_cg_config.use_incdec) {
2432 if (is_Const_1(op2)) {
2433 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2435 } else if (is_Const_Minus_1(op2)) {
2436 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2440 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2441 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2442 match_commutative | match_immediate);
2445 op1 = get_Sub_left(val);
2446 op2 = get_Sub_right(val);
2447 if (is_Const(op2)) {
2448 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2450 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2451 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2455 op1 = get_And_left(val);
2456 op2 = get_And_right(val);
2457 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2458 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2459 match_commutative | match_immediate);
2462 op1 = get_Or_left(val);
2463 op2 = get_Or_right(val);
2464 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2465 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2466 match_commutative | match_immediate);
2469 op1 = get_Eor_left(val);
2470 op2 = get_Eor_right(val);
2471 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2472 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2473 match_commutative | match_immediate);
/* shifts have no separate 8bit constructor -- same func passed twice */
2476 op1 = get_Shl_left(val);
2477 op2 = get_Shl_right(val);
2478 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2479 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2483 op1 = get_Shr_left(val);
2484 op2 = get_Shr_right(val);
2485 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2486 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2490 op1 = get_Shrs_left(val);
2491 op2 = get_Shrs_right(val);
2492 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2493 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2497 op1 = get_Rotl_left(val);
2498 op2 = get_Rotl_right(val);
2499 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2500 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2503 /* TODO: match ROR patterns... */
2505 new_node = try_create_SetMem(val, ptr, mem);
2509 op1 = get_Minus_op(val);
2510 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2513 /* should be lowered already */
2514 assert(mode != mode_b);
2515 op1 = get_Not_op(val);
2516 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must stay pinned even if the RMW node defaults to float */
2522 if (new_node != NULL) {
2523 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2524 get_irn_pinned(node) == op_pin_state_pinned) {
2525 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Check whether an integer mode is directly usable by x87 integer
 * load/store (fild/fist): only signed 16- or 32-bit modes qualify.
 * NOTE(review): excerpt gaps -- the return statements are not visible here.
 */
2532 static bool possible_int_mode_for_fp(ir_mode *mode)
2536 if (!mode_is_signed(mode))
2538 size = get_mode_size_bits(mode);
2539 if (size != 16 && size != 32)
/*
 * Test whether a node is a Conv from a float mode to an integer mode that
 * x87 fist can handle (see possible_int_mode_for_fp).
 * NOTE(review): excerpt gaps -- the is_Conv check and returns are missing.
 */
2544 static int is_float_to_int_conv(const ir_node *node)
2546 ir_mode *mode = get_irn_mode(node);
2550 if (!possible_int_mode_for_fp(mode))
2555 conv_op = get_Conv_op(node);
2556 conv_mode = get_irn_mode(conv_op);
2558 if (!mode_is_float(conv_mode))
2565 * Transform a Store(floatConst) into a sequence of
2568 * @return the created ia32 Store node
/*
 * Splits a float-constant Store into one or more integer immediate Stores
 * (4-byte chunks first, then a 2-byte chunk), advancing the address offset
 * between chunks. If several stores are created their memory Projs are
 * combined into a Sync; with a single store the plain memory Proj is
 * returned.
 * NOTE(review): excerpt gaps -- the loop header, ofs/delta bookkeeping and
 * the ins[] collection are missing from this listing.
 */
2570 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2572 ir_mode *mode = get_irn_mode(cns);
2573 unsigned size = get_mode_size_bytes(mode);
2574 ir_tarval *tv = get_Const_tarval(cns);
2575 ir_node *block = get_nodes_block(node);
2576 ir_node *new_block = be_transform_node(block);
2577 ir_node *ptr = get_Store_ptr(node);
2578 ir_node *mem = get_Store_mem(node);
2579 dbg_info *dbgi = get_irn_dbg_info(node);
2582 int throws_exception = ir_throws_exception(node);
2584 ia32_address_t addr;
2586 build_address_ptr(&addr, ptr, mem);
/* Assemble a 32-bit little-endian chunk of the constant's raw bits. */
2593 val= get_tarval_sub_bits(tv, ofs) |
2594 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2595 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2596 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2599 } else if (size >= 2) {
2600 val= get_tarval_sub_bits(tv, ofs) |
2601 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2605 panic("invalid size of Store float to mem (%+F)", node);
2607 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2609 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2610 addr.index, addr.mem, imm);
2611 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2613 ir_set_throws_exception(new_node, throws_exception);
2614 set_irn_pinned(new_node, get_irn_pinned(node));
2615 set_ia32_op_type(new_node, ia32_AddrModeD);
2616 set_ia32_ls_mode(new_node, mode);
2617 set_address(new_node, &addr);
2618 SET_IA32_ORIG_NODE(new_node, node);
/* Advance to the next chunk of the constant. */
2625 addr.offset += delta;
2626 } while (size != 0);
/* Multiple partial stores: merge their memory effects with a Sync;
 * otherwise hand back the single store's memory Proj's predecessor. */
2629 return new_rd_Sync(dbgi, new_block, i, ins);
2631 return get_Proj_pred(ins[0]);
2636 * Generate a vfist or vfisttp instruction.
/*
 * Emits vfisttp (truncating, always pops st0 -- hence the Keep on the
 * result value) when the target supports it, otherwise a classic vfist
 * with an explicit truncating FPU control word.
 * NOTE(review): excerpt gaps -- the returns are not visible here.
 */
2638 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2639 ir_node *index, ir_node *mem, ir_node *val)
2641 if (ia32_cg_config.use_fisttp) {
2642 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2643 if other users exists */
2644 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2645 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
2646 be_new_Keep(block, 1, &value);
2650 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2653 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2659 * Transforms a general (no special case) Store.
2661 * @return the created ia32 Store node
/*
 * General Store lowering: first tries destination address mode
 * (try_create_dest_am), then builds an explicit address and selects the
 * store flavor by value mode: SSE xStore / x87 vfst for floats, vfist for
 * float->int conversions on x87, and Store/Store8Bit for GP values.
 * NOTE(review): excerpt gaps -- returns, some braces and the val-rewrite
 * statements inside the Conv-skipping loops are missing here.
 */
2663 static ir_node *gen_general_Store(ir_node *node)
2665 ir_node *val = get_Store_value(node);
2666 ir_mode *mode = get_irn_mode(val);
2667 ir_node *block = get_nodes_block(node);
2668 ir_node *new_block = be_transform_node(block);
2669 ir_node *ptr = get_Store_ptr(node);
2670 ir_node *mem = get_Store_mem(node);
2671 dbg_info *dbgi = get_irn_dbg_info(node);
2672 int throws_exception = ir_throws_exception(node);
2675 ia32_address_t addr;
2677 /* check for destination address mode */
2678 new_node = try_create_dest_am(node);
2679 if (new_node != NULL)
2682 /* construct store address */
2683 memset(&addr, 0, sizeof(addr));
2684 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2686 if (addr.base == NULL) {
2687 addr.base = noreg_GP;
2689 addr.base = be_transform_node(addr.base);
2692 if (addr.index == NULL) {
2693 addr.index = noreg_GP;
2695 addr.index = be_transform_node(addr.index);
2697 addr.mem = be_transform_node(mem);
2699 if (mode_is_float(mode)) {
2700 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2702 while (is_Conv(val) && mode == get_irn_mode(val)) {
2703 ir_node *op = get_Conv_op(val);
2704 if (!mode_is_float(get_irn_mode(op)))
2708 new_val = be_transform_node(val);
2709 if (ia32_cg_config.use_sse2) {
2710 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2711 addr.index, addr.mem, new_val);
2713 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2714 addr.index, addr.mem, new_val, mode);
/* float->int Conv feeding the Store: fold it into an x87 fist. */
2716 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2717 val = get_Conv_op(val);
2719 /* TODO: is this optimisation still necessary at all (middleend)? */
2720 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2722 while (is_Conv(val)) {
2723 ir_node *op = get_Conv_op(val);
2724 if (!mode_is_float(get_irn_mode(op)))
2726 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2730 new_val = be_transform_node(val);
2731 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2733 new_val = create_immediate_or_transform(val, 0);
2734 assert(mode != mode_b);
2736 if (get_mode_size_bits(mode) == 8) {
2737 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2738 addr.index, addr.mem, new_val);
2740 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2741 addr.index, addr.mem, new_val);
2744 ir_set_throws_exception(new_node, throws_exception);
2746 set_irn_pinned(new_node, get_irn_pinned(node));
2747 set_ia32_op_type(new_node, ia32_AddrModeD);
2748 set_ia32_ls_mode(new_node, mode);
2750 set_address(new_node, &addr);
2751 SET_IA32_ORIG_NODE(new_node, node);
2757 * Transforms a Store.
2759 * @return the created ia32 Store node
/*
 * Entry point for Store lowering: float constants take the
 * integer-immediate-store path, everything else the general path.
 */
2761 static ir_node *gen_Store(ir_node *node)
2763 ir_node *val = get_Store_value(node);
2764 ir_mode *mode = get_irn_mode(val);
2766 if (mode_is_float(mode) && is_Const(val)) {
2767 /* We can transform every floating const store
2768 into a sequence of integer stores.
2769 If the constant is already in a register,
2770 it would be better to use it, but we don't
2771 have this information here. */
2772 return gen_float_const_Store(node, val);
2774 return gen_general_Store(node);
2778 * Transforms a Switch.
2780 * @return the created ia32 SwitchJmp node
/*
 * Lowers a Switch to an ia32 SwitchJmp indexed through a private,
 * constant jump-table entity (scale 2 -> 4-byte entries). The selector
 * is widened to 32 bit first when necessary.
 */
2782 static ir_node *gen_Switch(ir_node *node)
2784 dbg_info *dbgi = get_irn_dbg_info(node);
2785 ir_graph *irg = get_irn_irg(node);
2786 ir_node *block = be_transform_node(get_nodes_block(node));
2787 ir_node *sel = get_Switch_selector(node);
2788 ir_node *new_sel = be_transform_node(sel);
2789 ir_mode *sel_mode = get_irn_mode(sel);
2790 const ir_switch_table *table = get_Switch_table(node);
2791 unsigned n_outs = get_Switch_n_outs(node);
2795 assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2796 if (get_mode_size_bits(sel_mode) != 32)
2797 new_sel = create_upconv(new_sel, sel);
/* Jump table lives in a fresh private constant entity. */
2799 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2800 set_entity_visibility(entity, ir_visibility_private);
2801 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2803 table = ir_switch_table_duplicate(irg, table);
2805 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2806 set_ia32_am_scale(new_node, 2);
2807 set_ia32_am_sc(new_node, entity);
2808 set_ia32_op_type(new_node, ia32_AddrModeS);
2809 set_ia32_ls_mode(new_node, mode_Iu);
2810 SET_IA32_ORIG_NODE(new_node, node);
2811 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2812 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2818 * Transform a Cond node.
/*
 * Lowers a Cond to an ia32 Jcc: obtains the flags value and condition
 * code from the selector (a Cmp) and branches on them.
 */
2820 static ir_node *gen_Cond(ir_node *node)
2822 ir_node *block = get_nodes_block(node);
2823 ir_node *new_block = be_transform_node(block);
2824 dbg_info *dbgi = get_irn_dbg_info(node);
2825 ir_node *sel = get_Cond_selector(node);
2826 ir_node *flags = NULL;
2828 ia32_condition_code_t cc;
2830 /* we get flags from a Cmp */
2831 flags = get_flags_node(sel, &cc);
2833 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2834 SET_IA32_ORIG_NODE(new_node, node);
2840 * Transform a be_Copy.
/*
 * Duplicates the Copy; GP-register values are normalized to mode_Iu.
 */
2842 static ir_node *gen_be_Copy(ir_node *node)
2844 ir_node *new_node = be_duplicate_node(node);
2845 ir_mode *mode = get_irn_mode(new_node);
2847 if (ia32_mode_needs_gp_reg(mode)) {
2848 set_irn_mode(new_node, mode_Iu);
/*
 * Build an x87 float compare. Prefers fucomi (writes eflags directly);
 * otherwise falls back to ftst (compare against 0) or fucom+fnstsw and
 * transfers the FPU status word into eflags via Sahf.
 * NOTE(review): excerpt gaps -- returns and some closing braces missing.
 */
2854 static ir_node *create_Fucom(ir_node *node)
2856 dbg_info *dbgi = get_irn_dbg_info(node);
2857 ir_node *block = get_nodes_block(node);
2858 ir_node *new_block = be_transform_node(block);
2859 ir_node *left = get_Cmp_left(node);
2860 ir_node *new_left = be_transform_node(left);
2861 ir_node *right = get_Cmp_right(node);
2865 if (ia32_cg_config.use_fucomi) {
2866 new_right = be_transform_node(right);
2867 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2869 set_ia32_commutative(new_node);
2870 SET_IA32_ORIG_NODE(new_node, node);
2872 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2873 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2875 new_right = be_transform_node(right);
2876 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2879 set_ia32_commutative(new_node);
2881 SET_IA32_ORIG_NODE(new_node, node);
/* Copy FPU status word flags into eflags. */
2883 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2884 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Build an SSE2 Ucomi float compare, allowing one operand to come from
 * memory via address-mode matching (commutative).
 */
2890 static ir_node *create_Ucomi(ir_node *node)
2892 dbg_info *dbgi = get_irn_dbg_info(node);
2893 ir_node *src_block = get_nodes_block(node);
2894 ir_node *new_block = be_transform_node(src_block);
2895 ir_node *left = get_Cmp_left(node);
2896 ir_node *right = get_Cmp_right(node);
2898 ia32_address_mode_t am;
2899 ia32_address_t *addr = &am.addr;
2901 match_arguments(&am, src_block, left, right, NULL,
2902 match_commutative | match_am);
2904 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2905 addr->mem, am.new_op1, am.new_op2,
2907 set_am_attributes(new_node, &am);
2909 SET_IA32_ORIG_NODE(new_node, node);
2911 new_node = fix_mem_proj(new_node, &am);
2917 * returns true if it is assured, that the upper bits of a node are "clean"
2918 * which means for a 16 or 8 bit value, that the upper bits in the register
2919 * are 0 for unsigned and a copy of the last significant bit for signed
/*
 * Conservative recursive analysis over already-transformed ia32 nodes.
 * NOTE(review): excerpt gaps -- case labels, default branch and several
 * returns are missing from this listing.
 */
2922 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2924 assert(ia32_mode_needs_gp_reg(mode));
/* >= 32 bit values fill the whole register, nothing to clean. */
2925 if (get_mode_size_bits(mode) >= 32)
2928 if (is_Proj(transformed_node))
2929 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2931 switch (get_ia32_irn_opcode(transformed_node)) {
2932 case iro_ia32_Conv_I2I:
2933 case iro_ia32_Conv_I2I8Bit: {
2934 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* A conv only guarantees clean upper bits when its signedness matches
 * and it does not widen past the queried mode. */
2935 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2937 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2944 if (mode_is_signed(mode)) {
2945 return false; /* TODO handle signed modes */
/* Shr by a large-enough immediate guarantees zeroed upper bits. */
2947 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2948 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2949 const ia32_immediate_attr_t *attr
2950 = get_ia32_immediate_attr_const(right);
2951 if (attr->symconst == 0 &&
2952 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2956 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2960 /* TODO too conservative if shift amount is constant */
2961 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* For unsigned And, one clean operand suffices (bits can only drop). */
2964 if (!mode_is_signed(mode)) {
2966 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2967 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2969 /* TODO if one is known to be zero extended, then || is sufficient */
2974 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2975 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2977 case iro_ia32_Const:
2978 case iro_ia32_Immediate: {
2979 const ia32_immediate_attr_t *attr =
2980 get_ia32_immediate_attr_const(transformed_node);
/* Signed: all bits above the sign bit must agree with it;
 * unsigned: all upper bits must be zero. */
2981 if (mode_is_signed(mode)) {
2982 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2983 return shifted == 0 || shifted == -1;
2985 unsigned long shifted = (unsigned long)attr->offset;
2986 shifted >>= get_mode_size_bits(mode)-1;
2988 return shifted == 0;
2998 * Generate code for a Cmp.
/*
 * Lowers a Cmp: float compares are delegated to Ucomi (SSE2) or Fucom
 * (x87). Integer compares prefer Test for (x & y) ==/!= 0 with a
 * single-use And, otherwise an ia32 Cmp, both with full address-mode and
 * immediate matching. When the operands' upper bits are proven clean the
 * 32-bit form is used for a smaller encoding.
 * NOTE(review): excerpt gaps -- returns and several braces are missing.
 */
3000 static ir_node *gen_Cmp(ir_node *node)
3002 dbg_info *dbgi = get_irn_dbg_info(node);
3003 ir_node *block = get_nodes_block(node);
3004 ir_node *new_block = be_transform_node(block);
3005 ir_node *left = get_Cmp_left(node);
3006 ir_node *right = get_Cmp_right(node);
3007 ir_mode *cmp_mode = get_irn_mode(left);
3009 ia32_address_mode_t am;
3010 ia32_address_t *addr = &am.addr;
3012 if (mode_is_float(cmp_mode)) {
3013 if (ia32_cg_config.use_sse2) {
3014 return create_Ucomi(node);
3016 return create_Fucom(node);
3020 assert(ia32_mode_needs_gp_reg(cmp_mode));
3022 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3023 if (is_Const_0(right) &&
3025 get_irn_n_edges(left) == 1) {
3026 /* Test(and_left, and_right) */
3027 ir_node *and_left = get_And_left(left);
3028 ir_node *and_right = get_And_right(left);
3030 /* matze: code here used mode instead of cmd_mode, I think it is always
3031 * the same as cmp_mode, but I leave this here to see if this is really
3034 assert(get_irn_mode(and_left) == cmp_mode);
3036 match_arguments(&am, block, and_left, and_right, NULL,
3038 match_am | match_8bit_am | match_16bit_am |
3039 match_am_and_immediates | match_immediate);
3041 /* use 32bit compare mode if possible since the opcode is smaller */
3042 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3043 upper_bits_clean(am.new_op2, cmp_mode)) {
3044 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3047 if (get_mode_size_bits(cmp_mode) == 8) {
3048 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3049 addr->index, addr->mem,
3050 am.new_op1, am.new_op2,
3053 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3054 addr->index, addr->mem, am.new_op1,
3055 am.new_op2, am.ins_permuted);
3058 /* Cmp(left, right) */
3059 match_arguments(&am, block, left, right, NULL,
3060 match_commutative | match_am | match_8bit_am |
3061 match_16bit_am | match_am_and_immediates |
3063 /* use 32bit compare mode if possible since the opcode is smaller */
3064 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3065 upper_bits_clean(am.new_op2, cmp_mode)) {
3066 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3069 if (get_mode_size_bits(cmp_mode) == 8) {
3070 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3071 addr->index, addr->mem, am.new_op1,
3072 am.new_op2, am.ins_permuted);
3074 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3075 addr->mem, am.new_op1, am.new_op2,
3079 set_am_attributes(new_node, &am);
3080 set_ia32_ls_mode(new_node, cmp_mode);
3082 SET_IA32_ORIG_NODE(new_node, node);
3084 new_node = fix_mem_proj(new_node, &am);
/*
 * Build a CMovcc from a Mux: selects between Mux false/true values under
 * the given flags/condition code. If address-mode matching permuted the
 * operands the condition is negated to compensate.
 */
3089 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3090 ia32_condition_code_t cc)
3092 dbg_info *dbgi = get_irn_dbg_info(node);
3093 ir_node *block = get_nodes_block(node);
3094 ir_node *new_block = be_transform_node(block);
3095 ir_node *val_true = get_Mux_true(node);
3096 ir_node *val_false = get_Mux_false(node);
3098 ia32_address_mode_t am;
3099 ia32_address_t *addr;
3101 assert(ia32_cg_config.use_cmov);
3102 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3106 match_arguments(&am, block, val_false, val_true, flags,
3107 match_commutative | match_am | match_16bit_am | match_mode_neutral);
3109 if (am.ins_permuted)
3110 cc = ia32_negate_condition_code(cc);
3112 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3113 addr->mem, am.new_op1, am.new_op2, new_flags,
3115 set_am_attributes(new_node, &am);
3117 SET_IA32_ORIG_NODE(new_node, node);
3119 new_node = fix_mem_proj(new_node, &am);
3125 * Creates a ia32 Setcc instruction.
/*
 * Emits Setcc (produces an 8-bit 0/1) and, when the requested mode is
 * wider than 8 bit, zero-extends the result with an unsigned-byte conv.
 */
3127 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3128 ir_node *flags, ia32_condition_code_t cc,
3131 ir_mode *mode = get_irn_mode(orig_node);
3134 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3135 SET_IA32_ORIG_NODE(new_node, orig_node);
3137 /* we might need to conv the result up */
3138 if (get_mode_size_bits(mode) > 8) {
3139 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3140 nomem, new_node, mode_Bu);
3141 SET_IA32_ORIG_NODE(new_node, orig_node);
3148 * Create instruction for an unsigned Difference or Zero.
/*
 * Computes max(a - b, 0) for unsigned values without a branch:
 * sub produces the difference and the carry flag; sbb0 materializes
 * 0/-1 from the carry, which is inverted (not) and used as an And mask.
 * NOTE(review): excerpt gaps -- variable declarations and some braces
 * are missing from this listing.
 */
3150 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3152 ir_mode *mode = get_irn_mode(psi);
3162 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3163 match_mode_neutral | match_am | match_immediate | match_two_users);
3165 block = get_nodes_block(new_node);
3167 if (is_Proj(new_node)) {
3168 sub = get_Proj_pred(new_node);
/* Ensure the Sub is multi-result so we can also take its flags. */
3171 set_irn_mode(sub, mode_T);
3172 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3174 assert(is_ia32_Sub(sub));
3175 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3177 dbgi = get_irn_dbg_info(psi);
3178 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3179 set_ia32_ls_mode(sbb, mode_Iu);
3180 notn = new_bd_ia32_Not(dbgi, block, sbb);
3182 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3183 set_ia32_ls_mode(new_node, mode_Iu);
3184 set_ia32_commutative(new_node);
3189 * Create an const array of two float consts.
3191 * @param c0 the first constant
3192 * @param c1 the second constant
3193 * @param new_mode IN/OUT for the mode of the constants, if NULL
3194 * smallest possible mode will be used
/*
 * Builds a private, constant global entity holding {c0, c1}. When no mode
 * is given, the smallest IEEE mode that represents both values losslessly
 * is chosen (float preferred over double over the source mode).
 * NOTE(review): excerpt gaps -- the mode==NULL guard, mode assignments
 * inside the lossless-conversion branches and the return are missing.
 */
3196 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3199 ir_mode *mode = *new_mode;
3201 ir_initializer_t *initializer;
3202 ir_tarval *tv0 = get_Const_tarval(c0);
3203 ir_tarval *tv1 = get_Const_tarval(c1);
3206 /* detect the best mode for the constants */
3207 mode = get_tarval_mode(tv0);
3209 if (mode != mode_F) {
3210 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3211 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3213 tv0 = tarval_convert_to(tv0, mode);
3214 tv1 = tarval_convert_to(tv1, mode);
3215 } else if (mode != mode_D) {
3216 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3217 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3219 tv0 = tarval_convert_to(tv0, mode);
3220 tv1 = tarval_convert_to(tv1, mode);
3227 tp = ia32_get_prim_type(mode);
3228 tp = ia32_create_float_array(tp);
3230 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3232 set_entity_ld_ident(ent, get_entity_ident(ent));
3233 set_entity_visibility(ent, ir_visibility_private);
3234 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3236 initializer = create_initializer_compound(2);
3238 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3239 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3241 set_entity_initializer(ent, initializer);
3248 * Possible transformations for creating a Setcc.
/* Instruction kinds a find_const_transform() step may request, and the
 * step list describing how to turn a Setcc 0/1 result into the desired
 * constant pair.
 * NOTE(review): excerpt gaps -- the enumerator names and most struct
 * fields (steps[], num_steps) are missing from this listing. */
3250 enum setcc_transform_insn {
3263 typedef struct setcc_transform {
3265 ia32_condition_code_t cc;
3267 enum setcc_transform_insn transform;
3271 } setcc_transform_t;
3274 * Setcc can only handle 0 and 1 result.
3275 * Find a transformation that creates 0 and 1 from
/*
 * Plans a short instruction sequence (res->steps, executed by gen_Mux in
 * reverse order) that maps the Setcc 0/1 result onto the constant pair
 * (t, f): normalize so f == 0 (negating the condition when swapping),
 * subtract f, then pick SET / NEG / SHL / LEA / LEAxx / AND steps for the
 * remaining value of t.
 * NOTE(review): excerpt gaps -- swap statements, several case labels and
 * returns are missing from this listing.
 */
3278 static void find_const_transform(ia32_condition_code_t cc,
3279 ir_tarval *t, ir_tarval *f,
3280 setcc_transform_t *res)
/* Normalize: make t the non-zero / bigger constant. */
3286 if (tarval_is_null(t)) {
3290 cc = ia32_negate_condition_code(cc);
3291 } else if (tarval_cmp(t, f) == ir_relation_less) {
3292 // now, t is the bigger one
3296 cc = ia32_negate_condition_code(cc);
/* Fold a non-zero f into a trailing ADD step: compute with f == 0. */
3300 if (! tarval_is_null(f)) {
3301 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3304 res->steps[step].transform = SETCC_TR_ADD;
3306 if (t == tarval_bad)
3307 panic("constant subtract failed");
3308 if (! tarval_is_long(f))
3309 panic("tarval is not long");
3311 res->steps[step].val = get_tarval_long(f);
3313 f = tarval_sub(f, f, NULL);
3314 assert(tarval_is_null(f));
3317 if (tarval_is_one(t)) {
3318 res->steps[step].transform = SETCC_TR_SET;
3319 res->num_steps = ++step;
3323 if (tarval_is_minus_one(t)) {
3324 res->steps[step].transform = SETCC_TR_NEG;
3326 res->steps[step].transform = SETCC_TR_SET;
3327 res->num_steps = ++step;
3330 if (tarval_is_long(t)) {
3331 long v = get_tarval_long(t);
3333 res->steps[step].val = 0;
/* Small multipliers map to LEA/SHL forms (case labels missing here). */
3336 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3338 res->steps[step].transform = SETCC_TR_LEAxx;
3339 res->steps[step].scale = 3; /* (a << 3) + a */
3342 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3344 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3345 res->steps[step].scale = 3; /* (a << 3) */
3348 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3350 res->steps[step].transform = SETCC_TR_LEAxx;
3351 res->steps[step].scale = 2; /* (a << 2) + a */
3354 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3356 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3357 res->steps[step].scale = 2; /* (a << 2) */
3360 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3362 res->steps[step].transform = SETCC_TR_LEAxx;
3363 res->steps[step].scale = 1; /* (a << 1) + a */
3366 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3368 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3369 res->steps[step].scale = 1; /* (a << 1) */
3372 res->num_steps = step;
/* General constant: mask with AND (or shift when t is a single bit). */
3375 if (! tarval_is_single_bit(t)) {
3376 res->steps[step].transform = SETCC_TR_AND;
3377 res->steps[step].val = v;
3379 res->steps[step].transform = SETCC_TR_NEG;
3381 int val = get_tarval_lowest_bit(t);
3384 res->steps[step].transform = SETCC_TR_SHL;
3385 res->steps[step].scale = val;
3389 res->steps[step].transform = SETCC_TR_SET;
3390 res->num_steps = ++step;
3393 panic("tarval is not long");
3397 * Transforms a Mux node into some code sequence.
3399 * @return The transformed node.
/*
 * Mux lowering, by case:
 *  - float abs patterns -> create_float_abs (integer abs only warned about)
 *  - SSE2 min/max patterns for a <=/>= b selecting a/b
 *  - float Mux of two constants -> Setcc-scaled load from a 2-entry
 *    constant array
 *  - unsigned "difference or zero" patterns -> create_doz
 *  - integer Mux of two constants -> Setcc plus the step sequence planned
 *    by find_const_transform
 *  - otherwise -> CMovcc
 * NOTE(review): excerpt gaps -- many braces, case labels, returns and a
 * few declarations are missing from this listing; verify against the
 * complete file before relying on the exact control flow.
 */
3401 static ir_node *gen_Mux(ir_node *node)
3403 dbg_info *dbgi = get_irn_dbg_info(node);
3404 ir_node *block = get_nodes_block(node);
3405 ir_node *new_block = be_transform_node(block);
3406 ir_node *mux_true = get_Mux_true(node);
3407 ir_node *mux_false = get_Mux_false(node);
3408 ir_node *sel = get_Mux_sel(node);
3409 ir_mode *mode = get_irn_mode(node);
3413 ia32_condition_code_t cc;
3415 assert(get_irn_mode(sel) == mode_b);
3417 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3419 if (ia32_mode_needs_gp_reg(mode)) {
3420 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3423 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3424 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3428 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3429 if (mode_is_float(mode)) {
3430 ir_node *cmp_left = get_Cmp_left(sel);
3431 ir_node *cmp_right = get_Cmp_right(sel);
3432 ir_relation relation = get_Cmp_relation(sel);
3434 if (ia32_cg_config.use_sse2) {
3435 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3436 if (cmp_left == mux_true && cmp_right == mux_false) {
3437 /* Mux(a <= b, a, b) => MIN */
3438 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3439 match_commutative | match_am | match_two_users);
3440 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3441 /* Mux(a <= b, b, a) => MAX */
3442 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3443 match_commutative | match_am | match_two_users);
3445 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3446 if (cmp_left == mux_true && cmp_right == mux_false) {
3447 /* Mux(a >= b, a, b) => MAX */
3448 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3449 match_commutative | match_am | match_two_users);
3450 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3451 /* Mux(a >= b, b, a) => MIN */
3452 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3453 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: index a 2-entry constant array with the
 * (scaled) Setcc result and load the selected value. */
3458 if (is_Const(mux_true) && is_Const(mux_false)) {
3459 ia32_address_mode_t am;
3464 flags = get_flags_node(sel, &cc);
3465 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3467 if (ia32_cg_config.use_sse2) {
3468 /* cannot load from different mode on SSE */
3471 /* x87 can load any mode */
3475 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3477 if (new_mode == mode_F) {
3479 } else if (new_mode == mode_D) {
3481 } else if (new_mode == ia32_mode_E) {
3482 /* arg, shift 16 NOT supported */
3484 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3486 panic("Unsupported constant size");
3489 am.ls_mode = new_mode;
3490 am.addr.base = get_symconst_base();
3491 am.addr.index = new_node;
3492 am.addr.mem = nomem;
3494 am.addr.scale = scale;
3495 am.addr.use_frame = 0;
3496 am.addr.tls_segment = false;
3497 am.addr.frame_entity = NULL;
3498 am.addr.symconst_sign = 0;
3499 am.mem_proj = am.addr.mem;
3500 am.op_type = ia32_AddrModeS;
3503 am.pinned = op_pin_state_floats;
3505 am.ins_permuted = false;
3507 if (ia32_cg_config.use_sse2)
3508 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3510 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3511 set_am_attributes(load, &am);
3513 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3515 panic("cannot transform floating point Mux");
3518 assert(ia32_mode_needs_gp_reg(mode));
3521 ir_node *cmp_left = get_Cmp_left(sel);
3522 ir_node *cmp_right = get_Cmp_right(sel);
3523 ir_relation relation = get_Cmp_relation(sel);
3524 ir_node *val_true = mux_true;
3525 ir_node *val_false = mux_false;
/* Normalize so the zero constant sits on the false side. */
3527 if (is_Const(val_true) && is_Const_null(val_true)) {
3528 ir_node *tmp = val_false;
3529 val_false = val_true;
3531 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) patterns -> branchless difference-or-zero. */
3533 if (is_Const_0(val_false) && is_Sub(val_true)) {
3534 if ((relation & ir_relation_greater)
3535 && get_Sub_left(val_true) == cmp_left
3536 && get_Sub_right(val_true) == cmp_right) {
3537 return create_doz(node, cmp_left, cmp_right);
3539 if ((relation & ir_relation_less)
3540 && get_Sub_left(val_true) == cmp_right
3541 && get_Sub_right(val_true) == cmp_left) {
3542 return create_doz(node, cmp_right, cmp_left);
3547 flags = get_flags_node(sel, &cc);
3549 if (is_Const(mux_true) && is_Const(mux_false)) {
3550 /* both are const, good */
3551 ir_tarval *tv_true = get_Const_tarval(mux_true);
3552 ir_tarval *tv_false = get_Const_tarval(mux_false);
3553 setcc_transform_t res;
3556 find_const_transform(cc, tv_true, tv_false, &res);
/* Execute the planned steps in reverse order. */
3558 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3561 switch (res.steps[step].transform) {
3563 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3564 add_ia32_am_offs_int(new_node, res.steps[step].val);
3566 case SETCC_TR_ADDxx:
3567 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3570 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3571 set_ia32_am_scale(new_node, res.steps[step].scale);
3572 set_ia32_am_offs_int(new_node, res.steps[step].val);
3574 case SETCC_TR_LEAxx:
3575 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3576 set_ia32_am_scale(new_node, res.steps[step].scale);
3577 set_ia32_am_offs_int(new_node, res.steps[step].val);
3580 imm = ia32_immediate_from_long(res.steps[step].scale);
3581 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3584 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3587 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3590 imm = ia32_immediate_from_long(res.steps[step].val);
3591 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3594 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3597 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3600 panic("unknown setcc transform");
3604 new_node = create_CMov(node, sel, flags, cc);
3611 * Create a conversion from x87 state register to general purpose.
/*
 * Stores the x87 value to the frame with fist(tp) and loads it back as a
 * 32-bit integer. Unsigned 32-bit targets store as signed 64-bit and load
 * only the lower half (hence the 64-bit stack-entity flag on the load).
 */
3613 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3615 ir_node *block = be_transform_node(get_nodes_block(node));
3616 ir_node *op = get_Conv_op(node);
3617 ir_node *new_op = be_transform_node(op);
3618 ir_graph *irg = current_ir_graph;
3619 dbg_info *dbgi = get_irn_dbg_info(node);
3620 ir_mode *mode = get_irn_mode(node);
3621 ir_node *frame = get_irg_frame(irg);
3622 ir_node *fist, *load, *mem;
3624 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3625 set_irn_pinned(fist, op_pin_state_floats);
3626 set_ia32_use_frame(fist);
3627 set_ia32_op_type(fist, ia32_AddrModeD);
3629 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3630 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3632 assert(get_mode_size_bits(mode) <= 32);
3633 /* exception we can only store signed 32 bit integers, so for unsigned
3634 we store a 64bit (signed) integer and load the lower bits */
3635 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3636 set_ia32_ls_mode(fist, mode_Ls);
3638 set_ia32_ls_mode(fist, mode_Is);
3640 SET_IA32_ORIG_NODE(fist, node);
3643 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3645 set_irn_pinned(load, op_pin_state_floats);
3646 set_ia32_use_frame(load);
3647 set_ia32_op_type(load, ia32_AddrModeS);
3648 set_ia32_ls_mode(load, mode_Is);
/* Tell the stack-slot allocator how large the spill slot must be. */
3649 if (get_ia32_ls_mode(fist) == mode_Ls) {
3650 ia32_attr_t *attr = get_ia32_attr(load);
3651 attr->data.need_64bit_stackent = 1;
3653 ia32_attr_t *attr = get_ia32_attr(load);
3654 attr->data.need_32bit_stackent = 1;
3656 SET_IA32_ORIG_NODE(load, node);
3658 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3662 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Forces rounding to tgt_mode precision by spilling the value to the
 * frame in tgt_mode and reloading it (x87 registers are always 80-bit).
 */
3664 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3666 ir_node *block = get_nodes_block(node);
3667 ir_graph *irg = get_Block_irg(block);
3668 dbg_info *dbgi = get_irn_dbg_info(node);
3669 ir_node *frame = get_irg_frame(irg);
3671 ir_node *store, *load;
3674 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3675 set_ia32_use_frame(store);
3676 set_ia32_op_type(store, ia32_AddrModeD);
3677 SET_IA32_ORIG_NODE(store, node);
3679 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3681 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3682 set_ia32_use_frame(load);
3683 set_ia32_op_type(load, ia32_AddrModeS);
3684 SET_IA32_ORIG_NODE(load, node);
3686 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/*
 * Creates an integer-to-integer conversion, selecting the 8-bit variant
 * constructor when the target mode is 8 bits wide.
 */
3690 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3691 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3693 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3695 func = get_mode_size_bits(mode) == 8 ?
3696 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3697 return func(dbgi, block, base, index, mem, val, mode);
/**
 * Create a conversion from a general purpose register value to an x87
 * register (int -> float), implemented via a frame spill and an fild.
 *
 * @param node      the Conv node to transform
 * @param src_mode  the integer source mode of the conversion
 * @return the vfild result Proj in mode_vfp
 */
static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = get_Block_irg(block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *op = get_Conv_op(node);
	ir_node *new_op = NULL;
	ir_mode *store_mode;
	/* fild can use source AM if the operand is a signed 16bit or 32bit integer */
	if (possible_int_mode_for_fp(src_mode)) {
		ia32_address_mode_t am;
		/* match_try_am: only fold if an address mode really matches */
		match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
		if (am.op_type == ia32_AddrModeS) {
			ia32_address_t *addr = &am.addr;
			/* load the integer directly from memory into the x87 stack */
			fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
			new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
			set_am_attributes(fild, &am);
			SET_IA32_ORIG_NODE(fild, node);
			fix_mem_proj(fild, &am);
	/* no source AM matched: transform the operand normally */
	if (new_op == NULL) {
		new_op = be_transform_node(op);
	mode = get_irn_mode(op);
	/* first convert to 32 bit signed if necessary */
	if (get_mode_size_bits(src_mode) < 32) {
		if (!upper_bits_clean(new_op, src_mode)) {
			new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
			SET_IA32_ORIG_NODE(new_op, node);
	assert(get_mode_size_bits(mode) == 32);
	/* spill the value to the frame so fild can pick it up */
	store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
	set_ia32_use_frame(store);
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
	/* exception for 32bit unsigned, do a 64bit spill+load */
	if (!mode_is_signed(mode)) {
		/* store a zero high word so the 64bit value is a correct
		 * (non-negative) signed representation of the unsigned value */
		ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
		ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
			noreg_GP, nomem, zero_const);
		ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
		set_ia32_use_frame(zero_store);
		set_ia32_op_type(zero_store, ia32_AddrModeD);
		/* high word lives 4 bytes above the low word */
		add_ia32_am_offs_int(zero_store, 4);
		set_ia32_ls_mode(zero_store, mode_Iu);
		in[0] = zero_store_mem;
		/* both stores must be visible to the fild */
		store_mem = new_rd_Sync(dbgi, block, 2, in);
		store_mode = mode_Ls;
		/* (else branch) signed values are loaded back as plain 32bit */
		store_mode = mode_Is;
	fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, store_mode);
	new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Create a conversion from one integer mode into another one, implemented
 * as an ia32 Conv_I2I on the smaller of the two modes (the conversion only
 * has to normalize the smaller-width value; the rest is register-width).
 *
 * @param src_mode  integer mode of the operand
 * @param tgt_mode  integer mode of the result
 * @param node      the original Conv node (for debug/orig-node tracking)
 */
static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
		dbg_info *dbgi, ir_node *block, ir_node *op,
	ir_node *new_block = be_transform_node(block);
	ir_mode *smaller_mode;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* only the smaller of the two widths matters for the conversion */
	if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
		smaller_mode = src_mode;
		/* (else branch) */
		smaller_mode = tgt_mode;
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
	match_arguments(&am, block, NULL, op, NULL,
			match_am | match_8bit_am | match_16bit_am);
	if (upper_bits_clean(am.new_op2, smaller_mode)) {
		/* unnecessary conv. in theory it shouldn't have been AM */
		assert(is_ia32_NoReg_GP(addr->base));
		assert(is_ia32_NoReg_GP(addr->index));
		assert(is_NoMem(addr->mem));
		assert(am.addr.offset == 0);
		assert(am.addr.symconst_ent == NULL);
	new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
			addr->mem, am.new_op2, smaller_mode);
	set_am_attributes(new_node, &am);
	/* match_arguments assume that out-mode = in-mode, this isn't true here
	 * so fix the load/store mode to the width that is actually converted */
	set_ia32_ls_mode(new_node, smaller_mode);
	SET_IA32_ORIG_NODE(new_node, node);
	new_node = fix_mem_proj(new_node, &am);
/**
 * Transforms a Conv node.
 *
 * Dispatches on (float|int) x (float|int) source/target combinations and on
 * SSE2 vs. x87 code generation; no-op and already-handled conversions are
 * folded away.
 *
 * @return The created ia32 Conv node
 */
static ir_node *gen_Conv(ir_node *node)
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *op = get_Conv_op(node);
	ir_node *new_op = NULL;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *src_mode = get_irn_mode(op);
	ir_mode *tgt_mode = get_irn_mode(node);
	int src_bits = get_mode_size_bits(src_mode);
	int tgt_bits = get_mode_size_bits(tgt_mode);
	ir_node *res = NULL;
	/* 64bit integers must have been lowered before this phase */
	assert(!mode_is_int(src_mode) || src_bits <= 32);
	assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
	/* modeB -> X should already be lowered by the lower_mode_b pass */
	if (src_mode == mode_b) {
		panic("ConvB not lowered %+F", node);
	if (src_mode == tgt_mode) {
		if (get_Conv_strict(node)) {
			if (ia32_cg_config.use_sse2) {
				/* when we are in SSE mode, we can kill all strict no-op conversion */
				return be_transform_node(op);
		/* this should be optimized already, but who knows... */
		DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
		DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
		return be_transform_node(op);
	if (mode_is_float(src_mode)) {
		new_op = be_transform_node(op);
		/* we convert from float ... */
		if (mode_is_float(tgt_mode)) {
			if (ia32_cg_config.use_sse2) {
				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
				res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, tgt_mode);
				if (get_Conv_strict(node)) {
					/* if fp_no_float_fold is not set then we assume that we
					 * don't have any float operations in a non
					 * mode_float_arithmetic mode and can skip strict upconvs */
					if (src_bits < tgt_bits) {
						DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
					/* x87 strict conv: round via a store/load pair */
					res = gen_x87_strict_conv(tgt_mode, new_op);
					/* res is the load's result Proj; tag the load itself */
					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
				DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
			if (ia32_cg_config.use_sse2) {
				res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, src_mode);
				return gen_x87_fp_to_gp(node);
		/* we convert from int ... */
		if (mode_is_float(tgt_mode)) {
			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
			if (ia32_cg_config.use_sse2) {
				new_op = be_transform_node(op);
				res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
				set_ia32_ls_mode(res, tgt_mode);
				/* signed modes lose one mantissa-relevant bit to the sign */
				unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
				unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
				res = gen_x87_gp_to_fp(node, src_mode);
				/* we need a strict-Conv, if the int mode has more bits than the
				 * float mantissa can represent exactly */
				if (float_mantissa < int_mantissa) {
					res = gen_x87_strict_conv(tgt_mode, res);
					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
		} else if (tgt_mode == mode_b) {
			/* mode_b lowering already took care that we only have 0/1 values */
			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
				src_mode, tgt_mode));
			return be_transform_node(op);
		if (src_bits == tgt_bits) {
			/* int -> int of the same width is a no-op on ia32 */
			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
				src_mode, tgt_mode));
			return be_transform_node(op);
		res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3972 static ir_node *create_immediate_or_transform(ir_node *node,
3973 char immediate_constraint_type)
3975 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3976 if (new_node == NULL) {
3977 new_node = be_transform_node(node);
3983 * Transforms a FrameAddr into an ia32 Add.
3985 static ir_node *gen_be_FrameAddr(ir_node *node)
3987 ir_node *block = be_transform_node(get_nodes_block(node));
3988 ir_node *op = be_get_FrameAddr_frame(node);
3989 ir_node *new_op = be_transform_node(op);
3990 dbg_info *dbgi = get_irn_dbg_info(node);
3993 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3994 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3995 set_ia32_use_frame(new_node);
3997 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms a be_Return.
 *
 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return,
 * because the i386 calling convention returns floats in st(0). Non-float and
 * non-SSE returns are simply duplicated.
 */
static ir_node *gen_be_Return(ir_node *node)
	ir_graph *irg = current_ir_graph;
	ir_node *ret_val = get_irn_n(node, n_be_Return_val);
	ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
	ir_node *new_ret_val = be_transform_node(ret_val);
	ir_node *new_ret_mem = be_transform_node(ret_mem);
	ir_entity *ent = get_irg_entity(irg);
	ir_type *tp = get_entity_type(ent);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = be_transform_node(get_nodes_block(node));
	assert(ret_val != NULL);
	/* only the SSE2 + float-result combination needs special handling */
	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
		return be_duplicate_node(node);
	res_type = get_method_res_type(tp, 0);
	if (! is_Primitive_type(res_type)) {
		return be_duplicate_node(node);
	mode = get_type_mode(res_type);
	if (! mode_is_float(mode)) {
		return be_duplicate_node(node);
	assert(get_method_n_ress(tp) == 1);
	frame = get_irg_frame(irg);
	/* store xmm0 onto stack */
	sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
			new_ret_mem, new_ret_val);
	set_ia32_ls_mode(sse_store, mode);
	set_ia32_op_type(sse_store, ia32_AddrModeD);
	set_ia32_use_frame(sse_store);
	store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
	/* load into x87 register */
	fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
	set_ia32_op_type(fld, ia32_AddrModeS);
	set_ia32_use_frame(fld);
	mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
	fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
	/* create a new return */
	arity = get_irn_arity(node);
	in = ALLOCAN(ir_node*, arity);
	pop = be_Return_get_pop(node);
	for (i = 0; i < arity; ++i) {
		ir_node *op = get_irn_n(node, i);
		/* swap in the x87 value / the load's memory for the originals */
		if (op == ret_val) {
		} else if (op == ret_mem) {
			/* (fallback) any other operand is transformed normally */
			in[i] = be_transform_node(op);
	new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
	copy_node_attr(irg, node, new_node);
4086 * Transform a be_AddSP into an ia32_SubSP.
4088 static ir_node *gen_be_AddSP(ir_node *node)
4090 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4091 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4093 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4094 match_am | match_immediate);
4095 assert(is_ia32_SubSP(new_node));
4096 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4097 &ia32_registers[REG_ESP]);
4102 * Transform a be_SubSP into an ia32_AddSP
4104 static ir_node *gen_be_SubSP(ir_node *node)
4106 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4107 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4109 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4110 match_am | match_immediate);
4111 assert(is_ia32_AddSP(new_node));
4112 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4113 &ia32_registers[REG_ESP]);
/**
 * Transform a Phi: keep the node but pin its single output to the
 * register class matching its mode (gp / xmm / vfp, or none).
 */
static ir_node *gen_Phi(ir_node *node)
	const arch_register_req_t *req;
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	if (ia32_mode_needs_gp_reg(mode)) {
		/* we shouldn't have any 64bit stuff around anymore */
		assert(get_mode_size_bits(mode) <= 32);
		/* all integer operations are on 32bit registers now */
		req = ia32_reg_classes[CLASS_ia32_gp].class_req;
	} else if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
			/* (else branch) x87 code generation */
			req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
		/* non-data modes (e.g. memory) need no register */
		req = arch_no_register_req;
	/* phi nodes allow loops, so we use the old arguments for now
	 * and fix this later */
	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
			get_irn_in(node) + 1);
	copy_node_attr(irg, node, phi);
	be_duplicate_deps(node, phi);
	arch_set_irn_register_req_out(phi, 0, req);
	/* make sure the predecessors get transformed/fixed up later */
	be_enqueue_preds(node);
4161 static ir_node *gen_Jmp(ir_node *node)
4163 ir_node *block = get_nodes_block(node);
4164 ir_node *new_block = be_transform_node(block);
4165 dbg_info *dbgi = get_irn_dbg_info(node);
4168 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4169 SET_IA32_ORIG_NODE(new_node, node);
4177 static ir_node *gen_IJmp(ir_node *node)
4179 ir_node *block = get_nodes_block(node);
4180 ir_node *new_block = be_transform_node(block);
4181 dbg_info *dbgi = get_irn_dbg_info(node);
4182 ir_node *op = get_IJmp_target(node);
4184 ia32_address_mode_t am;
4185 ia32_address_t *addr = &am.addr;
4187 assert(get_irn_mode(op) == mode_P);
4189 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4191 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4192 addr->mem, am.new_op2);
4193 set_am_attributes(new_node, &am);
4194 SET_IA32_ORIG_NODE(new_node, node);
4196 new_node = fix_mem_proj(new_node, &am);
4201 static ir_node *gen_ia32_l_Add(ir_node *node)
4203 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4204 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4205 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4206 match_commutative | match_am | match_immediate |
4207 match_mode_neutral);
4209 if (is_Proj(lowered)) {
4210 lowered = get_Proj_pred(lowered);
4212 assert(is_ia32_Add(lowered));
4213 set_irn_mode(lowered, mode_T);
4219 static ir_node *gen_ia32_l_Adc(ir_node *node)
4221 return gen_binop_flags(node, new_bd_ia32_Adc,
4222 match_commutative | match_am | match_immediate |
4223 match_mode_neutral);
4227 * Transforms a l_MulS into a "real" MulS node.
4229 * @return the created ia32 Mul node
4231 static ir_node *gen_ia32_l_Mul(ir_node *node)
4233 ir_node *left = get_binop_left(node);
4234 ir_node *right = get_binop_right(node);
4236 return gen_binop(node, left, right, new_bd_ia32_Mul,
4237 match_commutative | match_am | match_mode_neutral);
4241 * Transforms a l_IMulS into a "real" IMul1OPS node.
4243 * @return the created ia32 IMul1OP node
4245 static ir_node *gen_ia32_l_IMul(ir_node *node)
4247 ir_node *left = get_binop_left(node);
4248 ir_node *right = get_binop_right(node);
4250 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4251 match_commutative | match_am | match_mode_neutral);
4254 static ir_node *gen_ia32_l_Sub(ir_node *node)
4256 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4257 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4258 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4259 match_am | match_immediate | match_mode_neutral);
4261 if (is_Proj(lowered)) {
4262 lowered = get_Proj_pred(lowered);
4264 assert(is_ia32_Sub(lowered));
4265 set_irn_mode(lowered, mode_T);
4271 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4273 return gen_binop_flags(node, new_bd_ia32_Sbb,
4274 match_am | match_immediate | match_mode_neutral);
/**
 * Transform an ia32_l_LLtoFloat (64bit int -> float conversion, x87 only):
 * store both 32bit halves to the frame, fild the 64bit value, and for
 * unsigned inputs add a bias constant to correct the signed interpretation.
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *frame = get_irg_frame(irg);
	ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node *new_val_low = be_transform_node(val_low);
	ir_node *new_val_high = be_transform_node(val_high);
	ir_node *sync, *fild, *res;
	ir_node *store_high;
	if (ia32_cg_config.use_sse2) {
		panic("ia32_l_LLtoFloat not implemented for SSE2");
	/* do the 64bit spill: low word at offset 0, high word at offset 4 */
	store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	SET_IA32_ORIG_NODE(store_low, node);
	SET_IA32_ORIG_NODE(store_high, node);
	mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
	mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	add_ia32_am_offs_int(store_high, 4);
	/* the fild must see both stores */
	sync = new_rd_Sync(dbgi, block, 2, in);
	/* do the 64bit int load */
	fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);
	SET_IA32_ORIG_NODE(fild, node);
	res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
	if (! mode_is_signed(get_irn_mode(val_high))) {
		/* fild interpreted the value as signed; when the unsigned value had
		 * its top bit set, add 2^64 back. The correction constant is picked
		 * from a 2-entry table ({0, 2^64}) indexed by the sign bit. */
		ia32_address_mode_t am;
		ir_node *count = ia32_create_Immediate(NULL, 0, 31);
		am.addr.base = get_symconst_base();
		/* index = high word >> 31, i.e. the old sign bit */
		am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
		am.addr.mem = nomem;
		am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
		am.addr.tls_segment = false;
		am.addr.use_frame = 0;
		am.addr.frame_entity = NULL;
		am.addr.symconst_sign = 0;
		am.ls_mode = mode_F;
		am.mem_proj = nomem;
		am.op_type = ia32_AddrModeS;
		am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
		am.pinned = op_pin_state_floats;
		am.ins_permuted = false;
		fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
			am.new_op1, am.new_op2, get_fpcw());
		set_am_attributes(fadd, &am);
		set_irn_mode(fadd, mode_T);
		res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4368 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4370 ir_node *src_block = get_nodes_block(node);
4371 ir_node *block = be_transform_node(src_block);
4372 ir_graph *irg = get_Block_irg(block);
4373 dbg_info *dbgi = get_irn_dbg_info(node);
4374 ir_node *frame = get_irg_frame(irg);
4375 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4376 ir_node *new_val = be_transform_node(val);
4379 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4380 SET_IA32_ORIG_NODE(fist, node);
4381 set_ia32_use_frame(fist);
4382 set_ia32_op_type(fist, ia32_AddrModeD);
4383 set_ia32_ls_mode(fist, mode_Ls);
4385 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4386 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/**
 * Transform a Proj of an ia32_l_FloattoLL: load the requested 32bit half
 * (low at offset 0, high at offset 4) from the 64bit frame slot the fist
 * wrote to.
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg = get_Block_irg(block);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *frame = get_irg_frame(irg);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long pn = get_Proj_proj(node);
	/* new_pred is the fist's memory result; use it as the load's memory */
	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
	SET_IA32_ORIG_NODE(load, node);
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;
	if (pn == pn_ia32_l_FloattoLL_res_high) {
		/* high half lives 4 bytes into the slot */
		add_ia32_am_offs_int(load, 4);
		assert(pn == pn_ia32_l_FloattoLL_res_low);
	proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4424 * Transform the Projs of an AddSP.
4426 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4428 ir_node *pred = get_Proj_pred(node);
4429 ir_node *new_pred = be_transform_node(pred);
4430 dbg_info *dbgi = get_irn_dbg_info(node);
4431 long proj = get_Proj_proj(node);
4433 if (proj == pn_be_AddSP_sp) {
4434 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4435 pn_ia32_SubSP_stack);
4436 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4438 } else if (proj == pn_be_AddSP_res) {
4439 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4440 pn_ia32_SubSP_addr);
4441 } else if (proj == pn_be_AddSP_M) {
4442 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4445 panic("No idea how to transform proj->AddSP");
4449 * Transform the Projs of a SubSP.
4451 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4453 ir_node *pred = get_Proj_pred(node);
4454 ir_node *new_pred = be_transform_node(pred);
4455 dbg_info *dbgi = get_irn_dbg_info(node);
4456 long proj = get_Proj_proj(node);
4458 if (proj == pn_be_SubSP_sp) {
4459 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4460 pn_ia32_AddSP_stack);
4461 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4463 } else if (proj == pn_be_SubSP_M) {
4464 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4467 panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs from a Load.
 *
 * The Load may have been turned into one of several ia32 nodes (gp Load,
 * Conv with source AM, SSE xLoad, x87 vfld) or consumed entirely by source
 * address mode matching; each case uses different out numbers.
 */
static ir_node *gen_Proj_Load(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	/* loads might be part of source address mode matches, so we don't
	 * transform the ProjMs yet (with the exception of loads whose result is
	 * used only once) */
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
		/* this is needed, because sometimes we have loops that are only
		   reachable through the ProjM */
		be_enqueue_preds(node);
		/* do it in 2 steps, to silence firm verifier */
		res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
		set_Proj_proj(res, pn_ia32_mem);
	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
		switch ((pn_Load)proj) {
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
		case pn_Load_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
	} else if (is_ia32_Conv_I2I(new_pred) ||
			is_ia32_Conv_I2I8Bit(new_pred)) {
		/* the Load was folded into a Conv via source address mode */
		set_irn_mode(new_pred, mode_T);
		switch ((pn_Load)proj) {
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
		case pn_Load_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
	} else if (is_ia32_xLoad(new_pred)) {
		switch ((pn_Load)proj) {
			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
		case pn_Load_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
	} else if (is_ia32_vfld(new_pred)) {
		switch ((pn_Load)proj) {
			return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
		case pn_Load_X_except:
			/* This Load might raise an exception. Mark it. */
			set_ia32_exc_label(new_pred, 1);
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
		case pn_Load_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
		/* can happen for ProJMs when source address mode happened for the
		   whole load */
		/* however it should not be the result proj, as that would mean the
		   load had multiple users and should not have been used for
		   address mode */
		if (proj != pn_Load_M) {
			panic("internal error: transformed node not a Load");
		return new_rd_Proj(dbgi, new_pred, mode_M, 1);
	panic("No idea how to transform Proj(Load) %+F", node);
/**
 * Transform and renumber the Projs from a Store.
 *
 * The Store may have become a gp Store, an x87 fist/fisttp/fst, an SSE
 * xStore, a Sync (from gen_float_const_Store) or part of a destination
 * address mode node.
 */
static ir_node *gen_Proj_Store(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long pn = get_Proj_proj(node);
	if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
		switch ((pn_Store)pn) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
		case pn_Store_X_except:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
		case pn_Store_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
	} else if (is_ia32_vfist(new_pred)) {
		switch ((pn_Store)pn) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
		case pn_Store_X_except:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
		case pn_Store_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
	} else if (is_ia32_vfisttp(new_pred)) {
		switch ((pn_Store)pn) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
		case pn_Store_X_except:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
		case pn_Store_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
	} else if (is_ia32_vfst(new_pred)) {
		switch ((pn_Store)pn) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
		case pn_Store_X_except:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
		case pn_Store_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
	} else if (is_ia32_xStore(new_pred)) {
		switch ((pn_Store)pn) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
		case pn_Store_X_except:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
		case pn_Store_X_regular:
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
	} else if (is_Sync(new_pred)) {
		/* hack for the case that gen_float_const_Store produced a Sync */
		if (pn == pn_Store_M) {
		panic("exception control flow for gen_float_const_Store not implemented yet");
	} else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
		/* destination address mode */
		if (pn == pn_Store_M) {
		panic("exception control flow for destination AM not implemented yet");
	panic("No idea how to transform Proj(Store) %+F", node);
/**
 * Transform and renumber the Projs from a Div instruction.
 *
 * The Div may have been lowered to an integer Div/IDiv, an SSE xDiv or an
 * x87 vfdiv; Div and IDiv deliberately share their out numbers.
 */
static ir_node *gen_Proj_Div(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	/* Div and IDiv share their out numbers, so one constant suffices */
	assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
	assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
	switch ((pn_Div)proj) {
		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
		} else if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
		panic("Div transformed to unexpected thing %+F", new_pred);
		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
		} else if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
		panic("Div transformed to unexpected thing %+F", new_pred);
	case pn_Div_X_except:
		/* the division may trap; mark the node */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
	case pn_Div_X_regular:
		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
	panic("No idea how to transform proj->Div");
4682 * Transform and renumber the Projs from a Div or Mod instruction.
4684 static ir_node *gen_Proj_Mod(ir_node *node)
4686 ir_node *pred = get_Proj_pred(node);
4687 ir_node *new_pred = be_transform_node(pred);
4688 dbg_info *dbgi = get_irn_dbg_info(node);
4689 long proj = get_Proj_proj(node);
4691 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4692 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4693 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4695 switch ((pn_Mod)proj) {
4697 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4699 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4700 case pn_Mod_X_except:
4701 set_ia32_exc_label(new_pred, 1);
4702 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4703 case pn_Mod_X_regular:
4704 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4706 panic("No idea how to transform proj->Mod");
/**
 * Transform and renumber the Projs from a CopyB.
 *
 * The CopyB was lowered either to CopyB_i (known small size, inlined) or
 * to the generic CopyB; the two have distinct out numbers.
 */
static ir_node *gen_Proj_CopyB(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	switch ((pn_CopyB)proj) {
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
	case pn_CopyB_X_regular:
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
	case pn_CopyB_X_except:
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
	panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call into an ia32 Call.
 *
 * Handles: enabling the x87 simulator for float results, folding the call
 * target into a source address mode or immediate, and routing register
 * parameters into their fixed eax/ecx/edx slots.
 */
static ir_node *gen_be_Call(ir_node *node)
	dbg_info *const dbgi = get_irn_dbg_info(node);
	ir_node *const src_block = get_nodes_block(node);
	ir_node *const block = be_transform_node(src_block);
	ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
	ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
	ir_node *const sp = be_transform_node(src_sp);
	ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
	ia32_address_mode_t am;
	ia32_address_t *const addr = &am.addr;
	/* register parameters default to "unused" */
	ir_node * eax = noreg_GP;
	ir_node * ecx = noreg_GP;
	ir_node * edx = noreg_GP;
	unsigned const pop = be_Call_get_pop(node);
	ir_type *const call_tp = be_Call_get_type(node);
	int old_no_pic_adjust;
	int throws_exception = ir_throws_exception(node);
	/* Run the x87 simulator if the call returns a float value */
	if (get_method_n_ress(call_tp) > 0) {
		ir_type *const res_type = get_method_res_type(call_tp, 0);
		ir_mode *const res_mode = get_type_mode(res_type);
		if (res_mode != NULL && mode_is_float(res_mode)) {
			ir_graph *irg = current_ir_graph;
			ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
			irg_data->do_x87_sim = 1;
	/* We do not want be_Call direct calls */
	assert(be_Call_get_entity(node) == NULL);
	/* special case for PIC trampoline calls: suppress the PIC adjustment
	 * while matching the call target, then restore the old setting */
	old_no_pic_adjust = ia32_no_pic_adjust;
	ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
			match_am | match_immediate);
	ia32_no_pic_adjust = old_no_pic_adjust;
	/* the last operand is the fpcw; the ones before it (down to
	 * n_be_Call_first_arg) are GP register parameters */
	i = get_irn_arity(node) - 1;
	fpcw = be_transform_node(get_irn_n(node, i--));
	for (; i >= n_be_Call_first_arg; --i) {
		arch_register_req_t const *const req
			= arch_get_irn_register_req_in(node, i);
		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
		assert(req->type == arch_register_req_type_limited);
		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
		switch (*req->limited) {
		case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
		case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
		case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
		default: panic("Invalid GP register for register parameter");
	mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
			am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
	ir_set_throws_exception(call, throws_exception);
	set_am_attributes(call, &am);
	call = fix_mem_proj(call, &am);
	if (get_irn_pinned(node) == op_pin_state_pinned)
		set_irn_pinned(call, op_pin_state_pinned);
	SET_IA32_ORIG_NODE(call, node);
	if (ia32_cg_config.use_sse2) {
		/* remember this call for post-processing */
		ARR_APP1(ir_node *, call_list, call);
		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4833 * Transform Builtin trap
4835 static ir_node *gen_trap(ir_node *node)
4837 dbg_info *dbgi = get_irn_dbg_info(node);
4838 ir_node *block = be_transform_node(get_nodes_block(node));
4839 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4841 return new_bd_ia32_UD2(dbgi, block, mem);
4845 * Transform Builtin debugbreak
4847 static ir_node *gen_debugbreak(ir_node *node)
4849 dbg_info *dbgi = get_irn_dbg_info(node);
4850 ir_node *block = be_transform_node(get_nodes_block(node));
4851 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4853 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4857 * Transform Builtin return_address
/**
 * Transform Builtin return_address.
 *
 * Parameter 0 is the (constant) number of frames to walk up, parameter 1
 * the current frame pointer.  The return address is loaded relative to
 * the entity provided by ia32_get_return_address_entity().
 */
4859 static ir_node *gen_return_address(ir_node *node)
4861 ir_node *param = get_Builtin_param(node, 0);
4862 ir_node *frame = get_Builtin_param(node, 1);
4863 dbg_info *dbgi = get_irn_dbg_info(node);
4864 ir_tarval *tv = get_Const_tarval(param);
4865 ir_graph *irg = get_irn_irg(node);
4866 unsigned long value = get_tarval_long(tv);
4868 ir_node *block = be_transform_node(get_nodes_block(node));
4869 ir_node *ptr = be_transform_node(frame);
/* climb up the requested number of frames; ProduceVal creates the
 * otherwise uninitialized counter/result registers ClimbFrame needs */
4873 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4874 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4875 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4878 /* load the return address from this frame */
4879 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4881 set_irn_pinned(load, get_irn_pinned(node));
4882 set_ia32_op_type(load, ia32_AddrModeS);
4883 set_ia32_ls_mode(load, mode_Iu);
4885 set_ia32_am_offs_int(load, 0);
4886 set_ia32_use_frame(load);
4887 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* a floating load may be rematerialized; the asserts prove that the
 * result proj number is the same for all load variants */
4889 if (get_irn_pinned(node) == op_pin_state_floats) {
4890 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4891 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4892 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4893 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4896 SET_IA32_ORIG_NODE(load, node);
4897 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4901 * Transform Builtin frame_address
/**
 * Transform Builtin frame_address.
 *
 * Parameter 0 is the (constant) number of frames to walk up, parameter 1
 * the current frame pointer.  The frame address is loaded relative to the
 * entity provided by ia32_get_frame_address_entity().
 */
4903 static ir_node *gen_frame_address(ir_node *node)
4905 ir_node *param = get_Builtin_param(node, 0);
4906 ir_node *frame = get_Builtin_param(node, 1);
4907 dbg_info *dbgi = get_irn_dbg_info(node);
4908 ir_tarval *tv = get_Const_tarval(param);
4909 ir_graph *irg = get_irn_irg(node);
4910 unsigned long value = get_tarval_long(tv);
4912 ir_node *block = be_transform_node(get_nodes_block(node));
4913 ir_node *ptr = be_transform_node(frame);
/* climb up the requested number of frames; ProduceVal creates the
 * otherwise uninitialized counter/result registers ClimbFrame needs */
4918 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4919 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4920 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4923 /* load the frame address from this frame */
4924 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4926 set_irn_pinned(load, get_irn_pinned(node));
4927 set_ia32_op_type(load, ia32_AddrModeS);
4928 set_ia32_ls_mode(load, mode_Iu);
/* NOTE(review): the fallback branch for ent == NULL appears elided in
 * this excerpt (hence the second set_ia32_am_offs_int below) */
4930 ent = ia32_get_frame_address_entity(irg);
4932 set_ia32_am_offs_int(load, 0);
4933 set_ia32_use_frame(load);
4934 set_ia32_frame_ent(load, ent);
4936 /* will fail anyway, but gcc does this: */
4937 set_ia32_am_offs_int(load, 0);
/* a floating load may be rematerialized; the asserts prove that the
 * result proj number is the same for all load variants */
4940 if (get_irn_pinned(node) == op_pin_state_floats) {
4941 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4942 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4943 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4944 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4947 SET_IA32_ORIG_NODE(load, node);
4948 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4952 * Transform builtin prefetch.
/**
 * Transform Builtin prefetch.
 *
 * Parameter 0 is the address, parameter 1 the read/write flag, parameter 2
 * (evaluated for SSE only) the locality hint.  If neither SSE nor 3DNow!
 * prefetch instructions are available, only the memory is routed through.
 */
4954 static ir_node *gen_prefetch(ir_node *node)
4957 ir_node *ptr, *block, *mem, *base, *idx;
4958 ir_node *param, *new_node;
4961 ia32_address_t addr;
4963 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4964 /* no prefetch at all, route memory */
4965 return be_transform_node(get_Builtin_mem(node));
/* read/write flag (param 1 must be a Const) */
4968 param = get_Builtin_param(node, 1);
4969 tv = get_Const_tarval(param);
4970 rw = get_tarval_long(tv);
4972 /* construct load address */
4973 memset(&addr, 0, sizeof(addr));
4974 ptr = get_Builtin_param(node, 0);
4975 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* NOTE(review): base/idx are taken from addr; their NULL -> noreg_GP
 * fallbacks appear elided in this excerpt */
4982 base = be_transform_node(base);
4988 idx = be_transform_node(idx);
4991 dbgi = get_irn_dbg_info(node);
4992 block = be_transform_node(get_nodes_block(node));
4993 mem = be_transform_node(get_Builtin_mem(node));
4995 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4996 /* we have 3DNow!, this was already checked above */
4997 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4998 } else if (ia32_cg_config.use_sse_prefetch) {
4999 /* note: rw == 1 is IGNORED in that case */
5000 param = get_Builtin_param(node, 2);
5001 tv = get_Const_tarval(param);
5002 locality = get_tarval_long(tv);
5004 /* SSE style prefetch: pick the variant by locality hint
 * (the switch case labels appear elided in this excerpt) */
5007 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
5010 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
5013 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
5016 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5020 assert(ia32_cg_config.use_3dnow_prefetch);
5021 /* 3DNow! style prefetch */
5022 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5025 set_irn_pinned(new_node, get_irn_pinned(node));
5026 set_ia32_op_type(new_node, ia32_AddrModeS);
5027 set_ia32_ls_mode(new_node, mode_Bu);
5028 set_address(new_node, &addr);
5030 SET_IA32_ORIG_NODE(new_node, node);
5032 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5036 * Transform bsf like node
5038 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5040 ir_node *param = get_Builtin_param(node, 0);
5041 dbg_info *dbgi = get_irn_dbg_info(node);
5043 ir_node *block = get_nodes_block(node);
5044 ir_node *new_block = be_transform_node(block);
5046 ia32_address_mode_t am;
5047 ia32_address_t *addr = &am.addr;
5050 match_arguments(&am, block, NULL, param, NULL, match_am);
5052 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5053 set_am_attributes(cnt, &am);
5054 set_ia32_ls_mode(cnt, get_irn_mode(param));
5056 SET_IA32_ORIG_NODE(cnt, node);
5057 return fix_mem_proj(cnt, &am);
5061 * Transform builtin ffs.
/**
 * Transform builtin ffs: ffs(x) = bsf(x) + 1, and 0 for x == 0.
 *
 * bsf leaves the result undefined for x == 0 but sets ZF.  ZF is turned
 * into 0/1 (setcc), widened, negated to 0/-1 and or'ed into the bsf
 * result, so the final "+ 1" yields 0 exactly for x == 0.
 */
5063 static ir_node *gen_ffs(ir_node *node)
5065 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5066 ir_node *real = skip_Proj(bsf);
5067 dbg_info *dbgi = get_irn_dbg_info(real);
5068 ir_node *block = get_nodes_block(real);
5069 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* turn the bsf node into mode_T so its flags output becomes available */
5072 if (get_irn_mode(real) != mode_T) {
5073 set_irn_mode(real, mode_T);
5074 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5077 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set) */
5080 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5081 SET_IA32_ORIG_NODE(set, node);
/* widen the setcc byte to a full register */
5084 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5085 SET_IA32_ORIG_NODE(conv, node);
/* 0 -> 0, 1 -> -1 (all bits set) */
5088 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* force the result to -1 for a zero input */
5091 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5092 set_ia32_ls_mode(orn, mode_Iu);
5093 set_ia32_commutative(orn);
/* + 1: bit index -> ffs convention; -1 + 1 -> 0 for zero input */
5096 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5097 add_ia32_am_offs_int(add, 1);
5102 * Transform builtin clz.
5104 static ir_node *gen_clz(ir_node *node)
5106 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5107 ir_node *real = skip_Proj(bsr);
5108 dbg_info *dbgi = get_irn_dbg_info(real);
5109 ir_node *block = get_nodes_block(real);
5110 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5112 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5116 * Transform builtin ctz.
/**
 * Transform builtin ctz: bsf directly yields the index of the lowest set
 * bit.
 */
5118 static ir_node *gen_ctz(ir_node *node)
5120 return gen_unop_AM(node, new_bd_ia32_Bsf);
5124 * Transform builtin parity.
/**
 * Transform builtin parity.
 *
 * The x86 parity flag only considers the lowest byte, so the operand is
 * folded first: xor the upper 16 bits into the lower half, then xor the
 * two low bytes (XorHighLow), and materialize the resulting parity flag
 * with a setcc.
 */
5126 static ir_node *gen_parity(ir_node *node)
5128 dbg_info *dbgi = get_irn_dbg_info(node);
5129 ir_node *block = get_nodes_block(node);
5130 ir_node *new_block = be_transform_node(block);
5131 ir_node *param = get_Builtin_param(node, 0);
5132 ir_node *new_param = be_transform_node(param);
5135 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5136 * so we have to do complicated xoring first.
5137 * (we should also better lower this before the backend so we still have a
5138 * chance for CSE, constant folding and other goodies for some of these
/* fold the upper 16 bits into the lower 16 */
5141 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5142 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5143 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
/* fold the two remaining bytes into one */
5145 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5148 set_ia32_ls_mode(xor, mode_Iu);
5149 set_ia32_commutative(xor);
/* expose the flags output of the last xor */
5151 set_irn_mode(xor2, mode_T);
5152 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5155 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5156 SET_IA32_ORIG_NODE(new_node, node);
/* widen the setcc byte to the result mode */
5159 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5160 nomem, new_node, mode_Bu);
5161 SET_IA32_ORIG_NODE(new_node, node);
5166 * Transform builtin popcount
/**
 * Transform builtin popcount.
 *
 * Uses the popcnt instruction when available (SSE4.2/SSE4a); otherwise
 * emits the classic divide-and-conquer bit-counting sequence (pairs, then
 * nibbles, bytes, half-words), using lea for the additions.
 */
5168 static ir_node *gen_popcount(ir_node *node)
5170 ir_node *param = get_Builtin_param(node, 0);
5171 dbg_info *dbgi = get_irn_dbg_info(node);
5173 ir_node *block = get_nodes_block(node);
5174 ir_node *new_block = be_transform_node(block);
5177 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5179 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5180 if (ia32_cg_config.use_popcnt) {
5181 ia32_address_mode_t am;
5182 ia32_address_t *addr = &am.addr;
5185 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5187 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5188 set_am_attributes(cnt, &am);
5189 set_ia32_ls_mode(cnt, get_irn_mode(param));
5191 SET_IA32_ORIG_NODE(cnt, node);
5192 return fix_mem_proj(cnt, &am);
5195 new_param = be_transform_node(param);
5197 /* do the standard popcount algo */
5198 /* TODO: This is stupid, we should transform this before the backend,
5199 * to get CSE, localopts, etc. for the operations
5200 * TODO: This is also not the optimal algorithm (it is just the starting
5201 * example in hackers delight, they optimize it more on the following page)
5202 * But I'm too lazy to fix this now, as the code should get lowered before
5203 * the backend anyway.
5206 /* m1 = x & 0x55555555 */
5207 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5208 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5211 simm = ia32_create_Immediate(NULL, 0, 1);
5212 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5214 /* m2 = s1 & 0x55555555 */
5215 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (2 bit counts per pair) */
5218 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5220 /* m4 = m3 & 0x33333333 */
5221 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5222 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5225 simm = ia32_create_Immediate(NULL, 0, 2);
5226 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5228 /* m5 = s2 & 0x33333333 */
5229 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (4 bit counts per nibble) */
5232 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5234 /* m7 = m6 & 0x0F0F0F0F */
5235 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5236 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5239 simm = ia32_create_Immediate(NULL, 0, 4);
5240 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5242 /* m8 = s3 & 0x0F0F0F0F */
5243 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (byte counts) */
5246 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5248 /* m10 = m9 & 0x00FF00FF */
5249 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5250 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5253 simm = ia32_create_Immediate(NULL, 0, 8);
5254 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5256 /* m11 = s4 & 0x00FF00FF */
5257 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5259 /* m12 = m10 + m11 */
5260 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5262 /* m13 = m12 & 0x0000FFFF */
5263 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5264 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5266 /* s5 = m12 >> 16 */
5267 simm = ia32_create_Immediate(NULL, 0, 16);
5268 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5270 /* res = m13 + s5 */
5271 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5275 * Transform builtin byte swap.
/**
 * Transform builtin byte swap, dispatching on the operand size
 * (the switch case labels appear elided in this excerpt).
 *
 * 32 bit: use the bswap instruction on i486+, otherwise assemble the
 * swapped value with shifts, masks and lea additions.  16 bit: Bswap16
 * (xchg of the byte halves) is always available.
 */
5277 static ir_node *gen_bswap(ir_node *node)
5279 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5280 dbg_info *dbgi = get_irn_dbg_info(node);
5282 ir_node *block = get_nodes_block(node);
5283 ir_node *new_block = be_transform_node(block);
5284 ir_mode *mode = get_irn_mode(param);
5285 unsigned size = get_mode_size_bits(mode);
5286 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5290 if (ia32_cg_config.use_i486) {
5291 /* swap available */
5292 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 fallback: byte 0 -> 24, byte 1 -> 16 */
5294 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5295 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5297 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00))
5298 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
/* byte 2 -> 8 */
5300 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5302 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5303 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
/* byte 3 -> 0, then combine all four */
5305 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5306 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5309 /* swap16 always available */
5310 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5313 panic("Invalid bswap size (%d)", size);
5318 * Transform builtin outport.
/**
 * Transform builtin outport: emit an ia32 Outport ("out" instruction)
 * writing the value (param 1) to the immediate-or-register port (param 0);
 * the operand size is taken from the mode of the written value.
 */
5320 static ir_node *gen_outport(ir_node *node)
5322 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5323 ir_node *oldv = get_Builtin_param(node, 1);
5324 ir_mode *mode = get_irn_mode(oldv);
5325 ir_node *value = be_transform_node(oldv);
5326 ir_node *block = be_transform_node(get_nodes_block(node));
5327 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5328 dbg_info *dbgi = get_irn_dbg_info(node);
5330 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5331 set_ia32_ls_mode(res, mode);
5336 * Transform builtin inport.
/**
 * Transform builtin inport: emit an ia32 Inport ("in" instruction) reading
 * from the immediate-or-register port (param 0); the operand size is taken
 * from the builtin's first method result type.
 */
5338 static ir_node *gen_inport(ir_node *node)
5340 ir_type *tp = get_Builtin_type(node);
5341 ir_type *rstp = get_method_res_type(tp, 0);
5342 ir_mode *mode = get_type_mode(rstp);
5343 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5344 ir_node *block = be_transform_node(get_nodes_block(node));
5345 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5346 dbg_info *dbgi = get_irn_dbg_info(node);
5348 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5349 set_ia32_ls_mode(res, mode);
5351 /* check for missing Result Proj */
5356 * Transform a builtin inner trampoline
/**
 * Transform a builtin inner_trampoline.
 *
 * Writes a 10-byte trampoline to memory at param 0:
 *   B9 <env>   mov ecx, <env>        (env = param 2)
 *   E9 <rel>   jmp rel <callee>      (callee = param 1, rel is
 *                                     pc-relative to the trampoline end)
 * and produces a tuple of (memory, trampoline address).
 */
5358 static ir_node *gen_inner_trampoline(ir_node *node)
5360 ir_node *ptr = get_Builtin_param(node, 0);
5361 ir_node *callee = get_Builtin_param(node, 1);
5362 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5363 ir_node *mem = get_Builtin_mem(node);
5364 ir_node *block = get_nodes_block(node);
5365 ir_node *new_block = be_transform_node(block);
5369 ir_node *trampoline;
5371 dbg_info *dbgi = get_irn_dbg_info(node);
5372 ia32_address_t addr;
5374 /* construct store address */
5375 memset(&addr, 0, sizeof(addr));
5376 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5378 if (addr.base == NULL) {
5379 addr.base = noreg_GP;
5381 addr.base = be_transform_node(addr.base);
5384 if (addr.index == NULL) {
5385 addr.index = noreg_GP;
5387 addr.index = be_transform_node(addr.index);
5389 addr.mem = be_transform_node(mem);
5391 /* mov ecx, <env> */
5392 val = ia32_create_Immediate(NULL, 0, 0xB9);
5393 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5394 addr.index, addr.mem, val);
5395 set_irn_pinned(store, get_irn_pinned(node));
5396 set_ia32_op_type(store, ia32_AddrModeD);
5397 set_ia32_ls_mode(store, mode_Bu);
5398 set_address(store, &addr);
/* store the 32 bit immediate of the mov: the environment pointer */
5402 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5403 addr.index, addr.mem, env);
5404 set_irn_pinned(store, get_irn_pinned(node));
5405 set_ia32_op_type(store, ia32_AddrModeD);
5406 set_ia32_ls_mode(store, mode_Iu);
5407 set_address(store, &addr);
5411 /* jmp rel <callee> */
5412 val = ia32_create_Immediate(NULL, 0, 0xE9);
5413 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5414 addr.index, addr.mem, val);
5415 set_irn_pinned(store, get_irn_pinned(node));
5416 set_ia32_op_type(store, ia32_AddrModeD);
5417 set_ia32_ls_mode(store, mode_Bu);
5418 set_address(store, &addr);
5422 trampoline = be_transform_node(ptr);
5424 /* the callee is typically an immediate */
5425 if (is_SymConst(callee)) {
5426 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5428 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5429 add_ia32_am_offs_int(rel, -10);
/* rel = callee - 10 - trampoline: displacement relative to the end of
 * the 10 byte trampoline */
5431 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5433 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5434 addr.index, addr.mem, rel);
5435 set_irn_pinned(store, get_irn_pinned(node));
5436 set_ia32_op_type(store, ia32_AddrModeD);
5437 set_ia32_ls_mode(store, mode_Iu);
5438 set_address(store, &addr);
/* results: (final memory, trampoline address) */
5443 return new_r_Tuple(new_block, 2, in);
5447 * Transform Builtin node.
/**
 * Transform a Builtin node by dispatching on its kind (some of the switch
 * case labels appear elided in this excerpt); unsupported kinds panic.
 */
5449 static ir_node *gen_Builtin(ir_node *node)
5451 ir_builtin_kind kind = get_Builtin_kind(node);
5455 return gen_trap(node);
5456 case ir_bk_debugbreak:
5457 return gen_debugbreak(node);
5458 case ir_bk_return_address:
5459 return gen_return_address(node);
5460 case ir_bk_frame_address:
5461 return gen_frame_address(node);
5462 case ir_bk_prefetch:
5463 return gen_prefetch(node);
5465 return gen_ffs(node);
5467 return gen_clz(node);
5469 return gen_ctz(node);
5471 return gen_parity(node);
5472 case ir_bk_popcount:
5473 return gen_popcount(node);
5475 return gen_bswap(node);
5477 return gen_outport(node);
5479 return gen_inport(node);
5480 case ir_bk_inner_trampoline:
5481 return gen_inner_trampoline(node);
5483 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5487 * Transform Proj(Builtin) node.
/**
 * Transform Proj(Builtin): map the generic Builtin proj numbers onto the
 * projs/tuple entries of the already transformed ia32 node
 * (some switch case labels appear elided in this excerpt).
 */
5489 static ir_node *gen_Proj_Builtin(ir_node *proj)
5491 ir_node *node = get_Proj_pred(proj);
5492 ir_node *new_node = be_transform_node(node);
5493 ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node IS the result */
5496 case ir_bk_return_address:
5497 case ir_bk_frame_address:
5502 case ir_bk_popcount:
5504 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
/* memory-only builtins: the transformed node IS the memory */
5507 case ir_bk_debugbreak:
5508 case ir_bk_prefetch:
5510 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a result and a memory proj */
5513 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5514 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5516 assert(get_Proj_proj(proj) == pn_Builtin_M);
5517 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5519 case ir_bk_inner_trampoline:
/* gen_inner_trampoline built a tuple: (0 = memory, 1 = address) */
5520 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5521 return get_Tuple_pred(new_node, 1);
5523 assert(get_Proj_proj(proj) == pn_Builtin_M);
5524 return get_Tuple_pred(new_node, 0);
5527 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5530 static ir_node *gen_be_IncSP(ir_node *node)
5532 ir_node *res = be_duplicate_node(node);
5533 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5539 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call: translate the be_Call proj numbers
 * to the proj numbers of the ia32_Call, find result projs by matching the
 * limited register requirement, and pin well-known registers on the new
 * Proj.
 */
5541 static ir_node *gen_Proj_be_Call(ir_node *node)
5543 ir_node *call = get_Proj_pred(node);
5544 ir_node *new_call = be_transform_node(call);
5545 dbg_info *dbgi = get_irn_dbg_info(node);
5546 long proj = get_Proj_proj(node);
5547 ir_mode *mode = get_irn_mode(node);
5550 if (proj == pn_be_Call_M) {
/* NOTE(review): n_ia32_Call_mem (an INPUT index) is passed where a proj
 * number is expected — confirm it coincides with pn_ia32_Call_M */
5551 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5553 /* transform call modes */
5554 if (mode_is_data(mode)) {
5555 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5559 /* Map from be_Call to ia32_Call proj number */
5560 if (proj == pn_be_Call_sp) {
5561 proj = pn_ia32_Call_stack;
5562 } else if (proj == pn_be_Call_M) {
5563 proj = pn_ia32_Call_M;
5564 } else if (proj == pn_be_Call_X_except) {
5565 proj = pn_ia32_Call_X_except;
5566 } else if (proj == pn_be_Call_X_regular) {
5567 proj = pn_ia32_Call_X_regular;
/* result proj: find the ia32_Call output with the same limited
 * register requirement as this be_Call result */
5569 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5570 int const n_outs = arch_get_irn_n_outs(new_call);
5573 assert(proj >= pn_be_Call_first_res);
5574 assert(req->type & arch_register_req_type_limited);
5576 for (i = 0; i < n_outs; ++i) {
5577 arch_register_req_t const *const new_req
5578 = arch_get_irn_register_req_out(new_call, i);
5580 if (!(new_req->type & arch_register_req_type_limited) ||
5581 new_req->cls != req->cls ||
5582 *new_req->limited != *req->limited)
5591 res = new_rd_Proj(dbgi, new_call, mode, proj);
5593 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5595 case pn_ia32_Call_stack:
5596 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5599 case pn_ia32_Call_fpcw:
5600 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5608 * Transform the Projs from a Cmp.
5610 static ir_node *gen_Proj_Cmp(ir_node *node)
5612 /* this probably means not all mode_b nodes were lowered... */
5613 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform Proj(ASM): the memory proj is rerouted to the last output of
 * the new ASM node; int/reference and float projs keep their position
 * (their mode adjustments appear elided in this excerpt).
 */
5617 static ir_node *gen_Proj_ASM(ir_node *node)
5619 ir_mode *mode = get_irn_mode(node);
5620 ir_node *pred = get_Proj_pred(node);
5621 ir_node *new_pred = be_transform_node(pred);
5622 long pos = get_Proj_proj(node);
5624 if (mode == mode_M) {
/* the memory output is always the last one of the ia32 ASM node */
5625 pos = arch_get_irn_n_outs(new_pred)-1;
5626 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5628 } else if (mode_is_float(mode)) {
5631 panic("unexpected proj mode at ASM");
5634 return new_r_Proj(new_pred, mode, pos);
5638 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatch on the opcode of
 * the predecessor (some switch case labels appear elided in this excerpt);
 * anything unhandled is duplicated as-is.
 */
5640 static ir_node *gen_Proj(ir_node *node)
5642 ir_node *pred = get_Proj_pred(node);
5645 switch (get_irn_opcode(pred)) {
5647 return gen_Proj_Load(node);
5649 return gen_Proj_Store(node);
5651 return gen_Proj_ASM(node);
5653 return gen_Proj_Builtin(node);
5655 return gen_Proj_Div(node);
5657 return gen_Proj_Mod(node);
5659 return gen_Proj_CopyB(node);
5661 return gen_Proj_be_SubSP(node);
5663 return gen_Proj_be_AddSP(node);
5665 return gen_Proj_be_Call(node);
5667 return gen_Proj_Cmp(node);
5669 proj = get_Proj_proj(node);
5671 case pn_Start_X_initial_exec: {
5672 ir_node *block = get_nodes_block(pred);
5673 ir_node *new_block = be_transform_node(block);
5674 dbg_info *dbgi = get_irn_dbg_info(node);
5675 /* we exchange the ProjX with a jump */
5676 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5684 if (is_ia32_l_FloattoLL(pred)) {
5685 return gen_Proj_l_FloattoLL(node);
5687 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5691 ir_mode *mode = get_irn_mode(node);
5692 if (ia32_mode_needs_gp_reg(mode)) {
/* GP values become mode_Iu; keep the node number for debugging */
5693 ir_node *new_pred = be_transform_node(pred);
5694 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5695 get_Proj_proj(node));
5696 new_proj->node_nr = node->node_nr;
5701 return be_duplicate_node(node);
5705 * Enters all transform functions into the generic pointer
/**
 * Enter all ia32 transform callbacks into the generic op function
 * pointers; ops without an entry keep the default set up by
 * be_start_transform_setup().
 */
5707 static void register_transformers(void)
5709 /* first clear the generic function pointer for all ops */
5710 be_start_transform_setup();
/* firm and backend (be_*) ops */
5712 be_set_transform_function(op_Add, gen_Add);
5713 be_set_transform_function(op_And, gen_And);
5714 be_set_transform_function(op_ASM, ia32_gen_ASM);
5715 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5716 be_set_transform_function(op_be_Call, gen_be_Call);
5717 be_set_transform_function(op_be_Copy, gen_be_Copy);
5718 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5719 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5720 be_set_transform_function(op_be_Return, gen_be_Return);
5721 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5722 be_set_transform_function(op_Builtin, gen_Builtin);
5723 be_set_transform_function(op_Cmp, gen_Cmp);
5724 be_set_transform_function(op_Cond, gen_Cond);
5725 be_set_transform_function(op_Const, gen_Const);
5726 be_set_transform_function(op_Conv, gen_Conv);
5727 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5728 be_set_transform_function(op_Div, gen_Div);
5729 be_set_transform_function(op_Eor, gen_Eor);
/* lowered ia32 pseudo ops; already-final ones are merely duplicated */
5730 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5731 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5732 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5733 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5734 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5735 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5736 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5737 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5738 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5739 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5740 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5741 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5742 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5743 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5744 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5745 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5746 be_set_transform_function(op_IJmp, gen_IJmp);
5747 be_set_transform_function(op_Jmp, gen_Jmp);
5748 be_set_transform_function(op_Load, gen_Load);
5749 be_set_transform_function(op_Minus, gen_Minus);
5750 be_set_transform_function(op_Mod, gen_Mod);
5751 be_set_transform_function(op_Mul, gen_Mul);
5752 be_set_transform_function(op_Mulh, gen_Mulh);
5753 be_set_transform_function(op_Mux, gen_Mux);
5754 be_set_transform_function(op_Not, gen_Not);
5755 be_set_transform_function(op_Or, gen_Or);
5756 be_set_transform_function(op_Phi, gen_Phi);
5757 be_set_transform_function(op_Proj, gen_Proj);
5758 be_set_transform_function(op_Rotl, gen_Rotl);
5759 be_set_transform_function(op_Shl, gen_Shl);
5760 be_set_transform_function(op_Shr, gen_Shr);
5761 be_set_transform_function(op_Shrs, gen_Shrs);
5762 be_set_transform_function(op_Store, gen_Store);
5763 be_set_transform_function(op_Sub, gen_Sub);
5764 be_set_transform_function(op_Switch, gen_Switch);
5765 be_set_transform_function(op_SymConst, gen_SymConst);
5766 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5770 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform the per-graph unknown/noreg helper nodes and cache the
 * globally used nomem and noreg_GP nodes for the transform run.
 */
5772 static void ia32_pretransform_node(void)
5774 ir_graph *irg = current_ir_graph;
5775 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5777 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5778 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5779 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5780 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5781 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
/* cache the frequently used helper nodes in file-level variables */
5783 nomem = get_irg_no_mem(irg);
5784 noreg_GP = ia32_new_NoReg_gp(irg);
5788 * Post-process all calls if we are in SSE mode.
5789 * The ABI requires that the results are in st0, copy them
5790 * to a xmm register.
/**
 * Post-process the remembered SSE2 calls: float results arrive in st(0)
 * per ABI; patch xStore users into direct vfst stores, otherwise spill
 * st(0) to the frame and reload it into an xmm register for the remaining
 * users.
 */
5792 static void postprocess_fp_call_results(void)
5796 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5797 ir_node *call = call_list[i];
5798 ir_type *mtp = call_types[i];
5801 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5802 ir_type *res_tp = get_method_res_type(mtp, j);
5803 ir_node *res, *new_res;
5804 const ir_edge_t *edge, *next;
5807 if (! is_atomic_type(res_tp)) {
5808 /* no floating point return */
5811 res_mode = get_type_mode(res_tp);
5812 if (! mode_is_float(res_mode)) {
5813 /* no floating point return */
/* the j'th float result is the vf0+j proj of the call */
5817 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5820 /* now patch the users */
5821 foreach_out_edge_safe(res, edge, next) {
5822 ir_node *succ = get_edge_src_irn(edge);
5825 if (be_is_Keep(succ))
5828 if (is_ia32_xStore(succ)) {
5829 /* an xStore can be patched into an vfst */
5830 dbg_info *db = get_irn_dbg_info(succ);
5831 ir_node *block = get_nodes_block(succ);
5832 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5833 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5834 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5835 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5836 ir_mode *mode = get_ia32_ls_mode(succ);
5838 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5839 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5840 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5841 if (is_ia32_use_frame(succ))
5842 set_ia32_use_frame(st);
5843 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5844 set_irn_pinned(st, get_irn_pinned(succ));
5845 set_ia32_op_type(st, ia32_AddrModeD);
/* xStore and vfst projs line up, so exchanging the nodes is safe */
5847 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5848 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5849 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* build the st(0) -> frame -> xmm transfer once, lazily */
5856 if (new_res == NULL) {
5857 dbg_info *db = get_irn_dbg_info(call);
5858 ir_node *block = get_nodes_block(call);
5859 ir_node *frame = get_irg_frame(current_ir_graph);
5860 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5861 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5862 ir_node *vfst, *xld, *new_mem;
5865 /* store st(0) on stack */
5866 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5868 set_ia32_op_type(vfst, ia32_AddrModeD);
5869 set_ia32_use_frame(vfst);
5871 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5873 /* load into SSE register */
5874 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5876 set_ia32_op_type(xld, ia32_AddrModeS);
5877 set_ia32_use_frame(xld);
5879 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5880 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* route all former memory users through the new load */
5882 if (old_mem != NULL) {
5883 edges_reroute(old_mem, new_mem);
/* let the remaining user see the xmm value */
5887 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5893 /* do the transformation */
/**
 * Transform the given graph into the ia32 backend representation: set up
 * the transformers and analyses, run be_transform_graph(), then fix up
 * SSE2 float call results and tear the analyses down again.
 */
5894 void ia32_transform_graph(ir_graph *irg)
5898 register_transformers();
5899 initial_fpcw = NULL;
5900 ia32_no_pic_adjust = 0;
5902 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* height analysis and address-mode exclusion set used during matching */
5904 be_timer_push(T_HEIGHTS);
5905 ia32_heights = heights_new(irg);
5906 be_timer_pop(T_HEIGHTS);
5907 ia32_calculate_non_address_mode_nodes(irg);
5909 /* the transform phase is not safe for CSE (yet) because several nodes get
5910 * attributes set after their creation */
5911 cse_last = get_opt_cse();
5914 call_list = NEW_ARR_F(ir_node *, 0);
5915 call_types = NEW_ARR_F(ir_type *, 0);
5916 be_transform_graph(irg, ia32_pretransform_node);
/* SSE2: move x87 float call results into xmm registers */
5918 if (ia32_cg_config.use_sse2)
5919 postprocess_fp_call_results();
5920 DEL_ARR_F(call_types);
5921 DEL_ARR_F(call_list);
5923 set_opt_cse(cse_last);
5925 ia32_free_non_address_mode_nodes();
5926 heights_free(ia32_heights);
5927 ia32_heights = NULL;
5930 void ia32_init_transform(void)
5932 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");