2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* it's enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero if a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero if a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero if a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
168 if (mode == mode_D) {
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_options.pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
225 } else if (tarval_is_one(tv)) {
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 } else { /* non-float mode */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
337 * Transforms a SymConst.
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
456 if (kct == ia32_ULLBIAS) {
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type;
559 unsigned commutative : 1;
560 unsigned ins_permuted : 1;
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
584 if (is_Const(node)) {
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
596 am->ls_mode = get_type_mode(get_entity_type(entity));
597 am->pinned = op_pin_state_floats;
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
646 set_ia32_commutative(node);
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 src_mode = get_irn_mode(get_Conv_op(node));
666 dest_mode = get_irn_mode(node);
668 ia32_mode_needs_gp_reg(src_mode) &&
669 ia32_mode_needs_gp_reg(dest_mode) &&
670 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
673 /** Skip all Down-Conv's on a given node and return the resulting node. */
674 ir_node *ia32_skip_downconv(ir_node *node)
676 while (is_downconv(node)) {
677 /* we only want to skip the conv when we're the only user
678 * (because this test is used in the context of address-mode selection
679 * and we don't want to use address mode for multiple users) */
680 if (get_irn_n_edges(node) > 1)
683 node = get_Conv_op(node);
689 static bool is_sameconv(ir_node *node)
697 /* we only want to skip the conv when we're the only user
698 * (because this test is used in the context of address-mode selection
699 * and we don't want to use address mode for multiple users) */
700 if (get_irn_n_edges(node) > 1)
703 src_mode = get_irn_mode(get_Conv_op(node));
704 dest_mode = get_irn_mode(node);
706 ia32_mode_needs_gp_reg(src_mode) &&
707 ia32_mode_needs_gp_reg(dest_mode) &&
708 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
711 /** Skip all signedness convs */
712 static ir_node *ia32_skip_sameconv(ir_node *node)
714 while (is_sameconv(node))
715 node = get_Conv_op(node);
720 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
722 ir_mode *mode = get_irn_mode(node);
727 if (mode_is_signed(mode)) {
732 block = get_nodes_block(node);
733 dbgi = get_irn_dbg_info(node);
735 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
739 * matches operands of a node into ia32 addressing/operand modes. This covers
740 * usage of source address mode, immediates, operations with non 32-bit modes,
742 * The resulting data is filled into the @p am struct. block is the block
743 * of the node whose arguments are matched. op1, op2 are the first and second
744 * input that are matched (op1 may be NULL). other_op is another unrelated
745 * input that is not matched! but which is needed sometimes to check if AM
746 * for op1/op2 is legal.
747 * @p flags describes the supported modes of the operation in detail.
749 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
750 ir_node *op1, ir_node *op2, ir_node *other_op,
753 ia32_address_t *addr = &am->addr;
754 ir_mode *mode = get_irn_mode(op2);
755 int mode_bits = get_mode_size_bits(mode);
756 ir_node *new_op1, *new_op2;
758 unsigned commutative;
759 int use_am_and_immediates;
762 memset(am, 0, sizeof(am[0]));
764 commutative = (flags & match_commutative) != 0;
765 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
766 use_am = (flags & match_am) != 0;
767 use_immediate = (flags & match_immediate) != 0;
768 assert(!use_am_and_immediates || use_immediate);
771 assert(!commutative || op1 != NULL);
772 assert(use_am || !(flags & match_8bit_am));
773 assert(use_am || !(flags & match_16bit_am));
775 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
776 (mode_bits == 16 && !(flags & match_16bit_am))) {
780 /* we can simply skip downconvs for mode neutral nodes: the upper bits
781 * can be random for these operations */
782 if (flags & match_mode_neutral) {
783 op2 = ia32_skip_downconv(op2);
785 op1 = ia32_skip_downconv(op1);
788 op2 = ia32_skip_sameconv(op2);
790 op1 = ia32_skip_sameconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = ia32_try_create_Immediate(op2, 0);
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, ia32_create_am_normal);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
810 am->op_type = ia32_AddrModeS;
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, ia32_create_am_normal);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(current_ir_graph);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = true;
830 am->op_type = ia32_AddrModeS;
832 am->op_type = ia32_Normal;
834 if (flags & match_try_am) {
840 mode = get_irn_mode(op2);
841 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
842 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
844 new_op2 = create_upconv(op2, NULL);
845 am->ls_mode = mode_Iu;
847 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 new_op2 = be_transform_node(op2);
850 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
853 if (addr->base == NULL)
854 addr->base = noreg_GP;
855 if (addr->index == NULL)
856 addr->index = noreg_GP;
857 if (addr->mem == NULL)
860 am->new_op1 = new_op1;
861 am->new_op2 = new_op2;
862 am->commutative = commutative;
866 * "Fixes" a node that uses address mode by turning it into mode_T
867 * and returning a pn_ia32_res Proj.
869 * @param node the node
870 * @param am its address mode
872 * @return a Proj(pn_ia32_res) if a memory address mode is used,
875 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
880 if (am->mem_proj == NULL)
883 /* we have to create a mode_T so the old MemProj can attach to us */
884 mode = get_irn_mode(node);
885 load = get_Proj_pred(am->mem_proj);
887 be_set_transformed_node(load, node);
889 if (mode != mode_T) {
890 set_irn_mode(node, mode_T);
891 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
898 * Construct a standard binary operation, set AM and immediate if required.
900 * @param node The original node for which the binop is created
901 * @param op1 The first operand
902 * @param op2 The second operand
903 * @param func The node constructor function
904 * @return The constructed ia32 node.
906 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
907 construct_binop_func *func, match_flags_t flags)
910 ir_node *block, *new_block, *new_node;
911 ia32_address_mode_t am;
912 ia32_address_t *addr = &am.addr;
914 block = get_nodes_block(node);
915 match_arguments(&am, block, op1, op2, NULL, flags);
917 dbgi = get_irn_dbg_info(node);
918 new_block = be_transform_node(block);
919 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
920 am.new_op1, am.new_op2);
921 set_am_attributes(new_node, &am);
922 /* we can't use source address mode anymore when using immediates */
923 if (!(flags & match_am_and_immediates) &&
924 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
925 set_ia32_am_support(new_node, ia32_am_none);
926 SET_IA32_ORIG_NODE(new_node, node);
928 new_node = fix_mem_proj(new_node, &am);
934 * Generic names for the inputs of an ia32 binary op.
937 n_ia32_l_binop_left, /**< ia32 left input */
938 n_ia32_l_binop_right, /**< ia32 right input */
939 n_ia32_l_binop_eflags /**< ia32 eflags input */
941 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
942 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
943 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
944 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
945 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
946 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
949 * Construct a binary operation which also consumes the eflags.
951 * @param node The node to transform
952 * @param func The node constructor function
953 * @param flags The match flags
954 * @return The constructor ia32 node
956 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
959 ir_node *src_block = get_nodes_block(node);
960 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
961 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
962 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
964 ir_node *block, *new_node, *new_eflags;
965 ia32_address_mode_t am;
966 ia32_address_t *addr = &am.addr;
968 match_arguments(&am, src_block, op1, op2, eflags, flags);
970 dbgi = get_irn_dbg_info(node);
971 block = be_transform_node(src_block);
972 new_eflags = be_transform_node(eflags);
973 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
974 am.new_op1, am.new_op2, new_eflags);
975 set_am_attributes(new_node, &am);
976 /* we can't use source address mode anymore when using immediates */
977 if (!(flags & match_am_and_immediates) &&
978 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
979 set_ia32_am_support(new_node, ia32_am_none);
980 SET_IA32_ORIG_NODE(new_node, node);
982 new_node = fix_mem_proj(new_node, &am);
987 static ir_node *get_fpcw(void)
989 if (initial_fpcw != NULL)
992 initial_fpcw = be_transform_node(old_initial_fpcw);
996 static ir_node *skip_float_upconv(ir_node *node)
998 ir_mode *mode = get_irn_mode(node);
999 assert(mode_is_float(mode));
1001 while (is_Conv(node)) {
1002 ir_node *pred = get_Conv_op(node);
1003 ir_mode *pred_mode = get_irn_mode(pred);
1006 * suboptimal, but without this check the address mode matcher
1007 * can incorrectly think that something has only 1 user
1009 if (get_irn_n_edges(node) > 1)
1012 if (!mode_is_float(pred_mode)
1013 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1021 static void check_x87_floatmode(ir_mode *mode)
1023 if (mode != ia32_mode_E) {
1024 panic("ia32: x87 only supports x86 extended float mode");
1029 * Construct a standard binary operation, set AM and immediate if required.
1031 * @param op1 The first operand
1032 * @param op2 The second operand
1033 * @param func The node constructor function
1034 * @return The constructed ia32 node.
1036 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1037 construct_binop_float_func *func)
1043 ia32_address_mode_t am;
1044 ia32_address_t *addr = &am.addr;
1045 ia32_x87_attr_t *attr;
1046 /* All operations are considered commutative, because there are reverse
1048 match_flags_t flags = match_commutative | match_am;
1050 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1051 check_x87_floatmode(mode);
1053 op1 = skip_float_upconv(op1);
1054 op2 = skip_float_upconv(op2);
1056 block = get_nodes_block(node);
1057 match_arguments(&am, block, op1, op2, NULL, flags);
1059 dbgi = get_irn_dbg_info(node);
1060 new_block = be_transform_node(block);
1061 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1062 am.new_op1, am.new_op2, get_fpcw());
1063 set_am_attributes(new_node, &am);
1065 attr = get_ia32_x87_attr(new_node);
1066 attr->attr.data.ins_permuted = am.ins_permuted;
1068 SET_IA32_ORIG_NODE(new_node, node);
1070 new_node = fix_mem_proj(new_node, &am);
1076 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1078 * @param op1 The first operand
1079 * @param op2 The second operand
1080 * @param func The node constructor function
1081 * @return The constructed ia32 node.
1083 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1084 construct_shift_func *func,
1085 match_flags_t flags)
1088 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1089 ir_mode *mode = get_irn_mode(node);
1091 assert(! mode_is_float(mode));
1092 assert(flags & match_immediate);
1093 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1095 if (get_mode_modulo_shift(mode) != 32)
1096 panic("modulo shift!=32 not supported by ia32 backend");
1098 if (flags & match_mode_neutral) {
1099 op1 = ia32_skip_downconv(op1);
1100 new_op1 = be_transform_node(op1);
1101 } else if (get_mode_size_bits(mode) != 32) {
1102 new_op1 = create_upconv(op1, node);
1104 new_op1 = be_transform_node(op1);
1107 /* the shift amount can be any mode that is bigger than 5 bits, since all
1108 * other bits are ignored anyway */
1109 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1110 ir_node *const op = get_Conv_op(op2);
1111 if (mode_is_float(get_irn_mode(op)))
1114 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1116 new_op2 = create_immediate_or_transform(op2, 0);
1118 dbgi = get_irn_dbg_info(node);
1119 block = get_nodes_block(node);
1120 new_block = be_transform_node(block);
1121 new_node = func(dbgi, new_block, new_op1, new_op2);
1122 SET_IA32_ORIG_NODE(new_node, node);
1124 /* lowered shift instruction may have a dependency operand, handle it here */
1125 if (get_irn_arity(node) == 3) {
1126 /* we have a dependency */
1127 ir_node* dep = get_irn_n(node, 2);
1128 if (get_irn_n_edges(dep) > 1) {
1129 /* ... which has at least one user other than 'node' */
1130 ir_node *new_dep = be_transform_node(dep);
1131 add_irn_dep(new_node, new_dep);
1140 * Construct a standard unary operation, set AM and immediate if required.
1142 * @param op The operand
1143 * @param func The node constructor function
1144 * @return The constructed ia32 node.
1146 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1147 match_flags_t flags)
1150 ir_node *block, *new_block, *new_op, *new_node;
1152 assert(flags == 0 || flags == match_mode_neutral);
1153 if (flags & match_mode_neutral) {
1154 op = ia32_skip_downconv(op);
1157 new_op = be_transform_node(op);
1158 dbgi = get_irn_dbg_info(node);
1159 block = get_nodes_block(node);
1160 new_block = be_transform_node(block);
1161 new_node = func(dbgi, new_block, new_op);
1163 SET_IA32_ORIG_NODE(new_node, node);
1168 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1169 ia32_address_t *addr)
1179 base = be_transform_node(base);
1186 idx = be_transform_node(idx);
1189 /* segment overrides are ineffective for Leas :-( so we have to patch
1191 if (addr->tls_segment) {
1192 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1193 assert(addr->symconst_ent != NULL);
1194 if (base == noreg_GP)
1197 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1198 addr->tls_segment = false;
1201 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1202 set_address(res, addr);
1208 * Returns non-zero if a given address mode has a symbolic or
1209 * numerical offset != 0.
1211 static int am_has_immediates(const ia32_address_t *addr)
1213 return addr->offset != 0 || addr->symconst_ent != NULL
1214 || addr->frame_entity || addr->use_frame;
1217 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1218 ir_node *high, ir_node *low,
1222  * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1223  * op1 - target to be shifted
1224  * op2 - contains bits to be shifted into target
1226  * Only op3 can be an immediate.
1228 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1229 ir_node *high, ir_node *low, ir_node *count,
1230 new_shiftd_func func)
/* transform all operands into backend nodes up front */
1232 ir_node *new_block = be_transform_node(block);
1233 ir_node *new_high = be_transform_node(high);
1234 ir_node *new_low = be_transform_node(low);
1238 /* the shift amount can be any mode that is bigger than 5 bits, since all
1239 * other bits are ignored anyway */
/* strip single-user integer Convs from the count: only the low 5 bits matter */
1240 while (is_Conv(count) &&
1241 get_irn_n_edges(count) == 1 &&
1242 mode_is_int(get_irn_mode(count))) {
1243 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1244 count = get_Conv_op(count);
/* the count may be encoded as an immediate operand */
1246 new_count = create_immediate_or_transform(count, 0);
/* build the ShlD/ShrD through the supplied node constructor */
1248 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1253 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1256 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1258 if (is_Const(value1) && is_Const(value2)) {
1259 ir_tarval *tv1 = get_Const_tarval(value1);
1260 ir_tarval *tv2 = get_Const_tarval(value2);
1261 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1262 long v1 = get_tarval_long(tv1);
1263 long v2 = get_tarval_long(tv2);
1264 return v1 <= v2 && v2 == 32-v1;
/* Try to recognise an Or/Add that combines two partial shifts into a
 * 64bit double-precision shift (ShlD/ShrD).  Returns the created node
 * or falls through when no pattern matches.
 * NOTE(review): some source lines are elided in this excerpt. */
1270 static ir_node *match_64bit_shift(ir_node *node)
1272 ir_node *op1 = get_binop_left(node);
1273 ir_node *op2 = get_binop_right(node);
1274 assert(is_Or(node) || is_Add(node));
1282 /* match ShlD operation */
1283 if (is_Shl(op1) && is_Shr(op2)) {
1284 ir_node *shl_right = get_Shl_right(op1);
1285 ir_node *shl_left = get_Shl_left(op1);
1286 ir_node *shr_right = get_Shr_right(op2);
1287 ir_node *shr_left = get_Shr_left(op2);
1288 /* constant ShlD operation */
/* shift amounts x and 32-x -> a direct ShlD */
1289 if (is_complementary_shifts(shl_right, shr_right)) {
1290 dbg_info *dbgi = get_irn_dbg_info(node);
1291 ir_node *block = get_nodes_block(node);
1292 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1295 /* constant ShrD operation */
1296 if (is_complementary_shifts(shr_right, shl_right)) {
1297 dbg_info *dbgi = get_irn_dbg_info(node);
1298 ir_node *block = get_nodes_block(node);
1299 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1302 /* lower_dw produces the following for ShlD:
1303 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
/* variable-count ShlD as emitted by the double-word lowering */
1304 if (is_Shr(shr_left) && is_Not(shr_right)
1305 && is_Const_1(get_Shr_right(shr_left))
1306 && get_Not_op(shr_right) == shl_right) {
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_node *block = get_nodes_block(node);
1309 ir_node *val_h = get_Shr_left(shr_left);
1310 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1313 /* lower_dw produces the following for ShrD:
1314 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
/* variable-count ShrD, mirror image of the case above */
1315 if (is_Shl(shl_left) && is_Not(shl_right)
1316 && is_Const_1(get_Shl_right(shl_left))
1317 && get_Not_op(shl_right) == shr_right) {
1318 dbg_info *dbgi = get_irn_dbg_info(node);
1319 ir_node *block = get_nodes_block(node);
1320 ir_node *val_h = get_Shl_left(shl_left);
1321 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1330  * Creates an ia32 Add.
1332  * @return the created ia32 Add node
1334 static ir_node *gen_Add(ir_node *node)
1336 ir_mode *mode = get_irn_mode(node);
1337 ir_node *op1 = get_Add_left(node);
1338 ir_node *op2 = get_Add_right(node);
1340 ir_node *block, *new_block, *new_node, *add_immediate_op;
1341 ia32_address_t addr;
1342 ia32_address_mode_t am;
/* a 64bit shift pattern hidden in an Add takes precedence */
1344 new_node = match_64bit_shift(node);
1345 if (new_node != NULL)
/* float adds go to SSE2 or x87 */
1348 if (mode_is_float(mode)) {
1349 if (ia32_cg_config.use_sse2)
1350 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1351 match_commutative | match_am);
1353 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1356 ia32_mark_non_am(node);
1360 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1361 * 1. Add with immediate -> Lea
1362 * 2. Add with possible source address mode -> Add
1363 * 3. Otherwise -> Lea
/* decompose the Add into an address-mode expression */
1365 memset(&addr, 0, sizeof(addr));
1366 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1367 add_immediate_op = NULL;
1369 dbgi = get_irn_dbg_info(node);
1370 block = get_nodes_block(node);
1371 new_block = be_transform_node(block);
/* case 0: the whole Add folded into immediates -> emit a Const */
1374 if (addr.base == NULL && addr.index == NULL) {
1375 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1376 addr.symconst_sign, 0, addr.offset);
1377 SET_IA32_ORIG_NODE(new_node, node);
1380 /* add with immediate? */
1381 if (addr.index == NULL) {
1382 add_immediate_op = addr.base;
1383 } else if (addr.base == NULL && addr.scale == 0) {
1384 add_immediate_op = addr.index;
1387 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself; warn since it should be optimised away */
1388 if (!am_has_immediates(&addr)) {
1389 #ifdef DEBUG_libfirm
1390 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1393 return be_transform_node(add_immediate_op);
/* case 1: register + immediate is cheapest as a Lea */
1396 new_node = create_lea_from_address(dbgi, new_block, &addr);
1397 SET_IA32_ORIG_NODE(new_node, node);
1401 /* test if we can use source address mode */
1402 match_arguments(&am, block, op1, op2, NULL, match_commutative
1403 | match_mode_neutral | match_am | match_immediate | match_try_am);
1405 /* construct an Add with source address mode */
1406 if (am.op_type == ia32_AddrModeS) {
1407 ia32_address_t *am_addr = &am.addr;
1408 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1409 am_addr->index, am_addr->mem, am.new_op1,
1411 set_am_attributes(new_node, &am);
1412 SET_IA32_ORIG_NODE(new_node, node);
/* re-route the memory Proj of the consumed load onto the new Add */
1414 new_node = fix_mem_proj(new_node, &am);
1419 /* otherwise construct a lea */
1420 new_node = create_lea_from_address(dbgi, new_block, &addr);
1421 SET_IA32_ORIG_NODE(new_node, node);
1426 * Creates an ia32 Mul.
1428 * @return the created ia32 Mul node
1430 static ir_node *gen_Mul(ir_node *node)
1432 ir_node *op1 = get_Mul_left(node);
1433 ir_node *op2 = get_Mul_right(node);
1434 ir_mode *mode = get_irn_mode(node);
1436 if (mode_is_float(mode)) {
1437 if (ia32_cg_config.use_sse2)
1438 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1439 match_commutative | match_am);
1441 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1443 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1444 match_commutative | match_am | match_mode_neutral |
1445 match_immediate | match_am_and_immediates);
1449  * Creates an ia32 Mulh.
1450  * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1451  * this result while Mul returns the lower 32 bit.
1453  * @return the created ia32 Mulh node
1455 static ir_node *gen_Mulh(ir_node *node)
1457 dbg_info *dbgi = get_irn_dbg_info(node);
1458 ir_node *op1 = get_Mulh_left(node);
1459 ir_node *op2 = get_Mulh_right(node);
1460 ir_mode *mode = get_irn_mode(node);
1462 ir_node *proj_res_high;
/* only 32bit operands map onto the x86 widening multiply */
1464 if (get_mode_size_bits(mode) != 32) {
1465 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand imul, unsigned -> mul; project out the high half */
1468 if (mode_is_signed(mode)) {
1469 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1470 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1472 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1473 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1475 return proj_res_high;
1479  * Creates an ia32 And.
1481  * @return The created ia32 And node
1483 static ir_node *gen_And(ir_node *node)
1485 ir_node *op1 = get_And_left(node);
1486 ir_node *op2 = get_And_right(node);
1487 assert(! mode_is_float(get_irn_mode(node)));
1489 /* is it a zero extension? */
/* And with 0xFF/0xFFFF is a zero extension -> use a Conv (movzx) instead */
1490 if (is_Const(op2)) {
1491 ir_tarval *tv = get_Const_tarval(op2);
1492 long v = get_tarval_long(tv);
1494 if (v == 0xFF || v == 0xFFFF) {
1495 dbg_info *dbgi = get_irn_dbg_info(node);
1496 ir_node *block = get_nodes_block(node);
/* NOTE(review): the 0xFF branch selecting an 8bit src_mode is elided here */
1503 assert(v == 0xFFFF);
1506 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with full matching flags */
1511 return gen_binop(node, op1, op2, new_bd_ia32_And,
1512 match_commutative | match_mode_neutral | match_am | match_immediate);
1516  * Creates an ia32 Or.
1518  * @return The created ia32 Or node
1520 static ir_node *gen_Or(ir_node *node)
1522 ir_node *op1 = get_Or_left(node);
1523 ir_node *op2 = get_Or_right(node);
/* an Or combining two partial shifts may really be a ShlD/ShrD */
1526 res = match_64bit_shift(node);
/* NOTE(review): the "if (res != NULL) return res;" line is elided here */
1530 assert (! mode_is_float(get_irn_mode(node)));
1531 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1532 | match_mode_neutral | match_am | match_immediate);
1538 * Creates an ia32 Eor.
1540 * @return The created ia32 Eor node
1542 static ir_node *gen_Eor(ir_node *node)
1544 ir_node *op1 = get_Eor_left(node);
1545 ir_node *op2 = get_Eor_right(node);
1547 assert(! mode_is_float(get_irn_mode(node)));
1548 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1549 | match_mode_neutral | match_am | match_immediate);
1554  * Creates an ia32 Sub.
1556  * @return The created ia32 Sub node
1558 static ir_node *gen_Sub(ir_node *node)
1560 ir_node *op1 = get_Sub_left(node);
1561 ir_node *op2 = get_Sub_right(node);
1562 ir_mode *mode = get_irn_mode(node);
/* float subtraction: SSE2 or x87 (Sub is not commutative -> no match_commutative) */
1564 if (mode_is_float(mode)) {
1565 if (ia32_cg_config.use_sse2)
1566 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1568 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middle end */
1571 if (is_Const(op2)) {
1572 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1576 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1577 | match_am | match_immediate);
/* Combine the transformed source memory with the memory consumed by an
 * address mode, avoiding memory self-loops through the consumed load.
 * NOTE(review): several lines (declarations, loop tail) are elided here. */
1580 static ir_node *transform_AM_mem(ir_node *const block,
1581 ir_node *const src_val,
1582 ir_node *const src_mem,
1583 ir_node *const am_mem)
/* no AM memory -> just the transformed source memory */
1585 if (is_NoMem(am_mem)) {
1586 return be_transform_node(src_mem);
1587 } else if (is_Proj(src_val) &&
1589 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1590 /* avoid memory loop */
/* source memory is a Sync: rebuild it without the consumed load's Proj */
1592 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1593 ir_node *const ptr_pred = get_Proj_pred(src_val);
1594 int const arity = get_Sync_n_preds(src_mem);
1599 NEW_ARR_A(ir_node*, ins, arity + 1);
1601 /* NOTE: This sometimes produces dead-code because the old sync in
1602 * src_mem might not be used anymore, we should detect this case
1603 * and kill the sync... */
1604 for (i = arity - 1; i >= 0; --i) {
1605 ir_node *const pred = get_Sync_pred(src_mem, i);
1607 /* avoid memory loop */
1608 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1611 ins[n++] = be_transform_node(pred);
/* a Sync with a single remaining predecessor equal to am_mem is redundant */
1614 if (n==1 && ins[0] == am_mem) {
1616 /* creating a new Sync and relying on CSE may fail,
1617 * if am_mem is a ProjM, which does not yet verify. */
1621 return new_r_Sync(block, n, ins);
/* fallback: synchronise the AM memory with the transformed source memory */
1625 ins[0] = be_transform_node(src_mem);
1627 return new_r_Sync(block, 2, ins);
1632  * Create a 32bit to 64bit signed extension.
1634  * @param dbgi debug info
1635  * @param block the block where node nodes should be placed
1636  * @param val the value to extend
1637  * @param orig the original node
1639 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1640 ir_node *val, const ir_node *orig)
/* prefer the short cltd/cdq encoding when the target config allows it */
1645 if (ia32_cg_config.use_short_sex_eax) {
1646 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1647 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* fallback: arithmetic shift right by 31 replicates the sign bit */
1649 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1650 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1652 SET_IA32_ORIG_NODE(res, orig);
1657  * Generates an ia32 Div with additional infrastructure for the
1658  * register allocator if needed.
1660 static ir_node *create_Div(ir_node *node)
1662 dbg_info *dbgi = get_irn_dbg_info(node);
1663 ir_node *block = get_nodes_block(node);
1664 ir_node *new_block = be_transform_node(block);
1665 int throws_exception = ir_throws_exception(node);
1672 ir_node *sign_extension;
1673 ia32_address_mode_t am;
1674 ia32_address_t *addr = &am.addr;
1676 /* the upper bits have random contents for smaller modes */
/* handles both Div and Mod: they share the x86 div/idiv instruction */
1677 switch (get_irn_opcode(node)) {
1679 op1 = get_Div_left(node);
1680 op2 = get_Div_right(node);
1681 mem = get_Div_mem(node);
1682 mode = get_Div_resmode(node);
1685 op1 = get_Mod_left(node);
1686 op2 = get_Mod_right(node);
1687 mem = get_Mod_mem(node);
1688 mode = get_Mod_resmode(node);
1691 panic("invalid divmod node %+F", node);
1694 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1696 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1697 is the memory of the consumed address. We can have only the second op as address
1698 in Div nodes, so check only op2. */
1699 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs edx:eax sign-extended, unsigned needs edx = 0 */
1701 if (mode_is_signed(mode)) {
1702 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1703 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1704 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1706 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1708 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1709 addr->index, new_mem, am.new_op2,
1710 am.new_op1, sign_extension);
1712 ir_set_throws_exception(new_node, throws_exception);
/* keep the pinned state of the original node (division may trap) */
1714 set_irn_pinned(new_node, get_irn_pinned(node));
1716 set_am_attributes(new_node, &am);
1717 SET_IA32_ORIG_NODE(new_node, node);
1719 new_node = fix_mem_proj(new_node, &am);
1725 * Generates an ia32 Mod.
1727 static ir_node *gen_Mod(ir_node *node)
1729 return create_Div(node);
1733 * Generates an ia32 Div.
1735 static ir_node *gen_Div(ir_node *node)
1737 ir_mode *mode = get_Div_resmode(node);
1738 if (mode_is_float(mode)) {
1739 ir_node *op1 = get_Div_left(node);
1740 ir_node *op2 = get_Div_right(node);
1742 if (ia32_cg_config.use_sse2) {
1743 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1745 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1749 return create_Div(node);
1753 * Creates an ia32 Shl.
1755 * @return The created ia32 Shl node
1757 static ir_node *gen_Shl(ir_node *node)
1759 ir_node *left = get_Shl_left(node);
1760 ir_node *right = get_Shl_right(node);
1762 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1763 match_mode_neutral | match_immediate);
1767 * Creates an ia32 Shr.
1769 * @return The created ia32 Shr node
1771 static ir_node *gen_Shr(ir_node *node)
1773 ir_node *left = get_Shr_left(node);
1774 ir_node *right = get_Shr_right(node);
1776 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1782  * Creates an ia32 Sar.
1784  * @return The created ia32 Shrs node
1786 static ir_node *gen_Shrs(ir_node *node)
1788 ir_node *left = get_Shrs_left(node);
1789 ir_node *right = get_Shrs_right(node);
/* Shrs by 31 is a pure sign extension -> use cltd/sar-31 helper */
1791 if (is_Const(right)) {
1792 ir_tarval *tv = get_Const_tarval(right);
1793 long val = get_tarval_long(tv);
1795 /* this is a sign extension */
1796 dbg_info *dbgi = get_irn_dbg_info(node);
1797 ir_node *block = be_transform_node(get_nodes_block(node));
1798 ir_node *new_op = be_transform_node(left);
1800 return create_sex_32_64(dbgi, block, new_op, node);
1804 /* 8 or 16 bit sign extension? */
/* Shl(x,C) >>s C with C in {16,24} is a movsx from a smaller mode */
1805 if (is_Const(right) && is_Shl(left)) {
1806 ir_node *shl_left = get_Shl_left(left);
1807 ir_node *shl_right = get_Shl_right(left);
1808 if (is_Const(shl_right)) {
1809 ir_tarval *tv1 = get_Const_tarval(right);
1810 ir_tarval *tv2 = get_Const_tarval(shl_right);
1811 if (tv1 == tv2 && tarval_is_long(tv1)) {
1812 long val = get_tarval_long(tv1);
1813 if (val == 16 || val == 24) {
1814 dbg_info *dbgi = get_irn_dbg_info(node);
1815 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection lines (8/16 bit) are elided here */
1825 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic arithmetic right shift */
1834 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1840 * Creates an ia32 Rol.
1842 * @param op1 The first operator
1843 * @param op2 The second operator
1844 * @return The created ia32 RotL node
1846 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1848 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1854 * Creates an ia32 Ror.
1855 * NOTE: There is no RotR with immediate because this would always be a RotL
1856 * "imm-mode_size_bits" which can be pre-calculated.
1858 * @param op1 The first operator
1859 * @param op2 The second operator
1860 * @return The created ia32 RotR node
1862 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1864 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1870 * Creates an ia32 RotR or RotL (depending on the found pattern).
1872 * @return The created ia32 RotL or RotR node
1874 static ir_node *gen_Rotl(ir_node *node)
1876 ir_node *op1 = get_Rotl_left(node);
1877 ir_node *op2 = get_Rotl_right(node);
1879 if (is_Minus(op2)) {
1880 return gen_Ror(node, op1, get_Minus_op(op2));
1883 return gen_Rol(node, op1, op2);
1889  * Transforms a Minus node.
1891  * @return The created ia32 Minus node
1893 static ir_node *gen_Minus(ir_node *node)
1895 ir_node *op = get_Minus_op(node);
1896 ir_node *block = be_transform_node(get_nodes_block(node));
1897 dbg_info *dbgi = get_irn_dbg_info(node);
1898 ir_mode *mode = get_irn_mode(node);
/* float negation flips the sign bit; integers use a plain Neg */
1903 if (mode_is_float(mode)) {
1904 ir_node *new_op = be_transform_node(op);
1905 if (ia32_cg_config.use_sse2) {
1906 /* TODO: non-optimal... if we have many xXors, then we should
1907 * rather create a load for the const and use that instead of
1908 * several AM nodes... */
/* SSE: xor with a sign-bit mask constant from memory */
1909 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1911 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1912 noreg_GP, nomem, new_op, noreg_xmm);
1914 size = get_mode_size_bits(mode);
1915 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1917 set_ia32_am_sc(new_node, ent);
1918 set_ia32_op_type(new_node, ia32_AddrModeS);
1919 set_ia32_ls_mode(new_node, mode);
/* x87: fchs changes the sign directly */
1921 check_x87_floatmode(mode);
1922 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1925 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1928 SET_IA32_ORIG_NODE(new_node, node);
1934 * Transforms a Not node.
1936 * @return The created ia32 Not node
1938 static ir_node *gen_Not(ir_node *node)
1940 ir_node *op = get_Not_op(node);
1942 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1943 assert (! mode_is_float(get_irn_mode(node)));
1945 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create a float Abs (and optionally -Abs when negate is set) either as an
 * SSE and-mask or as an x87 fabs (+ fchs for the negate case). */
1948 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1949 bool negate, ir_node *node)
1951 ir_node *new_block = be_transform_node(block);
1952 ir_mode *mode = get_irn_mode(op);
1953 ir_node *new_op = be_transform_node(op);
1958 assert(mode_is_float(mode));
/* SSE: clear the sign bit by anding with a mask constant from memory */
1960 if (ia32_cg_config.use_sse2) {
1961 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1962 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1963 noreg_GP, nomem, new_op, noreg_fp);
1965 size = get_mode_size_bits(mode);
1966 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1968 set_ia32_am_sc(new_node, ent);
1970 SET_IA32_ORIG_NODE(new_node, node);
1972 set_ia32_op_type(new_node, ia32_AddrModeS);
1973 set_ia32_ls_mode(new_node, mode);
1975 /* TODO, implement -Abs case */
/* x87: fabs, followed by fchs if a negated absolute value is requested */
1978 check_x87_floatmode(mode);
1979 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1980 SET_IA32_ORIG_NODE(new_node, node);
1982 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1983 SET_IA32_ORIG_NODE(new_node, node);
1991 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1993 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1995 dbg_info *dbgi = get_irn_dbg_info(cmp);
1996 ir_node *block = get_nodes_block(cmp);
1997 ir_node *new_block = be_transform_node(block);
1998 ir_node *op1 = be_transform_node(x);
1999 ir_node *op2 = be_transform_node(n);
2001 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation onto the matching ia32 condition code.
 * Three regimes: float (parity-aware), signed int, unsigned int.
 * overflow_possible selects signed-less vs. plain sign-flag tests.
 * NOTE(review): the switch headers and some case lines are elided here. */
2004 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2006 bool overflow_possible)
2008 if (mode_is_float(mode)) {
2010 case ir_relation_equal: return ia32_cc_float_equal;
2011 case ir_relation_less: return ia32_cc_float_below;
2012 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2013 case ir_relation_greater: return ia32_cc_float_above;
2014 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2015 case ir_relation_less_greater: return ia32_cc_not_equal;
2016 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2017 case ir_relation_unordered: return ia32_cc_parity;
2018 case ir_relation_unordered_equal: return ia32_cc_equal;
2019 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2020 case ir_relation_unordered_less_equal:
2021 return ia32_cc_float_unordered_below_equal;
2022 case ir_relation_unordered_greater:
2023 return ia32_cc_float_unordered_above;
2024 case ir_relation_unordered_greater_equal:
2025 return ia32_cc_float_unordered_above_equal;
2026 case ir_relation_unordered_less_greater:
2027 return ia32_cc_float_not_equal;
2028 case ir_relation_false:
2029 case ir_relation_true:
2030 /* should we introduce a jump always/jump never? */
2033 panic("Unexpected float pnc");
2034 } else if (mode_is_signed(mode)) {
2036 case ir_relation_unordered_equal:
2037 case ir_relation_equal: return ia32_cc_equal;
2038 case ir_relation_unordered_less:
2039 case ir_relation_less:
/* without possible overflow a sign check suffices for "< 0" style tests */
2040 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2041 case ir_relation_unordered_less_equal:
2042 case ir_relation_less_equal: return ia32_cc_less_equal;
2043 case ir_relation_unordered_greater:
2044 case ir_relation_greater: return ia32_cc_greater;
2045 case ir_relation_unordered_greater_equal:
2046 case ir_relation_greater_equal:
2047 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2048 case ir_relation_unordered_less_greater:
2049 case ir_relation_less_greater: return ia32_cc_not_equal;
2050 case ir_relation_less_equal_greater:
2051 case ir_relation_unordered:
2052 case ir_relation_false:
2053 case ir_relation_true:
2054 /* introduce jump always/jump never? */
2057 panic("Unexpected pnc");
/* unsigned integer comparisons use the below/above condition codes */
2060 case ir_relation_unordered_equal:
2061 case ir_relation_equal: return ia32_cc_equal;
2062 case ir_relation_unordered_less:
2063 case ir_relation_less: return ia32_cc_below;
2064 case ir_relation_unordered_less_equal:
2065 case ir_relation_less_equal: return ia32_cc_below_equal;
2066 case ir_relation_unordered_greater:
2067 case ir_relation_greater: return ia32_cc_above;
2068 case ir_relation_unordered_greater_equal:
2069 case ir_relation_greater_equal: return ia32_cc_above_equal;
2070 case ir_relation_unordered_less_greater:
2071 case ir_relation_less_greater: return ia32_cc_not_equal;
2072 case ir_relation_less_equal_greater:
2073 case ir_relation_unordered:
2074 case ir_relation_false:
2075 case ir_relation_true:
2076 /* introduce jump always/jump never? */
2079 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and report the condition
 * code the consumer has to test. Recognises the bit-test (bt) pattern.
 * NOTE(review): some lines of the bt pattern match are elided here. */
2083 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2085 /* must have a Cmp as input */
2086 ir_relation relation = get_Cmp_relation(cmp);
2087 ir_node *l = get_Cmp_left(cmp);
2088 ir_node *r = get_Cmp_right(cmp);
2089 ir_mode *mode = get_irn_mode(l);
2090 bool overflow_possible;
2093 /* check for bit-test */
2094 if (ia32_cg_config.use_bt
2095 && (relation == ir_relation_equal
2096 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2097 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2099 ir_node *la = get_And_left(l);
2100 ir_node *ra = get_And_right(l);
/* match (1 << n) & x against zero -> a single bt instruction */
2107 ir_node *c = get_Shl_left(la);
2108 if (is_Const_1(c) && is_Const_0(r)) {
2109 /* (1 << n) & ra) */
2110 ir_node *n = get_Shl_right(la);
2111 flags = gen_bt(cmp, ra, n);
2112 /* the bit is copied into the CF flag */
2113 if (relation & ir_relation_equal)
2114 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2116 *cc_out = ia32_cc_below; /* test for CF=1 */
2122 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2123 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2124 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2125 * a predecessor node). So add the < bit.
2126 * (Note that we do not want to produce <=> (which can happen for
2127 * unoptimized code), because no x86 flag can represent that */
2128 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2129 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparing against 0 cannot overflow -> cheaper sign-flag tests allowed */
2131 overflow_possible = true;
2132 if (is_Const(r) && is_Const_null(r))
2133 overflow_possible = false;
2135 /* just do a normal transformation of the Cmp */
2136 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2137 flags = be_transform_node(cmp);
2142  * Transforms a Load.
2144  * @return the created ia32 Load node
2146 static ir_node *gen_Load(ir_node *node)
2148 ir_node *old_block = get_nodes_block(node);
2149 ir_node *block = be_transform_node(old_block);
2150 ir_node *ptr = get_Load_ptr(node);
2151 ir_node *mem = get_Load_mem(node);
2152 ir_node *new_mem = be_transform_node(mem);
2153 dbg_info *dbgi = get_irn_dbg_info(node);
2154 ir_mode *mode = get_Load_mode(node);
2155 int throws_exception = ir_throws_exception(node);
2159 ia32_address_t addr;
2161 /* construct load address */
2162 memset(&addr, 0, sizeof(addr));
2163 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* transform base and index of the computed address */
2170 base = be_transform_node(base);
2176 idx = be_transform_node(idx);
/* pick the load flavour: SSE, x87 or GP (with movzx/movsx for < 32 bit) */
2179 if (mode_is_float(mode)) {
2180 if (ia32_cg_config.use_sse2) {
2181 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2184 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2188 assert(mode != mode_b);
2190 /* create a conv node with address mode for smaller modes */
2191 if (get_mode_size_bits(mode) < 32) {
2192 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2193 new_mem, noreg_GP, mode);
2195 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2198 ir_set_throws_exception(new_node, throws_exception);
2200 set_irn_pinned(new_node, get_irn_pinned(node));
2201 set_ia32_op_type(new_node, ia32_AddrModeS);
2202 set_ia32_ls_mode(new_node, mode);
2203 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
2205 if (get_irn_pinned(node) == op_pin_state_floats) {
2206 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2207 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2208 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2209 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2212 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a load feeding this node can be folded into a
 * destination address mode (load-op-store on the same address).
 * NOTE(review): several guard lines are elided in this excerpt. */
2217 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2218 ir_node *ptr, ir_node *other)
2225 /* we only use address mode if we're the only user of the load */
2226 if (get_irn_n_edges(node) > 1)
2229 load = get_Proj_pred(node);
/* load must live in the same block as the store */
2232 if (get_nodes_block(load) != block)
2235 /* store should have the same pointer as the load */
2236 if (get_Load_ptr(load) != ptr)
2239 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2240 if (other != NULL &&
2241 get_nodes_block(other) == block &&
2242 heights_reachable_in_block(ia32_heights, other, load)) {
2246 if (ia32_prevents_AM(block, load, mem))
2248 /* Store should be attached to the load via mem */
2249 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a binary operation with destination address mode
 * (e.g. add [mem], reg) when one operand is a foldable load.
 * NOTE(review): declarations and fallthrough lines are elided here. */
2254 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2255 ir_node *mem, ir_node *ptr, ir_mode *mode,
2256 construct_binop_dest_func *func,
2257 construct_binop_dest_func *func8bit,
2258 match_flags_t flags)
2260 ir_node *src_block = get_nodes_block(node);
2268 ia32_address_mode_t am;
2269 ia32_address_t *addr = &am.addr;
2270 memset(&am, 0, sizeof(am));
2272 assert(flags & match_immediate); /* there is no destam node without... */
2273 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the load; the other becomes the source operand */
2275 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2276 build_address(&am, op1, ia32_create_am_double_use);
2277 new_op = create_immediate_or_transform(op2, 0);
2278 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2279 build_address(&am, op2, ia32_create_am_double_use);
2280 new_op = create_immediate_or_transform(op1, 0);
/* normalize empty address parts to the GP no-register */
2285 if (addr->base == NULL)
2286 addr->base = noreg_GP;
2287 if (addr->index == NULL)
2288 addr->index = noreg_GP;
2289 if (addr->mem == NULL)
2292 dbgi = get_irn_dbg_info(node);
2293 block = be_transform_node(src_block);
2294 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need the dedicated 8bit constructor */
2296 if (get_mode_size_bits(mode) == 8) {
2297 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2299 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2301 set_address(new_node, addr);
2302 set_ia32_op_type(new_node, ia32_AddrModeD);
2303 set_ia32_ls_mode(new_node, mode);
2304 SET_IA32_ORIG_NODE(new_node, node);
/* re-route the load's memory Projs onto the combined load-op-store node */
2306 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2307 mem_proj = be_transform_node(am.mem_proj);
2308 be_set_transformed_node(am.mem_proj, new_node);
2309 be_set_transformed_node(mem_proj, new_node);
/* Build a unary operation with destination address mode
 * (e.g. not [mem], neg [mem]) when the operand is a foldable load. */
2314 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2315 ir_node *ptr, ir_mode *mode,
2316 construct_unop_dest_func *func)
2318 ir_node *src_block = get_nodes_block(node);
2324 ia32_address_mode_t am;
2325 ia32_address_t *addr = &am.addr;
/* bail out when the operand load cannot be folded safely */
2327 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2330 memset(&am, 0, sizeof(am));
2331 build_address(&am, op, ia32_create_am_double_use);
2333 dbgi = get_irn_dbg_info(node);
2334 block = be_transform_node(src_block);
2335 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2336 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2337 set_address(new_node, addr);
2338 set_ia32_op_type(new_node, ia32_AddrModeD);
2339 set_ia32_ls_mode(new_node, mode);
2340 SET_IA32_ORIG_NODE(new_node, node);
/* re-route the load's memory Projs onto the combined node */
2342 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2343 mem_proj = be_transform_node(am.mem_proj);
2344 be_set_transformed_node(am.mem_proj, new_node);
2345 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(sel, 0/1, 1/0)) of an 8bit value into a single
 * SetccMem instruction.  Returns NULL when the pattern does not apply. */
2350 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2352 ir_mode *mode = get_irn_mode(node);
2353 ir_node *mux_true = get_Mux_true(node);
2354 ir_node *mux_false = get_Mux_false(node);
2362 ia32_condition_code_t cc;
2363 ia32_address_t addr;
/* setcc writes a single byte only */
2365 if (get_mode_size_bits(mode) != 8)
/* accept Mux(sel, 1, 0) directly, Mux(sel, 0, 1) with negated condition */
2368 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2370 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2376 cond = get_Mux_sel(node);
2377 flags = get_flags_node(cond, &cc);
2378 /* we can't handle the float special cases with SetM */
2379 if (cc & ia32_cc_additional_float_cases)
2382 cc = ia32_negate_condition_code(cc);
2384 build_address_ptr(&addr, ptr, mem);
2386 dbgi = get_irn_dbg_info(node);
2387 block = get_nodes_block(node);
2388 new_block = be_transform_node(block);
2389 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2390 addr.index, addr.mem, flags, cc);
2391 set_address(new_node, &addr);
2392 set_ia32_op_type(new_node, ia32_AddrModeD);
2393 set_ia32_ls_mode(new_node, mode);
2394 SET_IA32_ORIG_NODE(new_node, node);
/* Try to fold the computation feeding a Store into a destination
 * address mode instruction (read-modify-write on memory).
 * Returns the new node or NULL when no pattern applies.
 * NOTE(review): some guard/case lines are elided in this excerpt. */
2399 static ir_node *try_create_dest_am(ir_node *node)
2401 ir_node *val = get_Store_value(node);
2402 ir_node *mem = get_Store_mem(node);
2403 ir_node *ptr = get_Store_ptr(node);
2404 ir_mode *mode = get_irn_mode(val);
2405 unsigned bits = get_mode_size_bits(mode);
2410 /* handle only GP modes for now... */
2411 if (!ia32_mode_needs_gp_reg(mode))
2415 /* store must be the only user of the val node */
2416 if (get_irn_n_edges(val) > 1)
2418 /* skip pointless convs */
2420 ir_node *conv_op = get_Conv_op(val);
2421 ir_mode *pred_mode = get_irn_mode(conv_op);
2422 if (!ia32_mode_needs_gp_reg(pred_mode))
2424 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2432 /* value must be in the same block */
2433 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation that produces the stored value */
2436 switch (get_irn_opcode(val)) {
2438 op1 = get_Add_left(val);
2439 op2 = get_Add_right(val);
/* Add/Sub by +-1 can become a single inc/dec on memory */
2440 if (ia32_cg_config.use_incdec) {
2441 if (is_Const_1(op2)) {
2442 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2444 } else if (is_Const_Minus_1(op2)) {
2445 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2449 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2450 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2451 match_commutative | match_immediate);
2454 op1 = get_Sub_left(val);
2455 op2 = get_Sub_right(val);
2456 if (is_Const(op2)) {
2457 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2459 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2460 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2464 op1 = get_And_left(val);
2465 op2 = get_And_right(val);
2466 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2467 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2468 match_commutative | match_immediate);
2471 op1 = get_Or_left(val);
2472 op2 = get_Or_right(val);
2473 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2474 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2475 match_commutative | match_immediate);
2478 op1 = get_Eor_left(val);
2479 op2 = get_Eor_right(val);
2480 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2481 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2482 match_commutative | match_immediate);
2485 op1 = get_Shl_left(val);
2486 op2 = get_Shl_right(val);
/* shifts have no separate 8bit variant -> same constructor twice */
2487 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2488 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2492 op1 = get_Shr_left(val);
2493 op2 = get_Shr_right(val);
2494 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2495 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2499 op1 = get_Shrs_left(val);
2500 op2 = get_Shrs_right(val);
2501 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2502 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2506 op1 = get_Rotl_left(val);
2507 op2 = get_Rotl_right(val);
2508 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2509 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2512 /* TODO: match ROR patterns... */
2514 new_node = try_create_SetMem(val, ptr, mem);
2518 op1 = get_Minus_op(val);
2519 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2522 /* should be lowered already */
2523 assert(mode != mode_b);
2524 op1 = get_Not_op(val);
2525 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* preserve the original Store's pinned state on the combined node */
2531 if (new_node != NULL) {
2532 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2533 get_irn_pinned(node) == op_pin_state_pinned) {
2534 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Returns whether an integer of the given mode can be handled directly by
 * the x87 fild/fist family: only signed 16-bit and 32-bit modes qualify.
 * NOTE(review): the return statements are missing from this excerpt.
 */
2541 static bool possible_int_mode_for_fp(ir_mode *mode)
2545 if (!mode_is_signed(mode))
2547 size = get_mode_size_bits(mode);
2548 if (size != 16 && size != 32)
/*
 * Tests whether @p node is a Conv from a float mode to a signed 16/32-bit
 * integer mode (a candidate for a fist/fisttp store).
 * NOTE(review): lines are missing here — presumably an is_Conv() guard and
 * the return statements; verify against the full source.
 */
2553 static int is_float_to_int_conv(const ir_node *node)
2555 ir_mode *mode = get_irn_mode(node);
2559 if (!possible_int_mode_for_fp(mode))
2564 conv_op = get_Conv_op(node);
2565 conv_mode = get_irn_mode(conv_op);
2567 if (!mode_is_float(conv_mode))
/*
 * Transform a Store of a float constant into one or more plain integer
 * Stores of immediates (the constant's raw bytes), avoiding an FPU/SSE
 * register round-trip.  Multiple stores are chained and combined with a
 * Sync when more than one memory Proj results.
 * NOTE(review): the surrounding loop header and the offset/size update
 * logic are partially missing from this excerpt.
 */
2574 * Transform a Store(floatConst) into a sequence of
2577 * @return the created ia32 Store node
2579 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2581 ir_mode *mode = get_irn_mode(cns);
2582 unsigned size = get_mode_size_bytes(mode);
2583 ir_tarval *tv = get_Const_tarval(cns);
2584 ir_node *block = get_nodes_block(node);
2585 ir_node *new_block = be_transform_node(block);
2586 ir_node *ptr = get_Store_ptr(node);
2587 ir_node *mem = get_Store_mem(node);
2588 dbg_info *dbgi = get_irn_dbg_info(node);
2591 int throws_exception = ir_throws_exception(node);
2593 ia32_address_t addr;
2595 build_address_ptr(&addr, ptr, mem);
/* assemble a 32-bit little-endian chunk from the tarval's bytes */
2602 val= get_tarval_sub_bits(tv, ofs) |
2603 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2604 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2605 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2608 } else if (size >= 2) {
/* remaining 16-bit chunk */
2609 val= get_tarval_sub_bits(tv, ofs) |
2610 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2614 panic("invalid size of Store float to mem (%+F)", node);
2616 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2618 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2619 addr.index, addr.mem, imm);
2620 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2622 ir_set_throws_exception(new_node, throws_exception);
2623 set_irn_pinned(new_node, get_irn_pinned(node));
2624 set_ia32_op_type(new_node, ia32_AddrModeD);
2625 set_ia32_ls_mode(new_node, mode);
2626 set_address(new_node, &addr);
2627 SET_IA32_ORIG_NODE(new_node, node);
/* advance the address offset for the next chunk */
2634 addr.offset += delta;
2635 } while (size != 0);
/* multiple partial stores need a Sync of their memory Projs; a single
 * store just returns its producing node */
2638 return new_rd_Sync(dbgi, new_block, i, ins);
2640 return get_Proj_pred(ins[0]);
/*
 * Generate a vfist (float->int store) or vfisttp (truncating variant,
 * available with SSE3) instruction.
 * NOTE(review): the return statements of both branches are missing from
 * this excerpt.
 */
2645 * Generate a vfist or vfisttp instruction.
2647 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2648 ir_node *index, ir_node *mem, ir_node *val)
2650 if (ia32_cg_config.use_fisttp) {
2651 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2652 if other users exists */
2653 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2654 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
/* the Keep preserves the duplicated value against removal */
2655 be_new_Keep(block, 1, &value);
/* classic fist needs the FPU control word switched to truncation mode */
2659 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2662 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/*
 * Transforms a general (no special case) Store node into the matching
 * ia32 store: xStore (SSE2 float), vfst (x87 float), vfist (float->int
 * conversion folded into the store), or a plain integer Store/Store8Bit.
 * Tries destination address mode first via try_create_dest_am().
 */
2668 * Transforms a general (no special case) Store.
2670 * @return the created ia32 Store node
2672 static ir_node *gen_general_Store(ir_node *node)
2674 ir_node *val = get_Store_value(node);
2675 ir_mode *mode = get_irn_mode(val);
2676 ir_node *block = get_nodes_block(node);
2677 ir_node *new_block = be_transform_node(block);
2678 ir_node *ptr = get_Store_ptr(node);
2679 ir_node *mem = get_Store_mem(node);
2680 dbg_info *dbgi = get_irn_dbg_info(node);
2681 int throws_exception = ir_throws_exception(node);
2684 ia32_address_t addr;
2686 /* check for destination address mode */
2687 new_node = try_create_dest_am(node);
2688 if (new_node != NULL)
2691 /* construct store address */
2692 memset(&addr, 0, sizeof(addr));
2693 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2695 if (addr.base == NULL) {
2696 addr.base = noreg_GP;
2698 addr.base = be_transform_node(addr.base);
2701 if (addr.index == NULL) {
2702 addr.index = noreg_GP;
2704 addr.index = be_transform_node(addr.index);
2706 addr.mem = be_transform_node(mem);
2708 if (mode_is_float(mode)) {
2709 new_val = be_transform_node(val);
2710 if (ia32_cg_config.use_sse2) {
2711 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2712 addr.index, addr.mem, new_val);
2714 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2715 addr.index, addr.mem, new_val, mode);
2717 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* fold the float->int Conv into a fist store */
2718 val = get_Conv_op(val);
2719 new_val = be_transform_node(val);
2720 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2722 unsigned dest_bits = get_mode_size_bits(mode);
/* skip Convs that only narrow — the store truncates anyway */
2723 while (is_downconv(val)
2724 && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2725 val = get_Conv_op(val);
2727 new_val = create_immediate_or_transform(val, 0);
2728 assert(mode != mode_b);
2730 if (dest_bits == 8) {
2731 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2732 addr.index, addr.mem, new_val);
2734 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2735 addr.index, addr.mem, new_val);
2738 ir_set_throws_exception(new_node, throws_exception);
2740 set_irn_pinned(new_node, get_irn_pinned(node));
2741 set_ia32_op_type(new_node, ia32_AddrModeD);
2742 set_ia32_ls_mode(new_node, mode);
2744 set_address(new_node, &addr);
2745 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transforms a Store: float constants are special-cased into integer
 * immediate stores, everything else goes through gen_general_Store().
 */
2751 * Transforms a Store.
2753 * @return the created ia32 Store node
2755 static ir_node *gen_Store(ir_node *node)
2757 ir_node *val = get_Store_value(node);
2758 ir_mode *mode = get_irn_mode(val);
2760 if (mode_is_float(mode) && is_Const(val)) {
2761 /* We can transform every floating const store
2762 into a sequence of integer stores.
2763 If the constant is already in a register,
2764 it would be better to use it, but we don't
2765 have this information here. */
2766 return gen_float_const_Store(node, val);
2768 return gen_general_Store(node);
/*
 * Transforms a Switch into an ia32 SwitchJmp (indirect jump through a
 * private, constant jump table entity; scale 2 = entries of 4 bytes).
 * The selector is zero/sign-extended to 32 bit first if necessary.
 */
2772 * Transforms a Switch.
2774 * @return the created ia32 SwitchJmp node
2776 static ir_node *gen_Switch(ir_node *node)
2778 dbg_info *dbgi = get_irn_dbg_info(node);
2779 ir_graph *irg = get_irn_irg(node);
2780 ir_node *block = be_transform_node(get_nodes_block(node));
2781 ir_node *sel = get_Switch_selector(node);
2782 ir_node *new_sel = be_transform_node(sel);
2783 ir_mode *sel_mode = get_irn_mode(sel);
2784 const ir_switch_table *table = get_Switch_table(node);
2785 unsigned n_outs = get_Switch_n_outs(node);
2789 assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2790 if (get_mode_size_bits(sel_mode) != 32)
2791 new_sel = create_upconv(new_sel, sel);
/* anonymous constant entity holding the jump table */
2793 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2794 set_entity_visibility(entity, ir_visibility_private);
2795 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2797 table = ir_switch_table_duplicate(irg, table);
2799 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2800 set_ia32_am_scale(new_node, 2);
2801 set_ia32_am_sc(new_node, entity);
2802 set_ia32_op_type(new_node, ia32_AddrModeS);
2803 set_ia32_ls_mode(new_node, mode_Iu);
2804 SET_IA32_ORIG_NODE(new_node, node);
2805 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2806 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
/*
 * Transform a Cond node into an ia32 Jcc: the flags-producing node and
 * the condition code are recovered from the selector (a Cmp).
 */
2812 * Transform a Cond node.
2814 static ir_node *gen_Cond(ir_node *node)
2816 ir_node *block = get_nodes_block(node);
2817 ir_node *new_block = be_transform_node(block);
2818 dbg_info *dbgi = get_irn_dbg_info(node);
2819 ir_node *sel = get_Cond_selector(node);
2820 ir_node *flags = NULL;
2822 ia32_condition_code_t cc;
2824 /* we get flags from a Cmp */
2825 flags = get_flags_node(sel, &cc);
2827 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2828 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transform a be_Copy: duplicate the node and normalize GP modes to
 * mode_Iu, since the backend treats all GP registers as 32-bit.
 */
2834 * Transform a be_Copy.
2836 static ir_node *gen_be_Copy(ir_node *node)
2838 ir_node *new_node = be_duplicate_node(node);
2839 ir_mode *mode = get_irn_mode(new_node);
2841 if (ia32_mode_needs_gp_reg(mode)) {
2842 set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare for a Cmp node.  With fucomi support the
 * flags are produced directly; otherwise a Fucom/Ftst + Fnstsw + Sahf
 * sequence transfers the FPU status word into EFLAGS.
 */
2848 static ir_node *create_Fucom(ir_node *node)
2850 dbg_info *dbgi = get_irn_dbg_info(node);
2851 ir_node *block = get_nodes_block(node);
2852 ir_node *new_block = be_transform_node(block);
2853 ir_node *left = get_Cmp_left(node);
2854 ir_node *new_left = be_transform_node(left);
2855 ir_node *right = get_Cmp_right(node);
2856 ir_mode *cmp_mode = get_irn_mode(left);
2859 check_x87_floatmode(cmp_mode);
2861 if (ia32_cg_config.use_fucomi) {
2862 new_right = be_transform_node(right);
2863 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2865 set_ia32_commutative(new_node);
2866 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the cheaper ftst (no second operand) */
2868 if (is_Const_0(right)) {
2869 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2871 new_right = be_transform_node(right);
2872 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2873 set_ia32_commutative(new_node);
2876 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (the fnstsw result) into the EFLAGS register */
2878 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2879 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE2 Ucomi (unordered float compare setting EFLAGS) for a
 * Cmp node, with source address-mode matching on the operands.
 */
2885 static ir_node *create_Ucomi(ir_node *node)
2887 dbg_info *dbgi = get_irn_dbg_info(node);
2888 ir_node *src_block = get_nodes_block(node);
2889 ir_node *new_block = be_transform_node(src_block);
2890 ir_node *left = get_Cmp_left(node);
2891 ir_node *right = get_Cmp_right(node);
2893 ia32_address_mode_t am;
2894 ia32_address_t *addr = &am.addr;
2896 match_arguments(&am, src_block, left, right, NULL,
2897 match_commutative | match_am);
2899 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2900 addr->mem, am.new_op1, am.new_op2,
2902 set_am_attributes(new_node, &am);
2904 SET_IA32_ORIG_NODE(new_node, node);
/* rewire a possibly folded Load's memory Proj */
2906 new_node = fix_mem_proj(new_node, &am);
/*
 * Conservative analysis: are the upper register bits of a transformed
 * 8/16-bit value already "clean" (zero for unsigned, sign-copies for
 * signed), so a 32-bit operation can be used without an extra extend?
 * NOTE(review): the switch case labels (Conv, Shr, Sar, And, Const, ...)
 * are missing from this excerpt; the bodies below are matched to them by
 * the called accessors.
 */
2912 * returns true if it is assured, that the upper bits of a node are "clean"
2913 * which means for a 16 or 8 bit value, that the upper bits in the register
2914 * are 0 for unsigned and a copy of the last significant bit for signed
2917 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2919 assert(ia32_mode_needs_gp_reg(mode));
/* a full 32-bit value has no "upper bits" to worry about */
2920 if (get_mode_size_bits(mode) >= 32)
2923 if (is_Proj(transformed_node))
2924 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2926 switch (get_ia32_irn_opcode(transformed_node)) {
2927 case iro_ia32_Conv_I2I:
2928 case iro_ia32_Conv_I2I8Bit: {
2929 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* an extension of matching signedness from a mode at most as wide
 * leaves the upper bits in the required state */
2930 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2932 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr case: a big enough constant shift count clears the upper bits */
2939 if (mode_is_signed(mode)) {
2940 return false; /* TODO handle signed modes */
2942 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2943 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2944 const ia32_immediate_attr_t *attr
2945 = get_ia32_immediate_attr_const(right);
2946 if (attr->symconst == 0 &&
2947 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2951 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2955 /* TODO too conservative if shift amount is constant */
2956 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (bits can only
 * be cleared); other bit operations need both operands clean */
2959 if (!mode_is_signed(mode)) {
2961 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2962 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2964 /* TODO if one is known to be zero extended, then || is sufficient */
2969 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2970 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2972 case iro_ia32_Const:
2973 case iro_ia32_Immediate: {
2974 const ia32_immediate_attr_t *attr =
2975 get_ia32_immediate_attr_const(transformed_node);
/* constants are clean when all bits above the sign position agree */
2976 if (mode_is_signed(mode)) {
2977 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2978 return shifted == 0 || shifted == -1;
2980 unsigned long shifted = (unsigned long)attr->offset;
2981 shifted >>= get_mode_size_bits(mode)-1;
2983 return shifted == 0;
/*
 * Generate code for a Cmp: floats go to Ucomi (SSE2) or Fucom (x87);
 * integer (x & y) ==/!= 0 becomes a Test; everything else becomes an
 * ia32 Cmp.  Widens the compare to 32 bit when the upper bits are known
 * clean, since the 32-bit opcode encoding is smaller.
 */
2993 * Generate code for a Cmp.
2995 static ir_node *gen_Cmp(ir_node *node)
2997 dbg_info *dbgi = get_irn_dbg_info(node);
2998 ir_node *block = get_nodes_block(node);
2999 ir_node *new_block = be_transform_node(block);
3000 ir_node *left = get_Cmp_left(node);
3001 ir_node *right = get_Cmp_right(node);
3002 ir_mode *cmp_mode = get_irn_mode(left);
3004 ia32_address_mode_t am;
3005 ia32_address_t *addr = &am.addr;
3007 if (mode_is_float(cmp_mode)) {
3008 if (ia32_cg_config.use_sse2) {
3009 return create_Ucomi(node);
3011 return create_Fucom(node);
3015 assert(ia32_mode_needs_gp_reg(cmp_mode));
3017 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3018 if (is_Const_0(right) &&
3020 get_irn_n_edges(left) == 1) {
3021 /* Test(and_left, and_right) */
3022 ir_node *and_left = get_And_left(left);
3023 ir_node *and_right = get_And_right(left);
3025 /* matze: code here used mode instead of cmd_mode, I think it is always
3026 * the same as cmp_mode, but I leave this here to see if this is really
3029 assert(get_irn_mode(and_left) == cmp_mode);
3031 match_arguments(&am, block, and_left, and_right, NULL,
3033 match_am | match_8bit_am | match_16bit_am |
3034 match_am_and_immediates | match_immediate);
3036 /* use 32bit compare mode if possible since the opcode is smaller */
3037 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3038 upper_bits_clean(am.new_op2, cmp_mode)) {
3039 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3042 if (get_mode_size_bits(cmp_mode) == 8) {
3043 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3044 addr->index, addr->mem,
3045 am.new_op1, am.new_op2,
3048 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3049 addr->index, addr->mem, am.new_op1,
3050 am.new_op2, am.ins_permuted);
3053 /* Cmp(left, right) */
3054 match_arguments(&am, block, left, right, NULL,
3055 match_commutative | match_am | match_8bit_am |
3056 match_16bit_am | match_am_and_immediates |
3058 /* use 32bit compare mode if possible since the opcode is smaller */
3059 if (upper_bits_clean(am.new_op1, cmp_mode) &&
3060 upper_bits_clean(am.new_op2, cmp_mode)) {
3061 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3064 if (get_mode_size_bits(cmp_mode) == 8) {
3065 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3066 addr->index, addr->mem, am.new_op1,
3067 am.new_op2, am.ins_permuted);
3069 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3070 addr->mem, am.new_op1, am.new_op2,
3074 set_am_attributes(new_node, &am);
3075 set_ia32_ls_mode(new_node, cmp_mode);
3077 SET_IA32_ORIG_NODE(new_node, node);
3079 new_node = fix_mem_proj(new_node, &am);
/*
 * Create an ia32 CMovcc for a Mux of GP values: false/true operands are
 * matched for address mode, and the condition code is negated when the
 * matcher swapped the operands.
 */
3084 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3085 ia32_condition_code_t cc)
3087 dbg_info *dbgi = get_irn_dbg_info(node);
3088 ir_node *block = get_nodes_block(node);
3089 ir_node *new_block = be_transform_node(block);
3090 ir_node *val_true = get_Mux_true(node);
3091 ir_node *val_false = get_Mux_false(node);
3093 ia32_address_mode_t am;
3094 ia32_address_t *addr;
3096 assert(ia32_cg_config.use_cmov);
3097 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3101 match_arguments(&am, block, val_false, val_true, flags,
3102 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matcher may have swapped op1/op2 — compensate in the condition */
3104 if (am.ins_permuted)
3105 cc = ia32_negate_condition_code(cc);
3107 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3108 addr->mem, am.new_op1, am.new_op2, new_flags,
3110 set_am_attributes(new_node, &am);
3112 SET_IA32_ORIG_NODE(new_node, node);
3114 new_node = fix_mem_proj(new_node, &am);
/*
 * Creates an ia32 Setcc producing a 0/1 byte from @p flags, zero-extended
 * to the width of @p orig_node's mode when that mode is wider than 8 bit.
 */
3120 * Creates a ia32 Setcc instruction.
3122 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3123 ir_node *flags, ia32_condition_code_t cc,
3126 ir_mode *mode = get_irn_mode(orig_node);
3129 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3130 SET_IA32_ORIG_NODE(new_node, orig_node);
3132 /* we might need to conv the result up */
3133 if (get_mode_size_bits(mode) > 8) {
3134 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3135 nomem, new_node, mode_Bu);
3136 SET_IA32_ORIG_NODE(new_node, orig_node);
/*
 * Create the branch-free "difference or zero" sequence for
 * Mux(a >=u b, a - b, 0):  sub; sbb 0 (all-ones on borrow); not; and.
 * The And masks the Sub result to zero exactly when a < b.
 */
3143 * Create instruction for an unsigned Difference or Zero.
3145 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3147 ir_mode *mode = get_irn_mode(psi);
3157 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3158 match_mode_neutral | match_am | match_immediate | match_two_users);
3160 block = get_nodes_block(new_node);
3162 if (is_Proj(new_node)) {
3163 sub = get_Proj_pred(new_node);
/* ensure the Sub is multi-result so its flags output can be used */
3166 set_irn_mode(sub, mode_T);
3167 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3169 assert(is_ia32_Sub(sub));
3170 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3172 dbgi = get_irn_dbg_info(psi);
3173 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3174 set_ia32_ls_mode(sbb, mode_Iu);
3175 notn = new_bd_ia32_Not(dbgi, block, sbb);
3177 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3178 set_ia32_ls_mode(new_node, mode_Iu);
3179 set_ia32_commutative(new_node);
/*
 * Create a private constant entity holding a two-element float array
 * {c0, c1}, for loading Mux-of-constants results from memory.  When
 * *new_mode is NULL the smallest lossless float mode is chosen and
 * reported back through new_mode.
 */
3184 * Create an const array of two float consts.
3186 * @param c0 the first constant
3187 * @param c1 the second constant
3188 * @param new_mode IN/OUT for the mode of the constants, if NULL
3189 * smallest possible mode will be used
3191 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3194 ir_mode *mode = *new_mode;
3196 ir_initializer_t *initializer;
3197 ir_tarval *tv0 = get_Const_tarval(c0);
3198 ir_tarval *tv1 = get_Const_tarval(c1);
3201 /* detect the best mode for the constants */
3202 mode = get_tarval_mode(tv0);
3204 if (mode != mode_F) {
/* try to shrink to single precision, then to double precision */
3205 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3206 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3208 tv0 = tarval_convert_to(tv0, mode);
3209 tv1 = tarval_convert_to(tv1, mode);
3210 } else if (mode != mode_D) {
3211 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3212 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3214 tv0 = tarval_convert_to(tv0, mode);
3215 tv1 = tarval_convert_to(tv1, mode);
3222 tp = ia32_get_prim_type(mode);
3223 tp = ia32_create_float_array(tp);
3225 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3227 set_entity_ld_ident(ent, get_entity_ident(ent));
3228 set_entity_visibility(ent, ir_visibility_private);
3229 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3231 initializer = create_initializer_compound(2);
3233 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3234 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3236 set_entity_initializer(ent, initializer);
/*
 * Instruction kinds and step list used by find_const_transform() to build
 * arbitrary {t, f} constant pairs out of a Setcc's 0/1 result.
 * NOTE(review): the enum members (SETCC_TR_ADD, SETCC_TR_SET, SETCC_TR_NEG,
 * SETCC_TR_LEA, ...) and the struct's step array fields are missing from
 * this excerpt.
 */
3243 * Possible transformations for creating a Setcc.
3245 enum setcc_transform_insn {
3258 typedef struct setcc_transform {
3260 ia32_condition_code_t cc;
3262 enum setcc_transform_insn transform;
3266 } setcc_transform_t;
/*
 * Plan a sequence of cheap instructions (add/lea/shl/neg/not/and/set/sbb)
 * that turns a Setcc's 0/1 result into the constant pair {t, f}.
 * Strategy: normalize so t > f and f == 0 (recording an ADD of f and/or a
 * condition-code negation), then pick a construction for t by value.
 * NOTE(review): the switch case labels for the small multipliers and some
 * branches are missing from this excerpt.
 */
3269 * Setcc can only handle 0 and 1 result.
3270 * Find a transformation that creates 0 and 1 from
3273 static void find_const_transform(ia32_condition_code_t cc,
3274 ir_tarval *t, ir_tarval *f,
3275 setcc_transform_t *res)
3281 if (tarval_is_null(t)) {
/* swap t/f so the non-zero value is constructed; invert the condition */
3285 cc = ia32_negate_condition_code(cc);
3286 } else if (tarval_cmp(t, f) == ir_relation_less) {
3287 // now, t is the bigger one
3291 cc = ia32_negate_condition_code(cc);
3295 if (! tarval_is_null(f)) {
/* reduce to the f == 0 case: construct (t - f) and add f afterwards */
3296 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3299 res->steps[step].transform = SETCC_TR_ADD;
3301 if (t == tarval_bad)
3302 panic("constant subtract failed");
3303 if (! tarval_is_long(f))
3304 panic("tarval is not long");
3306 res->steps[step].val = get_tarval_long(f);
3308 f = tarval_sub(f, f, NULL);
3309 assert(tarval_is_null(f));
3312 if (tarval_is_one(t)) {
/* {1, 0} is exactly what Setcc produces */
3313 res->steps[step].transform = SETCC_TR_SET;
3314 res->num_steps = ++step;
3318 if (tarval_is_minus_one(t)) {
/* {-1, 0}: negate the 0/1 Setcc result */
3319 res->steps[step].transform = SETCC_TR_NEG;
3321 res->steps[step].transform = SETCC_TR_SET;
3322 res->num_steps = ++step;
3325 if (tarval_is_long(t)) {
3326 long v = get_tarval_long(t);
3328 res->steps[step].val = 0;
/* small multipliers map to single lea/shl forms (cases are selected
 * by the missing switch labels on v) */
3331 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3333 res->steps[step].transform = SETCC_TR_LEAxx;
3334 res->steps[step].scale = 3; /* (a << 3) + a */
3337 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3339 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3340 res->steps[step].scale = 3; /* (a << 3) */
3343 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3345 res->steps[step].transform = SETCC_TR_LEAxx;
3346 res->steps[step].scale = 2; /* (a << 2) + a */
3349 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3351 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3352 res->steps[step].scale = 2; /* (a << 2) */
3355 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3357 res->steps[step].transform = SETCC_TR_LEAxx;
3358 res->steps[step].scale = 1; /* (a << 1) + a */
3361 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3363 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3364 res->steps[step].scale = 1; /* (a << 1) */
3367 res->num_steps = step;
/* general value: AND with a mask, or shift a single bit into place */
3370 if (! tarval_is_single_bit(t)) {
3371 res->steps[step].transform = SETCC_TR_AND;
3372 res->steps[step].val = v;
3374 res->steps[step].transform = SETCC_TR_NEG;
3376 int val = get_tarval_lowest_bit(t);
3379 res->steps[step].transform = SETCC_TR_SHL;
3380 res->steps[step].scale = val;
3384 res->steps[step].transform = SETCC_TR_SET;
3385 res->num_steps = ++step;
3388 panic("tarval is not long");
/*
 * Transforms a Mux node: handles float abs, SSE min/max patterns, loading
 * float Mux-of-constants from a two-entry constant array indexed by a
 * Setcc, integer "difference or zero", Setcc-based constant construction
 * via find_const_transform(), and finally the generic CMov.
 * NOTE(review): this excerpt is heavily gapped (several if/else headers,
 * returns and closing braces are missing); comments only annotate the
 * visible code.
 */
3392 * Transforms a Mux node into some code sequence.
3394 * @return The transformed node.
3396 static ir_node *gen_Mux(ir_node *node)
3398 dbg_info *dbgi = get_irn_dbg_info(node);
3399 ir_node *block = get_nodes_block(node);
3400 ir_node *new_block = be_transform_node(block);
3401 ir_node *mux_true = get_Mux_true(node);
3402 ir_node *mux_false = get_Mux_false(node);
3403 ir_node *sel = get_Mux_sel(node);
3404 ir_mode *mode = get_irn_mode(node);
3408 ia32_condition_code_t cc;
3410 assert(get_irn_mode(sel) == mode_b);
/* abs()/-abs() pattern detection */
3412 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3414 if (ia32_mode_needs_gp_reg(mode)) {
3415 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3418 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3419 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3423 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3424 if (mode_is_float(mode)) {
3425 ir_node *cmp_left = get_Cmp_left(sel);
3426 ir_node *cmp_right = get_Cmp_right(sel);
3427 ir_relation relation = get_Cmp_relation(sel);
3429 if (ia32_cg_config.use_sse2) {
3430 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3431 if (cmp_left == mux_true && cmp_right == mux_false) {
3432 /* Mux(a <= b, a, b) => MIN */
3433 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3434 match_commutative | match_am | match_two_users);
3435 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3436 /* Mux(a <= b, b, a) => MAX */
3437 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3438 match_commutative | match_am | match_two_users);
3440 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3441 if (cmp_left == mux_true && cmp_right == mux_false) {
3442 /* Mux(a >= b, a, b) => MAX */
3443 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3444 match_commutative | match_am | match_two_users);
3445 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3446 /* Mux(a >= b, b, a) => MIN */
3447 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3448 match_commutative | match_am | match_two_users);
/* float Mux of two constants: load element [Setcc] from a constant
 * two-entry array (index scaled by the element size) */
3453 if (is_Const(mux_true) && is_Const(mux_false)) {
3454 ia32_address_mode_t am;
3459 flags = get_flags_node(sel, &cc);
3460 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3462 if (ia32_cg_config.use_sse2) {
3463 /* cannot load from different mode on SSE */
3466 /* x87 can load any mode */
3470 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3472 if (new_mode == mode_F) {
3474 } else if (new_mode == mode_D) {
3476 } else if (new_mode == ia32_mode_E) {
3477 /* arg, shift 16 NOT supported */
/* scale by 16 via an extra Lea, since AM scale maxes out at 8 */
3479 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3481 panic("Unsupported constant size");
3484 am.ls_mode = new_mode;
3485 am.addr.base = get_symconst_base();
3486 am.addr.index = new_node;
3487 am.addr.mem = nomem;
3489 am.addr.scale = scale;
3490 am.addr.use_frame = 0;
3491 am.addr.tls_segment = false;
3492 am.addr.frame_entity = NULL;
3493 am.addr.symconst_sign = 0;
3494 am.mem_proj = am.addr.mem;
3495 am.op_type = ia32_AddrModeS;
3498 am.pinned = op_pin_state_floats;
3500 am.ins_permuted = false;
3502 if (ia32_cg_config.use_sse2)
3503 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3505 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3506 set_am_attributes(load, &am);
3508 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3510 panic("cannot transform floating point Mux");
3513 assert(ia32_mode_needs_gp_reg(mode));
3516 ir_node *cmp_left = get_Cmp_left(sel);
3517 ir_node *cmp_right = get_Cmp_right(sel);
3518 ir_relation relation = get_Cmp_relation(sel);
3519 ir_node *val_true = mux_true;
3520 ir_node *val_false = mux_false;
/* normalize so the zero constant is the false value */
3522 if (is_Const(val_true) && is_Const_null(val_true)) {
3523 ir_node *tmp = val_false;
3524 val_false = val_true;
3526 relation = get_negated_relation(relation);
/* Mux(a >u b, a - b, 0) and the mirrored form => doz sequence */
3528 if (is_Const_0(val_false) && is_Sub(val_true)) {
3529 if ((relation & ir_relation_greater)
3530 && get_Sub_left(val_true) == cmp_left
3531 && get_Sub_right(val_true) == cmp_right) {
3532 return create_doz(node, cmp_left, cmp_right);
3534 if ((relation & ir_relation_less)
3535 && get_Sub_left(val_true) == cmp_right
3536 && get_Sub_right(val_true) == cmp_left) {
3537 return create_doz(node, cmp_right, cmp_left);
3542 flags = get_flags_node(sel, &cc);
3544 if (is_Const(mux_true) && is_Const(mux_false)) {
3545 /* both are const, good */
3546 ir_tarval *tv_true = get_Const_tarval(mux_true);
3547 ir_tarval *tv_false = get_Const_tarval(mux_false);
3548 setcc_transform_t res;
3551 find_const_transform(cc, tv_true, tv_false, &res);
/* materialize the planned steps, last step first (steps were recorded
 * in application order, emitted back-to-front) */
3553 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3556 switch (res.steps[step].transform) {
3558 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3559 add_ia32_am_offs_int(new_node, res.steps[step].val);
3561 case SETCC_TR_ADDxx:
3562 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3565 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3566 set_ia32_am_scale(new_node, res.steps[step].scale);
3567 set_ia32_am_offs_int(new_node, res.steps[step].val);
3569 case SETCC_TR_LEAxx:
3570 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3571 set_ia32_am_scale(new_node, res.steps[step].scale);
3572 set_ia32_am_offs_int(new_node, res.steps[step].val);
3575 imm = ia32_immediate_from_long(res.steps[step].scale);
3576 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3579 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3582 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3585 imm = ia32_immediate_from_long(res.steps[step].val);
3586 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3589 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3592 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3595 panic("unknown setcc transform");
/* general case: conditional move */
3599 new_node = create_CMov(node, sel, flags, cc);
/*
 * Create a conversion from an x87 value to a general purpose register by
 * storing with fist onto the frame and reloading as an integer.
 * 32-bit unsigned values are stored as 64-bit signed and only the low
 * 32 bits are loaded back.
 */
3606 * Create a conversion from x87 state register to general purpose.
3608 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3610 ir_node *block = be_transform_node(get_nodes_block(node));
3611 ir_node *op = get_Conv_op(node);
3612 ir_node *new_op = be_transform_node(op);
3613 ir_graph *irg = current_ir_graph;
3614 dbg_info *dbgi = get_irn_dbg_info(node);
3615 ir_mode *mode = get_irn_mode(node);
3616 ir_node *frame = get_irg_frame(irg);
3617 ir_node *fist, *load, *mem;
3619 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3620 set_irn_pinned(fist, op_pin_state_floats);
3621 set_ia32_use_frame(fist);
3622 set_ia32_op_type(fist, ia32_AddrModeD);
/* the memory Proj number is shared by fist and fisttp variants */
3624 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3625 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3627 assert(get_mode_size_bits(mode) <= 32);
3628 /* exception we can only store signed 32 bit integers, so for unsigned
3629 we store a 64bit (signed) integer and load the lower bits */
3630 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3631 set_ia32_ls_mode(fist, mode_Ls);
3633 set_ia32_ls_mode(fist, mode_Is);
3635 SET_IA32_ORIG_NODE(fist, node);
3638 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3640 set_irn_pinned(load, op_pin_state_floats);
3641 set_ia32_use_frame(load);
3642 set_ia32_op_type(load, ia32_AddrModeS);
3643 set_ia32_ls_mode(load, mode_Is);
/* request a stack entity large enough for what the fist wrote */
3644 if (get_ia32_ls_mode(fist) == mode_Ls) {
3645 ia32_attr_t *attr = get_ia32_attr(load);
3646 attr->data.need_64bit_stackent = 1;
3648 ia32_attr_t *attr = get_ia32_attr(load);
3649 attr->data.need_32bit_stackent = 1;
3651 SET_IA32_ORIG_NODE(load, node);
3653 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/*
 * Creates an x87 float->float Conv by storing the value to the frame in
 * the target mode and loading it back (the store performs the rounding).
 */
3657 * Creates a x87 Conv by placing a Store and a Load
3659 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3661 ir_node *block = get_nodes_block(node);
3662 ir_graph *irg = get_Block_irg(block);
3663 dbg_info *dbgi = get_irn_dbg_info(node);
3664 ir_node *frame = get_irg_frame(irg);
3666 ir_node *store, *load;
3669 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3670 set_ia32_use_frame(store);
3671 set_ia32_op_type(store, ia32_AddrModeD);
3672 SET_IA32_ORIG_NODE(store, node);
3674 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3676 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3677 set_ia32_use_frame(load);
3678 set_ia32_op_type(load, ia32_AddrModeS);
3679 SET_IA32_ORIG_NODE(load, node);
3681 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/*
 * Create an integer-to-integer Conv, choosing the 8-bit constructor
 * variant when the source mode is 8 bits wide.
 */
3685 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3686 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3688 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3690 func = get_mode_size_bits(mode) == 8 ?
3691 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3692 return func(dbgi, block, base, index, mem, val, mode);
3696  * Create a conversion from general purpose to x87 register
3698 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3700 ir_node *src_block = get_nodes_block(node);
3701 ir_node *block = be_transform_node(src_block);
3702 ir_graph *irg = get_Block_irg(block);
3703 dbg_info *dbgi = get_irn_dbg_info(node);
3704 ir_node *op = get_Conv_op(node);
3705 ir_node *new_op = NULL;
3707 ir_mode *store_mode;
3713 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3714 if (possible_int_mode_for_fp(src_mode)) {
3715 ia32_address_mode_t am;
3717 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3718 if (am.op_type == ia32_AddrModeS) {
3719 ia32_address_t *addr = &am.addr;
/* operand comes straight from memory: emit fild with source AM */
3721 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3722 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3724 set_am_attributes(fild, &am);
3725 SET_IA32_ORIG_NODE(fild, node);
3727 fix_mem_proj(fild, &am);
/* AM matching did not apply: fall back to spilling the GP value */
3732 if (new_op == NULL) {
3733 new_op = be_transform_node(op);
3736 mode = get_irn_mode(op);
3738 /* first convert to 32 bit signed if necessary */
3739 if (get_mode_size_bits(src_mode) < 32) {
3740 if (!upper_bits_clean(new_op, src_mode)) {
3741 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3742 SET_IA32_ORIG_NODE(new_op, node);
3747 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it from memory */
3750 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3752 set_ia32_use_frame(store);
3753 set_ia32_op_type(store, ia32_AddrModeD);
3754 set_ia32_ls_mode(store, mode_Iu);
3756 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3758 /* exception for 32bit unsigned, do a 64bit spill+load */
3759 if (!mode_is_signed(mode)) {
/* write a zero high word at offset 4, then fild the 64-bit slot so the
 * unsigned 32-bit value is read as a non-negative signed 64-bit value */
3762 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3764 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3765 noreg_GP, nomem, zero_const);
3766 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3768 set_ia32_use_frame(zero_store);
3769 set_ia32_op_type(zero_store, ia32_AddrModeD);
3770 add_ia32_am_offs_int(zero_store, 4);
3771 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must complete before the fild: join them with a Sync */
3773 in[0] = zero_store_mem;
3776 store_mem = new_rd_Sync(dbgi, block, 2, in);
3777 store_mode = mode_Ls;
3779 store_mode = mode_Is;
/* load the spilled integer into an x87 register */
3783 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3785 set_ia32_use_frame(fild);
3786 set_ia32_op_type(fild, ia32_AddrModeS);
3787 set_ia32_ls_mode(fild, store_mode);
3789 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3795  * Create a conversion from one integer mode into another one
3797 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3798 dbg_info *dbgi, ir_node *block, ir_node *op,
3801 ir_node *new_block = be_transform_node(block);
3803 ir_mode *smaller_mode;
3804 ia32_address_mode_t am;
3805 ia32_address_t *addr = &am.addr;
/* the conversion only needs to honour the narrower of the two modes */
3808 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3809 smaller_mode = src_mode;
3811 smaller_mode = tgt_mode;
3814 #ifdef DEBUG_libfirm
/* a Conv fed by a constant should have been folded earlier */
3816 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3821 match_arguments(&am, block, NULL, op, NULL,
3822 match_am | match_8bit_am | match_16bit_am);
/* if the operand's upper bits are already clean the conv is a no-op */
3824 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3825 /* unnecessary conv. in theory it shouldn't have been AM */
3826 assert(is_ia32_NoReg_GP(addr->base));
3827 assert(is_ia32_NoReg_GP(addr->index));
3828 assert(is_NoMem(addr->mem));
3829 assert(am.addr.offset == 0);
3830 assert(am.addr.symconst_ent == NULL);
/* emit the real conversion (movsx/movzx-style node) */
3834 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3835 addr->mem, am.new_op2, smaller_mode);
3836 set_am_attributes(new_node, &am);
3837 /* match_arguments assume that out-mode = in-mode, this isn't true here
3839 set_ia32_ls_mode(new_node, smaller_mode);
3840 SET_IA32_ORIG_NODE(new_node, node);
3841 new_node = fix_mem_proj(new_node, &am);
3846  * Transforms a Conv node.
3848  * @return The created ia32 Conv node
3850 static ir_node *gen_Conv(ir_node *node)
3852 ir_node *block = get_nodes_block(node);
3853 ir_node *new_block = be_transform_node(block);
3854 ir_node *op = get_Conv_op(node);
3855 ir_node *new_op = NULL;
3856 dbg_info *dbgi = get_irn_dbg_info(node);
3857 ir_mode *src_mode = get_irn_mode(op);
3858 ir_mode *tgt_mode = get_irn_mode(node);
3859 int src_bits = get_mode_size_bits(src_mode);
3860 int tgt_bits = get_mode_size_bits(tgt_mode);
3861 ir_node *res = NULL;
/* 64-bit integers must have been lowered to register pairs before this */
3863 assert(!mode_is_int(src_mode) || src_bits <= 32);
3864 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3866 /* modeB -> X should already be lowered by the lower_mode_b pass */
3867 if (src_mode == mode_b) {
3868 panic("ConvB not lowered %+F", node);
3871 if (src_mode == tgt_mode) {
3872 /* this should be optimized already, but who knows... */
3873 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3874 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3875 return be_transform_node(op);
3878 if (mode_is_float(src_mode)) {
3879 new_op = be_transform_node(op);
3880 /* we convert from float ... */
3881 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE uses cvtss2sd/cvtsd2ss-style node, x87 needs a
 * store/load round trip (gen_x87_conv) */
3883 if (ia32_cg_config.use_sse2) {
3884 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3885 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3887 set_ia32_ls_mode(res, tgt_mode);
/* widening on x87 is free: values are held in extended precision */
3889 if (src_bits < tgt_bits) {
3890 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3893 res = gen_x87_conv(tgt_mode, new_op);
3894 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* float -> int */
3900 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3901 if (ia32_cg_config.use_sse2) {
3902 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3904 set_ia32_ls_mode(res, src_mode);
3906 return gen_x87_fp_to_gp(node);
3910 /* we convert from int ... */
3911 if (mode_is_float(tgt_mode)) {
/* int -> float */
3913 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3914 if (ia32_cg_config.use_sse2) {
3915 new_op = be_transform_node(op);
3916 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3918 set_ia32_ls_mode(res, tgt_mode);
3920 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3921 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3922 res = gen_x87_gp_to_fp(node, src_mode);
3924 /* we need a float-conv, if the int mode has more bits than the
3926 if (float_mantissa < int_mantissa) {
3927 res = gen_x87_conv(tgt_mode, res);
3928 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3932 } else if (tgt_mode == mode_b) {
3933 /* mode_b lowering already took care that we only have 0/1 values */
3934 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3935 src_mode, tgt_mode));
3936 return be_transform_node(op);
/* int -> int of the same width is a no-op on 32-bit registers */
3939 if (src_bits == tgt_bits) {
3940 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3941 src_mode, tgt_mode));
3942 return be_transform_node(op);
3945 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate (subject to the given
 * constraint letter); fall back to transforming it normally. */
3953 static ir_node *create_immediate_or_transform(ir_node *node,
3954 char immediate_constraint_type)
3956 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3957 if (new_node == NULL) {
3958 new_node = be_transform_node(node);
3964  * Transforms a FrameAddr into an ia32 Add.
3966 static ir_node *gen_be_FrameAddr(ir_node *node)
3968 ir_node *block = be_transform_node(get_nodes_block(node));
3969 ir_node *op = be_get_FrameAddr_frame(node);
3970 ir_node *new_op = be_transform_node(op);
3971 dbg_info *dbgi = get_irn_dbg_info(node);
/* a Lea with a frame entity: the actual offset is filled in once stack
 * frame layout is known */
3974 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3975 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3976 set_ia32_use_frame(new_node);
3978 SET_IA32_ORIG_NODE(new_node, node);
3984  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3986 static ir_node *gen_be_Return(ir_node *node)
3988 ir_graph *irg = current_ir_graph;
3989 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3990 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3991 ir_node *new_ret_val = be_transform_node(ret_val);
3992 ir_node *new_ret_mem = be_transform_node(ret_mem);
3993 ir_entity *ent = get_irg_entity(irg);
3994 ir_type *tp = get_entity_type(ent);
3995 dbg_info *dbgi = get_irn_dbg_info(node);
3996 ir_node *block = be_transform_node(get_nodes_block(node));
/* only returns of a float value under SSE2 need the xmm0 -> x87 copy;
 * everything else is duplicated unchanged */
4010 assert(ret_val != NULL);
4011 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4012 return be_duplicate_node(node);
4015 res_type = get_method_res_type(tp, 0);
4017 if (! is_Primitive_type(res_type)) {
4018 return be_duplicate_node(node);
4021 mode = get_type_mode(res_type);
4022 if (! mode_is_float(mode)) {
4023 return be_duplicate_node(node);
4026 assert(get_method_n_ress(tp) == 1);
4028 frame = get_irg_frame(irg);
4030 /* store xmm0 onto stack */
4031 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4032 new_ret_mem, new_ret_val);
4033 set_ia32_ls_mode(sse_store, mode);
4034 set_ia32_op_type(sse_store, ia32_AddrModeD);
4035 set_ia32_use_frame(sse_store);
4036 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4038 /* load into x87 register */
4039 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4040 set_ia32_op_type(fld, ia32_AddrModeS);
4041 set_ia32_use_frame(fld);
4043 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4044 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4046 /* create a new return */
4047 arity = get_irn_arity(node);
4048 in = ALLOCAN(ir_node*, arity);
4049 pop = be_Return_get_pop(node);
/* rebuild the Return's in-array, substituting the reloaded x87 value
 * and the updated memory for the old operands */
4050 for (i = 0; i < arity; ++i) {
4051 ir_node *op = get_irn_n(node, i);
4052 if (op == ret_val) {
4054 } else if (op == ret_mem) {
4057 in[i] = be_transform_node(op);
4060 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4061 copy_node_attr(irg, node, new_node);
4067  * Transform a be_AddSP into an ia32_SubSP.
4069 static ir_node *gen_be_AddSP(ir_node *node)
4071 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4072 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
/* the ia32 stack grows downwards, so allocating (AddSP) becomes SubSP */
4074 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4075 match_am | match_immediate);
4076 assert(is_ia32_SubSP(new_node));
/* the result must stay in %esp */
4077 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4078 &ia32_registers[REG_ESP]);
4083  * Transform a be_SubSP into an ia32_AddSP
4085 static ir_node *gen_be_SubSP(ir_node *node)
4087 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4088 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
/* mirror of gen_be_AddSP: freeing stack space (SubSP) becomes AddSP */
4090 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4091 match_am | match_immediate);
4092 assert(is_ia32_AddSP(new_node));
/* the result must stay in %esp */
4093 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4094 &ia32_registers[REG_ESP]);
4099  * Change some phi modes
4101 static ir_node *gen_Phi(ir_node *node)
4103 const arch_register_req_t *req;
4104 ir_node *block = be_transform_node(get_nodes_block(node));
4105 ir_graph *irg = current_ir_graph;
4106 dbg_info *dbgi = get_irn_dbg_info(node);
4107 ir_mode *mode = get_irn_mode(node);
/* select the register class requirement matching the Phi's mode */
4110 if (ia32_mode_needs_gp_reg(mode)) {
4111 /* we shouldn't have any 64bit stuff around anymore */
4112 assert(get_mode_size_bits(mode) <= 32);
4113 /* all integer operations are on 32bit registers now */
4115 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4116 } else if (mode_is_float(mode)) {
4117 if (ia32_cg_config.use_sse2) {
4119 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4122 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4125 req = arch_no_register_req;
4128 /* phi nodes allow loops, so we use the old arguments for now
4129  * and fix this later */
4130 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4131 get_irn_in(node) + 1);
4132 copy_node_attr(irg, node, phi);
4133 be_duplicate_deps(node, phi);
4135 arch_set_irn_register_req_out(phi, 0, req);
/* make sure predecessors get transformed even though we kept old args */
4137 be_enqueue_preds(node);
/* Transform an unconditional Jmp into its ia32 counterpart. */
4142 static ir_node *gen_Jmp(ir_node *node)
4144 ir_node *block = get_nodes_block(node);
4145 ir_node *new_block = be_transform_node(block);
4146 dbg_info *dbgi = get_irn_dbg_info(node);
4149 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4150 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp): the target may come from a register,
 * memory (address mode) or an immediate. */
4158 static ir_node *gen_IJmp(ir_node *node)
4160 ir_node *block = get_nodes_block(node);
4161 ir_node *new_block = be_transform_node(block);
4162 dbg_info *dbgi = get_irn_dbg_info(node);
4163 ir_node *op = get_IJmp_target(node);
4165 ia32_address_mode_t am;
4166 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4168 assert(get_irn_mode(op) == mode_P);
4170 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4172 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4173 addr->mem, am.new_op2);
4174 set_am_attributes(new_node, &am);
4175 SET_IA32_ORIG_NODE(new_node, node);
4177 new_node = fix_mem_proj(new_node, &am);
/* Transform a lowered l_Add (part of a 64-bit add pair) into a real
 * ia32 Add in mode_T so that the flags result can be consumed by Adc. */
4182 static ir_node *gen_ia32_l_Add(ir_node *node)
4184 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4185 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4186 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4187 match_commutative | match_am | match_immediate |
4188 match_mode_neutral);
/* gen_binop may hand back a Proj; step to the Add itself */
4190 if (is_Proj(lowered)) {
4191 lowered = get_Proj_pred(lowered);
4193 assert(is_ia32_Add(lowered));
/* mode_T exposes the additional (flags/carry) outputs */
4194 set_irn_mode(lowered, mode_T);
/* Transform a lowered add-with-carry into an ia32 Adc. */
4200 static ir_node *gen_ia32_l_Adc(ir_node *node)
4202 return gen_binop_flags(node, new_bd_ia32_Adc,
4203 match_commutative | match_am | match_immediate |
4204 match_mode_neutral);
4208  * Transforms a l_MulS into a "real" MulS node.
4210  * @return the created ia32 Mul node
4212 static ir_node *gen_ia32_l_Mul(ir_node *node)
4214 ir_node *left = get_binop_left(node);
4215 ir_node *right = get_binop_right(node);
4217 return gen_binop(node, left, right, new_bd_ia32_Mul,
4218 match_commutative | match_am | match_mode_neutral);
4222  * Transforms a l_IMulS into a "real" IMul1OPS node.
4224  * @return the created ia32 IMul1OP node
4226 static ir_node *gen_ia32_l_IMul(ir_node *node)
4228 ir_node *left = get_binop_left(node);
4229 ir_node *right = get_binop_right(node);
4231 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4232 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered l_Sub (part of a 64-bit subtract pair) into a real
 * ia32 Sub in mode_T so that the borrow flag can be consumed by Sbb. */
4235 static ir_node *gen_ia32_l_Sub(ir_node *node)
4237 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4238 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4239 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4240 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; step to the Sub itself */
4242 if (is_Proj(lowered)) {
4243 lowered = get_Proj_pred(lowered);
4245 assert(is_ia32_Sub(lowered));
/* mode_T exposes the additional (flags/borrow) outputs */
4246 set_irn_mode(lowered, mode_T);
/* Transform a lowered subtract-with-borrow into an ia32 Sbb. */
4252 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4254 return gen_binop_flags(node, new_bd_ia32_Sbb,
4255 match_am | match_immediate | match_mode_neutral);
/* Transform a lowered 64-bit-integer-to-float conversion: spill both
 * register halves to a 64-bit frame slot and fild the result.  For
 * unsigned inputs a bias constant is added to correct the signed
 * interpretation of the high word. */
4258 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4260 ir_node *src_block = get_nodes_block(node);
4261 ir_node *block = be_transform_node(src_block);
4262 ir_graph *irg = current_ir_graph;
4263 dbg_info *dbgi = get_irn_dbg_info(node);
4264 ir_node *frame = get_irg_frame(irg);
4265 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4266 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4267 ir_node *new_val_low = be_transform_node(val_low);
4268 ir_node *new_val_high = be_transform_node(val_high);
4270 ir_node *sync, *fild, *res;
4272 ir_node *store_high;
4276 if (ia32_cg_config.use_sse2) {
4277 panic("not implemented for SSE2");
/* store the two 32-bit halves into one contiguous 64-bit frame slot */
4281 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4283 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4285 SET_IA32_ORIG_NODE(store_low, node);
4286 SET_IA32_ORIG_NODE(store_high, node);
4288 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4289 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4291 set_ia32_use_frame(store_low);
4292 set_ia32_use_frame(store_high);
4293 set_ia32_op_type(store_low, ia32_AddrModeD);
4294 set_ia32_op_type(store_high, ia32_AddrModeD);
4295 set_ia32_ls_mode(store_low, mode_Iu);
4296 set_ia32_ls_mode(store_high, mode_Is);
/* high word lives 4 bytes above the low word (little endian) */
4297 add_ia32_am_offs_int(store_high, 4);
/* both stores must complete before the fild reads the slot */
4301 sync = new_rd_Sync(dbgi, block, 2, in);
4304 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4306 set_ia32_use_frame(fild);
4307 set_ia32_op_type(fild, ia32_AddrModeS);
4308 set_ia32_ls_mode(fild, mode_Ls);
4310 SET_IA32_ORIG_NODE(fild, node);
4312 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4314 if (! mode_is_signed(get_irn_mode(val_high))) {
/* unsigned source: fild interpreted the high word as signed.  If the
 * sign bit was set, add the ULLBIAS correction constant (2^64) which is
 * selected via the high word's sign bit (Shr by 31 as table index). */
4315 ia32_address_mode_t am;
4317 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4320 am.addr.base = get_symconst_base();
4321 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4322 am.addr.mem = nomem;
4325 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4326 am.addr.tls_segment = false;
4327 am.addr.use_frame = 0;
4328 am.addr.frame_entity = NULL;
4329 am.addr.symconst_sign = 0;
4330 am.ls_mode = mode_F;
4331 am.mem_proj = nomem;
4332 am.op_type = ia32_AddrModeS;
4334 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4335 am.pinned = op_pin_state_floats;
4337 am.ins_permuted = false;
4339 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4340 am.new_op1, am.new_op2, get_fpcw());
4341 set_am_attributes(fadd, &am);
4343 set_irn_mode(fadd, mode_T);
4344 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Transform a lowered float-to-64-bit-integer conversion: fist the x87
 * value into a 64-bit frame slot.  The integer halves are picked up later
 * by gen_Proj_l_FloattoLL via 32-bit loads. */
4349 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4351 ir_node *src_block = get_nodes_block(node);
4352 ir_node *block = be_transform_node(src_block);
4353 ir_graph *irg = get_Block_irg(block);
4354 dbg_info *dbgi = get_irn_dbg_info(node);
4355 ir_node *frame = get_irg_frame(irg);
4356 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4357 ir_node *new_val = be_transform_node(val);
4360 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4361 SET_IA32_ORIG_NODE(fist, node);
4362 set_ia32_use_frame(fist);
4363 set_ia32_op_type(fist, ia32_AddrModeD);
4364 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may produce vfist or vfisttp; their M projs must coincide */
4366 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4367 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Transform a Proj of l_FloattoLL: load the requested 32-bit half of the
 * 64-bit integer that gen_ia32_l_FloattoLL spilled to the frame. */
4370 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4372 ir_node *block = be_transform_node(get_nodes_block(node));
4373 ir_graph *irg = get_Block_irg(block);
4374 ir_node *pred = get_Proj_pred(node);
4375 ir_node *new_pred = be_transform_node(pred);
4376 ir_node *frame = get_irg_frame(irg);
4377 dbg_info *dbgi = get_irn_dbg_info(node);
4378 long pn = get_Proj_proj(node);
4383 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4384 SET_IA32_ORIG_NODE(load, node);
4385 set_ia32_use_frame(load);
4386 set_ia32_op_type(load, ia32_AddrModeS);
4387 set_ia32_ls_mode(load, mode_Iu);
4388 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4389  * 32 bit from it with this particular load */
4390 attr = get_ia32_attr(load);
4391 attr->data.need_64bit_stackent = 1;
/* high half sits 4 bytes above the low half (little endian) */
4393 if (pn == pn_ia32_l_FloattoLL_res_high) {
4394 add_ia32_am_offs_int(load, 4);
4396 assert(pn == pn_ia32_l_FloattoLL_res_low);
4399 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4405  * Transform the Projs of an AddSP.
4407 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4409 ir_node *pred = get_Proj_pred(node);
4410 ir_node *new_pred = be_transform_node(pred);
4411 dbg_info *dbgi = get_irn_dbg_info(node);
4412 long proj = get_Proj_proj(node);
/* be_AddSP was transformed to an ia32 SubSP (stack grows downwards), so
 * renumber the projections accordingly */
4414 if (proj == pn_be_AddSP_sp) {
4415 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4416 pn_ia32_SubSP_stack);
/* the new stack pointer is pinned to %esp */
4417 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4419 } else if (proj == pn_be_AddSP_res) {
4420 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4421 pn_ia32_SubSP_addr);
4422 } else if (proj == pn_be_AddSP_M) {
4423 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4426 panic("No idea how to transform proj->AddSP");
4430  * Transform the Projs of a SubSP.
4432 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4434 ir_node *pred = get_Proj_pred(node);
4435 ir_node *new_pred = be_transform_node(pred);
4436 dbg_info *dbgi = get_irn_dbg_info(node);
4437 long proj = get_Proj_proj(node);
/* be_SubSP was transformed to an ia32 AddSP; renumber the projections */
4439 if (proj == pn_be_SubSP_sp) {
4440 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4441 pn_ia32_AddSP_stack);
/* the new stack pointer is pinned to %esp */
4442 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4444 } else if (proj == pn_be_SubSP_M) {
4445 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4448 panic("No idea how to transform proj->SubSP");
4452  * Transform and renumber the Projs from a Load.
4454 static ir_node *gen_Proj_Load(ir_node *node)
4457 ir_node *pred = get_Proj_pred(node);
4458 dbg_info *dbgi = get_irn_dbg_info(node);
4459 long proj = get_Proj_proj(node);
4461 /* loads might be part of source address mode matches, so we don't
4462  * transform the ProjMs yet (with the exception of loads whose result is
4465 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4468 /* this is needed, because sometimes we have loops that are only
4469    reachable through the ProjM */
4470 be_enqueue_preds(node);
4471 /* do it in 2 steps, to silence firm verifier */
4472 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4473 set_Proj_proj(res, pn_ia32_mem);
4477 /* renumber the proj */
4478 new_pred = be_transform_node(pred);
4479 if (is_ia32_Load(new_pred)) {
4480 switch ((pn_Load)proj) {
4482 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4484 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4485 case pn_Load_X_except:
4486 /* This Load might raise an exception. Mark it. */
4487 set_ia32_exc_label(new_pred, 1);
4488 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4489 case pn_Load_X_regular:
4490 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* the Load got folded into a conversion via source address mode */
4492 } else if (is_ia32_Conv_I2I(new_pred) ||
4493 is_ia32_Conv_I2I8Bit(new_pred)) {
4494 set_irn_mode(new_pred, mode_T);
4495 switch ((pn_Load)proj) {
4497 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4499 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4500 case pn_Load_X_except:
4501 /* This Load might raise an exception. Mark it. */
4502 set_ia32_exc_label(new_pred, 1);
4503 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4504 case pn_Load_X_regular:
4505 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load */
4507 } else if (is_ia32_xLoad(new_pred)) {
4508 switch ((pn_Load)proj) {
4510 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4512 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4513 case pn_Load_X_except:
4514 /* This Load might raise an exception. Mark it. */
4515 set_ia32_exc_label(new_pred, 1);
4516 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4517 case pn_Load_X_regular:
4518 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load */
4520 } else if (is_ia32_vfld(new_pred)) {
4521 switch ((pn_Load)proj) {
4523 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4525 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4526 case pn_Load_X_except:
4527 /* This Load might raise an exception. Mark it. */
4528 set_ia32_exc_label(new_pred, 1);
4529 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4530 case pn_Load_X_regular:
4531 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4534 /* can happen for ProJMs when source address mode happened for the
4537    however it should not be the result proj, as that would mean the
4538    load had multiple users and should not have been used for
4540 if (proj != pn_Load_M) {
4541 panic("internal error: transformed node not a Load");
4543 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4546 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs from a Store, dispatching on the kind
 * of node the Store was transformed into (GP store, x87 fist/fisttp/fst,
 * SSE store, Sync from float-const lowering, or destination AM). */
4549 static ir_node *gen_Proj_Store(ir_node *node)
4551 ir_node *pred = get_Proj_pred(node);
4552 ir_node *new_pred = be_transform_node(pred);
4553 dbg_info *dbgi = get_irn_dbg_info(node);
4554 long pn = get_Proj_proj(node);
4556 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4557 switch ((pn_Store)pn) {
4559 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4560 case pn_Store_X_except:
4561 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4562 case pn_Store_X_regular:
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4565 } else if (is_ia32_vfist(new_pred)) {
4566 switch ((pn_Store)pn) {
4568 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4569 case pn_Store_X_except:
4570 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4571 case pn_Store_X_regular:
4572 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4574 } else if (is_ia32_vfisttp(new_pred)) {
4575 switch ((pn_Store)pn) {
4577 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4578 case pn_Store_X_except:
4579 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4580 case pn_Store_X_regular:
4581 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4583 } else if (is_ia32_vfst(new_pred)) {
4584 switch ((pn_Store)pn) {
4586 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4587 case pn_Store_X_except:
4588 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4589 case pn_Store_X_regular:
4590 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4592 } else if (is_ia32_xStore(new_pred)) {
4593 switch ((pn_Store)pn) {
4595 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4596 case pn_Store_X_except:
4597 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4598 case pn_Store_X_regular:
4599 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4601 } else if (is_Sync(new_pred)) {
4602 /* hack for the case that gen_float_const_Store produced a Sync */
4603 if (pn == pn_Store_M) {
4606 panic("exception control flow not implemented yet");
4607 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4608 /* destination address mode */
4609 if (pn == pn_Store_M) {
4612 panic("exception control flow for destination AM not implemented yet");
4615 panic("No idea how to transform Proj(Store) %+F", node);
4619  * Transform and renumber the Projs from a Div or Mod instruction.
4621 static ir_node *gen_Proj_Div(ir_node *node)
4623 ir_node *pred = get_Proj_pred(node);
4624 ir_node *new_pred = be_transform_node(pred);
4625 dbg_info *dbgi = get_irn_dbg_info(node);
4626 long proj = get_Proj_proj(node);
/* Div and IDiv must share proj numbering so one switch handles both */
4628 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4629 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4631 switch ((pn_Div)proj) {
4633 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4634 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4635 } else if (is_ia32_xDiv(new_pred)) {
4636 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4637 } else if (is_ia32_vfdiv(new_pred)) {
4638 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4640 panic("Div transformed to unexpected thing %+F", new_pred);
4643 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4644 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4645 } else if (is_ia32_xDiv(new_pred)) {
4646 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4647 } else if (is_ia32_vfdiv(new_pred)) {
4648 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4650 panic("Div transformed to unexpected thing %+F", new_pred);
4652 case pn_Div_X_except:
/* mark the instruction as potentially raising an exception */
4653 set_ia32_exc_label(new_pred, 1);
4654 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4655 case pn_Div_X_regular:
4656 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4659 panic("No idea how to transform proj->Div");
4663  * Transform and renumber the Projs from a Div or Mod instruction.
4665 static ir_node *gen_Proj_Mod(ir_node *node)
4667 ir_node *pred = get_Proj_pred(node);
4668 ir_node *new_pred = be_transform_node(pred);
4669 dbg_info *dbgi = get_irn_dbg_info(node);
4670 long proj = get_Proj_proj(node);
/* Mod is always integer, so only Div/IDiv can be the predecessor; both
 * share proj numbering */
4672 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4673 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4674 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4676 switch ((pn_Mod)proj) {
4678 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4680 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4681 case pn_Mod_X_except:
/* mark the instruction as potentially raising an exception */
4682 set_ia32_exc_label(new_pred, 1);
4683 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4684 case pn_Mod_X_regular:
4685 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4687 panic("No idea how to transform proj->Mod");
4691  * Transform and renumber the Projs from a CopyB.
4693 static ir_node *gen_Proj_CopyB(ir_node *node)
4695 ir_node *pred = get_Proj_pred(node);
4696 ir_node *new_pred = be_transform_node(pred);
4697 dbg_info *dbgi = get_irn_dbg_info(node);
4698 long proj = get_Proj_proj(node);
/* CopyB is transformed to either CopyB_i (known size) or CopyB
 * (size in a register); dispatch on which one it became */
4700 switch ((pn_CopyB)proj) {
4702 if (is_ia32_CopyB_i(new_pred)) {
4703 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4704 } else if (is_ia32_CopyB(new_pred)) {
4705 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4708 case pn_CopyB_X_regular:
4709 if (is_ia32_CopyB_i(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4711 } else if (is_ia32_CopyB(new_pred)) {
4712 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4715 case pn_CopyB_X_except:
4716 if (is_ia32_CopyB_i(new_pred)) {
4717 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4718 } else if (is_ia32_CopyB(new_pred)) {
4719 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4724 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call: match the call target for
 * address mode / immediate, route register parameters into eax/ecx/edx,
 * and thread sp, fpcw and memory through the new node. */
4727 static ir_node *gen_be_Call(ir_node *node)
4729 dbg_info *const dbgi = get_irn_dbg_info(node);
4730 ir_node *const src_block = get_nodes_block(node);
4731 ir_node *const block = be_transform_node(src_block);
4732 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4733 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4734 ir_node *const sp = be_transform_node(src_sp);
4735 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4736 ia32_address_mode_t am;
4737 ia32_address_t *const addr = &am.addr;
4742 ir_node * eax = noreg_GP;
4743 ir_node * ecx = noreg_GP;
4744 ir_node * edx = noreg_GP;
4745 unsigned const pop = be_Call_get_pop(node);
4746 ir_type *const call_tp = be_Call_get_type(node);
4747 int old_no_pic_adjust;
4748 int throws_exception = ir_throws_exception(node);
4750 /* Run the x87 simulator if the call returns a float value */
4751 if (get_method_n_ress(call_tp) > 0) {
4752 ir_type *const res_type = get_method_res_type(call_tp, 0);
4753 ir_mode *const res_mode = get_type_mode(res_type);
4755 if (res_mode != NULL && mode_is_float(res_mode)) {
4756 ir_graph *irg = current_ir_graph;
4757 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4758 irg_data->do_x87_sim = 1;
4762 /* We do not want be_Call direct calls */
4763 assert(be_Call_get_entity(node) == NULL);
4765 /* special case for PIC trampoline calls */
4766 old_no_pic_adjust = ia32_no_pic_adjust;
4767 ia32_no_pic_adjust = be_options.pic;
4769 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4770 match_am | match_immediate);
/* restore the flag; the PIC suppression only applies to the call target */
4772 ia32_no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw, the preceding ones are register parameters */
4774 i = get_irn_arity(node) - 1;
4775 fpcw = be_transform_node(get_irn_n(node, i--));
4776 for (; i >= n_be_Call_first_arg; --i) {
4777 arch_register_req_t const *const req
4778 = arch_get_irn_register_req_in(node, i);
4779 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
/* register parameters must be pinned to exactly one GP register */
4781 assert(req->type == arch_register_req_type_limited);
4782 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4784 switch (*req->limited) {
4785 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4786 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4787 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4788 default: panic("Invalid GP register for register parameter");
4792 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4793 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4794 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4795 ir_set_throws_exception(call, throws_exception);
4796 set_am_attributes(call, &am);
4797 call = fix_mem_proj(call, &am);
4799 if (get_irn_pinned(node) == op_pin_state_pinned)
4800 set_irn_pinned(call, op_pin_state_pinned);
4802 SET_IA32_ORIG_NODE(call, node);
4804 if (ia32_cg_config.use_sse2) {
4805 /* remember this call for post-processing */
4806 ARR_APP1(ir_node *, call_list, call);
4807 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4814 * Transform Builtin trap
4816 static ir_node *gen_trap(ir_node *node)
4818 dbg_info *dbgi = get_irn_dbg_info(node);
4819 ir_node *block = be_transform_node(get_nodes_block(node));
4820 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4822 return new_bd_ia32_UD2(dbgi, block, mem);
4826 * Transform Builtin debugbreak
4828 static ir_node *gen_debugbreak(ir_node *node)
4830 dbg_info *dbgi = get_irn_dbg_info(node);
4831 ir_node *block = be_transform_node(get_nodes_block(node));
4832 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4834 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4838 * Transform Builtin return_address
/*
 * Transform Builtin return_address: climb `value` frames upwards via
 * ClimbFrame, then load the return-address slot of the resulting frame.
 * NOTE(review): several interior lines are missing from this extraction
 * (e.g. a probable `if (value > 0)` guard around the ClimbFrame code and
 * the declaration of `load`) — confirm against the full file.
 */
4840 static ir_node *gen_return_address(ir_node *node)
4842 ir_node *param = get_Builtin_param(node, 0);
4843 ir_node *frame = get_Builtin_param(node, 1);
4844 dbg_info *dbgi = get_irn_dbg_info(node);
/* first builtin parameter is the (constant) frame level to walk up */
4845 ir_tarval *tv = get_Const_tarval(param);
4846 ir_graph *irg = get_irn_irg(node);
4847 unsigned long value = get_tarval_long(tv);
4849 ir_node *block = be_transform_node(get_nodes_block(node));
4850 ir_node *ptr = be_transform_node(frame);
/* ProduceVal supplies scratch values consumed by ClimbFrame */
4854 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4855 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4856 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4859 /* load the return address from this frame */
4860 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4862 set_irn_pinned(load, get_irn_pinned(node));
4863 set_ia32_op_type(load, ia32_AddrModeS);
4864 set_ia32_ls_mode(load, mode_Iu);
4866 set_ia32_am_offs_int(load, 0);
4867 set_ia32_use_frame(load);
4868 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* floating loads may be rematerialized; the asserts document that all
 * Load flavours share the same result proj number */
4870 if (get_irn_pinned(node) == op_pin_state_floats) {
4871 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4872 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4873 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4874 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4877 SET_IA32_ORIG_NODE(load, node);
4878 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4882 * Transform Builtin frame_address
/*
 * Transform Builtin frame_address: climb `value` frames upwards via
 * ClimbFrame, then load the saved frame pointer of that frame.
 * NOTE(review): interior lines (guard around ClimbFrame, declarations of
 * `load` and `ent`) are missing from this extraction — confirm against
 * the full file.
 */
4884 static ir_node *gen_frame_address(ir_node *node)
4886 ir_node *param = get_Builtin_param(node, 0);
4887 ir_node *frame = get_Builtin_param(node, 1);
4888 dbg_info *dbgi = get_irn_dbg_info(node);
4889 ir_tarval *tv = get_Const_tarval(param);
4890 ir_graph *irg = get_irn_irg(node);
4891 unsigned long value = get_tarval_long(tv);
4893 ir_node *block = be_transform_node(get_nodes_block(node));
4894 ir_node *ptr = be_transform_node(frame);
4899 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4900 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4901 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4904 /* load the frame address from this frame */
4905 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4907 set_irn_pinned(load, get_irn_pinned(node));
4908 set_ia32_op_type(load, ia32_AddrModeS);
4909 set_ia32_ls_mode(load, mode_Iu);
4911 ent = ia32_get_frame_address_entity(irg);
4913 set_ia32_am_offs_int(load, 0);
4914 set_ia32_use_frame(load);
4915 set_ia32_frame_ent(load, ent);
4917 /* will fail anyway, but gcc does this: */
4918 set_ia32_am_offs_int(load, 0);
/* same remat rule as in gen_return_address */
4921 if (get_irn_pinned(node) == op_pin_state_floats) {
4922 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4923 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4924 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4925 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4928 SET_IA32_ORIG_NODE(load, node);
4929 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
 * Transform Builtin prefetch.
/*
 * (review: the original header comment said "frame_address" — copy-paste
 * error; this function transforms the prefetch builtin.)
 *
 * Parameter 0 is the address, parameter 1 the read/write flag, parameter 2
 * (SSE only) the locality hint. Falls back to routing memory through when
 * neither SSE nor 3DNow! prefetch is available.
 * NOTE(review): the NULL-checks before the base/index transforms and the
 * `switch (locality)` case labels are missing from this extraction —
 * confirm against the full file.
 */
4935 static ir_node *gen_prefetch(ir_node *node)
4938 ir_node *ptr, *block, *mem, *base, *idx;
4939 ir_node *param, *new_node;
4942 ia32_address_t addr;
4944 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4945 /* no prefetch at all, route memory */
4946 return be_transform_node(get_Builtin_mem(node));
/* second parameter: 0 = prefetch for read, 1 = prefetch for write */
4949 param = get_Builtin_param(node, 1);
4950 tv = get_Const_tarval(param);
4951 rw = get_tarval_long(tv);
4953 /* construct load address */
4954 memset(&addr, 0, sizeof(addr));
4955 ptr = get_Builtin_param(node, 0);
4956 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4963 base = be_transform_node(base);
4969 idx = be_transform_node(idx);
4972 dbgi = get_irn_dbg_info(node);
4973 block = be_transform_node(get_nodes_block(node));
4974 mem = be_transform_node(get_Builtin_mem(node));
4976 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4977 /* we have 3DNow!, this was already checked above */
4978 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4979 } else if (ia32_cg_config.use_sse_prefetch) {
4980 /* note: rw == 1 is IGNORED in that case */
4981 param = get_Builtin_param(node, 2);
4982 tv = get_Const_tarval(param);
4983 locality = get_tarval_long(tv);
4985 /* SSE style prefetch */
/* locality hint selects NTA/T2/T1/T0 variants (lowest to highest) */
4988 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4991 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4994 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4997 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
5001 assert(ia32_cg_config.use_3dnow_prefetch);
5002 /* 3DNow! style prefetch */
5003 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
5006 set_irn_pinned(new_node, get_irn_pinned(node));
5007 set_ia32_op_type(new_node, ia32_AddrModeS);
5008 set_ia32_ls_mode(new_node, mode_Bu);
5009 set_address(new_node, &addr);
5011 SET_IA32_ORIG_NODE(new_node, node);
5013 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5017 * Transform bsf like node
/*
 * Transform a unary builtin (bsf/bsr-like) whose single operand may be
 * folded into an address-mode memory operand. `func` is the node
 * constructor for the concrete instruction.
 * NOTE(review): the declaration of `cnt` is missing from this extraction.
 */
5019 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5021 ir_node *param = get_Builtin_param(node, 0);
5022 dbg_info *dbgi = get_irn_dbg_info(node);
5024 ir_node *block = get_nodes_block(node);
5025 ir_node *new_block = be_transform_node(block);
5027 ia32_address_mode_t am;
5028 ia32_address_t *addr = &am.addr;
/* try to use the operand directly from memory (address mode) */
5031 match_arguments(&am, block, NULL, param, NULL, match_am);
5033 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5034 set_am_attributes(cnt, &am);
5035 set_ia32_ls_mode(cnt, get_irn_mode(param));
5037 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj re-routes memory users if an AM load was folded in */
5038 return fix_mem_proj(cnt, &am);
5042 * Transform builtin ffs.
/*
 * ffs(x) = bsf(x) + 1, with ffs(0) = 0. Implemented branch-free:
 * bsf sets ZF when the input is 0; Setcc/Neg turn that into an all-ones
 * mask which is OR-ed over the bsf result, so that the final Lea(+1)
 * yields 0 for a zero input.
 * NOTE(review): the trailing `return add;` is missing from this
 * extraction — confirm against the full file.
 */
5044 static ir_node *gen_ffs(ir_node *node)
5046 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5047 ir_node *real = skip_Proj(bsf);
5048 dbg_info *dbgi = get_irn_dbg_info(real);
5049 ir_node *block = get_nodes_block(real);
5050 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make the flags output of the Bsf accessible */
5053 if (get_irn_mode(real) != mode_T) {
5054 set_irn_mode(real, mode_T);
5055 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5058 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (input was zero) ? 1 : 0 */
5061 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5062 SET_IA32_ORIG_NODE(set, node);
5065 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5066 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or 0xFFFFFFFF (all-ones mask for the zero-input case) */
5069 neg = new_bd_ia32_Neg(dbgi, block, conv);
5072 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5073 set_ia32_ls_mode(orn, mode_Iu);
5074 set_ia32_commutative(orn);
/* result = orn + 1 (Lea avoids clobbering flags) */
5077 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5078 add_ia32_am_offs_int(add, 1);
5083 * Transform builtin clz.
5085 static ir_node *gen_clz(ir_node *node)
5087 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5088 ir_node *real = skip_Proj(bsr);
5089 dbg_info *dbgi = get_irn_dbg_info(real);
5090 ir_node *block = get_nodes_block(real);
5091 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5093 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5097 * Transform builtin ctz.
5099 static ir_node *gen_ctz(ir_node *node)
5101 return gen_unop_AM(node, new_bd_ia32_Bsf);
5105 * Transform builtin parity.
/*
 * The x86 parity flag only covers the lowest result byte, so the 32 bit
 * input is folded down first: xor the high and low halfwords, then the
 * high and low bytes (XorHighLow), and finally test the parity flag.
 * NOTE(review): the declarations of `flags`/`new_node` and the trailing
 * `return new_node;` are missing from this extraction.
 */
5107 static ir_node *gen_parity(ir_node *node)
5109 dbg_info *dbgi = get_irn_dbg_info(node);
5110 ir_node *block = get_nodes_block(node);
5111 ir_node *new_block = be_transform_node(block);
5112 ir_node *param = get_Builtin_param(node, 0);
5113 ir_node *new_param = be_transform_node(param);
5116 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5117 * so we have to do complicated xoring first.
5118 * (we should also better lower this before the backend so we still have a
5119 * chance for CSE, constant folding and other goodies for some of these
/* fold the upper halfword into the lower one */
5122 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5123 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5124 ir_node *xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
/* XorHighLow xors the high byte of ax into the low byte */
5126 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5129 set_ia32_ls_mode(xorn, mode_Iu);
5130 set_ia32_commutative(xorn);
/* expose the flags output of XorHighLow */
5132 set_irn_mode(xor2, mode_T);
5133 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5136 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5137 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 1-byte Setcc result to a full register */
5140 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5141 nomem, new_node, mode_Bu);
5142 SET_IA32_ORIG_NODE(new_node, node);
5147 * Transform builtin popcount
/*
 * Transform builtin popcount. Uses the popcnt instruction when available
 * (SSE4.2/SSE4a), otherwise falls back to the classic divide-and-conquer
 * bit-counting sequence (Hacker's Delight, fig. 5-1): sum bit pairs, then
 * nibbles, bytes, halfwords; Lea is used for the flag-free additions.
 * NOTE(review): the declarations of `cnt`/`new_param` appear to be in
 * lines missing from this extraction.
 */
5149 static ir_node *gen_popcount(ir_node *node)
5151 ir_node *param = get_Builtin_param(node, 0);
5152 dbg_info *dbgi = get_irn_dbg_info(node);
5154 ir_node *block = get_nodes_block(node);
5155 ir_node *new_block = be_transform_node(block);
5158 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5160 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5161 if (ia32_cg_config.use_popcnt) {
5162 ia32_address_mode_t am;
5163 ia32_address_t *addr = &am.addr;
/* the operand may be folded into an address-mode memory access */
5166 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5168 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5169 set_am_attributes(cnt, &am);
5170 set_ia32_ls_mode(cnt, get_irn_mode(param));
5172 SET_IA32_ORIG_NODE(cnt, node);
5173 return fix_mem_proj(cnt, &am);
5176 new_param = be_transform_node(param);
5178 /* do the standard popcount algo */
5179 /* TODO: This is stupid, we should transform this before the backend,
5180 * to get CSE, localopts, etc. for the operations
5181 * TODO: This is also not the optimal algorithm (it is just the starting
5182 * example in hackers delight, they optimize it more on the following page)
5183 * But I'm too lazy to fix this now, as the code should get lowered before
5184 * the backend anyway.
5187 /* m1 = x & 0x55555555 */
5188 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5189 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5192 simm = ia32_create_Immediate(NULL, 0, 1);
5193 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5195 /* m2 = s1 & 0x55555555 */
5196 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2: per-pair bit counts */
5199 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5201 /* m4 = m3 & 0x33333333 */
5202 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5203 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5206 simm = ia32_create_Immediate(NULL, 0, 2);
5207 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5209 /* m5 = s2 & 0x33333333 */
5210 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5: per-nibble bit counts */
5213 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5215 /* m7 = m6 & 0x0F0F0F0F */
5216 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5217 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5220 simm = ia32_create_Immediate(NULL, 0, 4);
5221 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5223 /* m8 = s3 & 0x0F0F0F0F */
5224 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8: per-byte bit counts */
5227 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5229 /* m10 = m9 & 0x00FF00FF */
5230 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5231 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5234 simm = ia32_create_Immediate(NULL, 0, 8);
5235 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5237 /* m11 = s4 & 0x00FF00FF */
5238 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5240 /* m12 = m10 + m11 */
5241 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5243 /* m13 = m12 & 0x0000FFFF */
5244 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5245 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5247 /* s5 = m12 >> 16 */
5248 simm = ia32_create_Immediate(NULL, 0, 16);
5249 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5251 /* res = m13 + s5 */
5252 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5256 * Transform builtin byte swap.
/*
 * Transform builtin byte swap. For 32 bit values either the bswap
 * instruction is used (when available) or the value is byte-reversed with
 * three rotates: rol16(rol32(rol16(x))). 16 bit values use Bswap16.
 * NOTE(review): the `switch (size)` skeleton and several case/return
 * lines are missing from this extraction — confirm against the full file.
 */
5258 static ir_node *gen_bswap(ir_node *node)
5260 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5261 dbg_info *dbgi = get_irn_dbg_info(node);
5263 ir_node *block = get_nodes_block(node);
5264 ir_node *new_block = be_transform_node(block);
5265 ir_mode *mode = get_irn_mode(param);
5266 unsigned size = get_mode_size_bits(mode);
5270 if (ia32_cg_config.use_bswap) {
5271 /* swap available */
5272 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* fallback: three rotates reverse the four bytes */
5274 ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5275 ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5276 ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5277 ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5278 ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
/* the 16 bit rotates only touch the low halfword */
5279 set_ia32_ls_mode(rol1, mode_Hu);
5280 set_ia32_ls_mode(rol2, mode_Iu);
5281 set_ia32_ls_mode(rol3, mode_Hu);
5286 /* swap16 always available */
5287 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5290 panic("Invalid bswap size (%d)", size);
5295 * Transform builtin outport.
/*
 * Transform builtin outport: emit an ia32 Outport (out instruction) that
 * writes `value` with the mode of the second builtin parameter to the
 * given I/O port.
 * NOTE(review): the trailing SET_IA32_ORIG_NODE/return lines are missing
 * from this extraction.
 */
5297 static ir_node *gen_outport(ir_node *node)
/* the port number may become an immediate operand */
5299 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5300 ir_node *oldv = get_Builtin_param(node, 1);
5301 ir_mode *mode = get_irn_mode(oldv);
5302 ir_node *value = be_transform_node(oldv);
5303 ir_node *block = be_transform_node(get_nodes_block(node));
5304 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5305 dbg_info *dbgi = get_irn_dbg_info(node);
5307 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5308 set_ia32_ls_mode(res, mode);
5313 * Transform builtin inport.
/*
 * Transform builtin inport: emit an ia32 Inport (in instruction) reading
 * a value of the builtin's declared result mode from the given I/O port.
 * NOTE(review): the tail of the function (result-Proj handling/return) is
 * missing from this extraction.
 */
5315 static ir_node *gen_inport(ir_node *node)
/* the result mode comes from the builtin's method type */
5317 ir_type *tp = get_Builtin_type(node);
5318 ir_type *rstp = get_method_res_type(tp, 0);
5319 ir_mode *mode = get_type_mode(rstp);
5320 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5321 ir_node *block = be_transform_node(get_nodes_block(node));
5322 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5323 dbg_info *dbgi = get_irn_dbg_info(node);
5325 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5326 set_ia32_ls_mode(res, mode);
5328 /* check for missing Result Proj */
5333 * Transform a builtin inner trampoline
/*
 * Transform a builtin inner trampoline: materialize a small code stub at
 * `ptr` consisting of "mov ecx, <env>" (opcode 0xB9 + imm32) followed by
 * "jmp rel32 <callee>" (opcode 0xE9 + displacement). The displacement is
 * callee - (trampoline + 10), 10 being the stub length.
 * NOTE(review): several interior lines (address offset advances between
 * the stores, chaining of addr.mem, the else branches) are missing from
 * this extraction — confirm against the full file.
 */
5335 static ir_node *gen_inner_trampoline(ir_node *node)
5337 ir_node *ptr = get_Builtin_param(node, 0);
5338 ir_node *callee = get_Builtin_param(node, 1);
5339 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5340 ir_node *mem = get_Builtin_mem(node);
5341 ir_node *block = get_nodes_block(node);
5342 ir_node *new_block = be_transform_node(block);
5346 ir_node *trampoline;
5348 dbg_info *dbgi = get_irn_dbg_info(node);
5349 ia32_address_t addr;
5351 /* construct store address */
5352 memset(&addr, 0, sizeof(addr));
5353 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5355 if (addr.base == NULL) {
5356 addr.base = noreg_GP;
5358 addr.base = be_transform_node(addr.base);
5361 if (addr.index == NULL) {
5362 addr.index = noreg_GP;
5364 addr.index = be_transform_node(addr.index);
5366 addr.mem = be_transform_node(mem);
5368 /* mov ecx, <env> */
/* 0xB9 = mov ecx, imm32 opcode byte */
5369 val = ia32_create_Immediate(NULL, 0, 0xB9);
5370 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5371 addr.index, addr.mem, val);
5372 set_irn_pinned(store, get_irn_pinned(node));
5373 set_ia32_op_type(store, ia32_AddrModeD);
5374 set_ia32_ls_mode(store, mode_Bu);
5375 set_address(store, &addr);
/* store the 32 bit immediate operand (the environment pointer) */
5379 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5380 addr.index, addr.mem, env);
5381 set_irn_pinned(store, get_irn_pinned(node));
5382 set_ia32_op_type(store, ia32_AddrModeD);
5383 set_ia32_ls_mode(store, mode_Iu);
5384 set_address(store, &addr);
5388 /* jmp rel <callee> */
/* 0xE9 = jmp rel32 opcode byte */
5389 val = ia32_create_Immediate(NULL, 0, 0xE9);
5390 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5391 addr.index, addr.mem, val);
5392 set_irn_pinned(store, get_irn_pinned(node));
5393 set_ia32_op_type(store, ia32_AddrModeD);
5394 set_ia32_ls_mode(store, mode_Bu);
5395 set_address(store, &addr);
5399 trampoline = be_transform_node(ptr);
5401 /* the callee is typically an immediate */
5402 if (is_SymConst(callee)) {
/* -10 compensates for the stub length in the rel32 displacement */
5403 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5405 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5406 add_ia32_am_offs_int(rel, -10);
/* rel = callee - trampoline */
5408 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5410 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5411 addr.index, addr.mem, rel);
5412 set_irn_pinned(store, get_irn_pinned(node));
5413 set_ia32_op_type(store, ia32_AddrModeD);
5414 set_ia32_ls_mode(store, mode_Iu);
5415 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5420 return new_r_Tuple(new_block, 2, in);
5424 * Transform Builtin node.
/*
 * Dispatch a Builtin node to the matching gen_* transformer by its kind.
 * NOTE(review): the `switch (kind)` line and several case labels
 * (ir_bk_trap, ir_bk_ffs, ir_bk_clz, ir_bk_ctz, ir_bk_parity,
 * ir_bk_bswap, ir_bk_outport, ir_bk_inport) are missing from this
 * extraction but implied by the visible return statements.
 */
5426 static ir_node *gen_Builtin(ir_node *node)
5428 ir_builtin_kind kind = get_Builtin_kind(node);
5432 return gen_trap(node);
5433 case ir_bk_debugbreak:
5434 return gen_debugbreak(node);
5435 case ir_bk_return_address:
5436 return gen_return_address(node);
5437 case ir_bk_frame_address:
5438 return gen_frame_address(node);
5439 case ir_bk_prefetch:
5440 return gen_prefetch(node);
5442 return gen_ffs(node);
5444 return gen_clz(node);
5446 return gen_ctz(node);
5448 return gen_parity(node);
5449 case ir_bk_popcount:
5450 return gen_popcount(node);
5452 return gen_bswap(node);
5454 return gen_outport(node);
5456 return gen_inport(node);
5457 case ir_bk_inner_trampoline:
5458 return gen_inner_trampoline(node);
5460 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5464 * Transform Proj(Builtin) node.
/*
 * Transform a Proj of a Builtin: map the generic Builtin result/memory
 * projections onto the projections of the transformed ia32 node.
 * NOTE(review): the `switch (kind)` line and several case labels are
 * missing from this extraction.
 */
5466 static ir_node *gen_Proj_Builtin(ir_node *proj)
5468 ir_node *node = get_Proj_pred(proj);
5469 ir_node *new_node = be_transform_node(node);
5470 ir_builtin_kind kind = get_Builtin_kind(node);
5473 case ir_bk_return_address:
5474 case ir_bk_frame_address:
5479 case ir_bk_popcount:
/* value-producing builtins: the transformed node is the result */
5481 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5484 case ir_bk_debugbreak:
5485 case ir_bk_prefetch:
/* memory-only builtins */
5487 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a result and a memory projection */
5490 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5491 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5493 assert(get_Proj_proj(proj) == pn_Builtin_M);
5494 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5496 case ir_bk_inner_trampoline:
/* inner trampoline returns a Tuple: [0] = memory, [1] = address */
5497 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5498 return get_Tuple_pred(new_node, 1);
5500 assert(get_Proj_proj(proj) == pn_Builtin_M);
5501 return get_Tuple_pred(new_node, 0);
5504 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5507 static ir_node *gen_be_IncSP(ir_node *node)
5509 ir_node *res = be_duplicate_node(node);
5510 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5516 * Transform the Projs from a be_Call.
/*
 * Transform a Proj of a be_Call: renumber the generic be_Call projection
 * numbers onto the ia32_Call projection numbers, and for result values
 * find the matching output of the new call by its register constraint.
 * NOTE(review): multiple interior lines (the gp/vfp/xmm mode mapping, the
 * loop's match/break handling, the switch statement head and closing) are
 * missing from this extraction — confirm against the full file.
 */
5518 static ir_node *gen_Proj_be_Call(ir_node *node)
5520 ir_node *call = get_Proj_pred(node);
5521 ir_node *new_call = be_transform_node(call);
5522 dbg_info *dbgi = get_irn_dbg_info(node);
5523 long proj = get_Proj_proj(node);
5524 ir_mode *mode = get_irn_mode(node);
5527 if (proj == pn_be_Call_M) {
5528 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5530 /* transform call modes */
5531 if (mode_is_data(mode)) {
5532 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5536 /* Map from be_Call to ia32_Call proj number */
5537 if (proj == pn_be_Call_sp) {
5538 proj = pn_ia32_Call_stack;
5539 } else if (proj == pn_be_Call_M) {
5540 proj = pn_ia32_Call_M;
5541 } else if (proj == pn_be_Call_X_except) {
5542 proj = pn_ia32_Call_X_except;
5543 } else if (proj == pn_be_Call_X_regular) {
5544 proj = pn_ia32_Call_X_regular;
5546 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5547 int const n_outs = arch_get_irn_n_outs(new_call);
5550 assert(proj >= pn_be_Call_first_res);
5551 assert(req->type & arch_register_req_type_limited);
/* search the output of the new call with the same register constraint */
5553 for (i = 0; i < n_outs; ++i) {
5554 arch_register_req_t const *const new_req
5555 = arch_get_irn_register_req_out(new_call, i);
5557 if (!(new_req->type & arch_register_req_type_limited) ||
5558 new_req->cls != req->cls ||
5559 *new_req->limited != *req->limited)
5568 res = new_rd_Proj(dbgi, new_call, mode, proj);
5570 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5572 case pn_ia32_Call_stack:
5573 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5576 case pn_ia32_Call_fpcw:
5577 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/*
 * Transform a Proj of an ASM node: the memory projection is always the
 * last output of the transformed ASM node.
 * NOTE(review): the bodies of the int/reference and float branches are
 * missing from this extraction — confirm against the full file.
 */
5584 static ir_node *gen_Proj_ASM(ir_node *node)
5586 ir_mode *mode = get_irn_mode(node);
5587 ir_node *pred = get_Proj_pred(node);
5588 ir_node *new_pred = be_transform_node(pred);
5589 long pos = get_Proj_proj(node);
5591 if (mode == mode_M) {
/* memory is the last output of the new ASM node */
5592 pos = arch_get_irn_n_outs(new_pred)-1;
5593 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5595 } else if (mode_is_float(mode)) {
5598 panic("unexpected proj mode at ASM");
5601 return new_r_Proj(new_pred, mode, pos);
5605 * Transform and potentially renumber Proj nodes.
/*
 * Dispatch a Proj to the specialized Proj transformer for its predecessor
 * opcode; Projs of Start and of already-transformed/foreign nodes are
 * handled inline.
 * NOTE(review): the case labels of the opcode switch (iro_Load, iro_Store,
 * iro_ASM, iro_Builtin, iro_Div, iro_Mod, iro_CopyB, beo_SubSP, beo_AddSP,
 * beo_Call, iro_Start) are missing from this extraction but implied by the
 * visible return statements.
 */
5607 static ir_node *gen_Proj(ir_node *node)
5609 ir_node *pred = get_Proj_pred(node);
5612 switch (get_irn_opcode(pred)) {
5614 return gen_Proj_Load(node);
5616 return gen_Proj_Store(node);
5618 return gen_Proj_ASM(node);
5620 return gen_Proj_Builtin(node);
5622 return gen_Proj_Div(node);
5624 return gen_Proj_Mod(node);
5626 return gen_Proj_CopyB(node);
5628 return gen_Proj_be_SubSP(node);
5630 return gen_Proj_be_AddSP(node);
5632 return gen_Proj_be_Call(node);
5634 proj = get_Proj_proj(node);
5636 case pn_Start_X_initial_exec: {
5637 ir_node *block = get_nodes_block(pred);
5638 ir_node *new_block = be_transform_node(block);
5639 dbg_info *dbgi = get_irn_dbg_info(node);
5640 /* we exchange the ProjX with a jump */
5641 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5649 if (is_ia32_l_FloattoLL(pred)) {
5650 return gen_Proj_l_FloattoLL(node);
5652 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5656 ir_mode *mode = get_irn_mode(node);
5657 if (ia32_mode_needs_gp_reg(mode)) {
5658 ir_node *new_pred = be_transform_node(pred);
5659 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5660 get_Proj_proj(node));
/* keep the debug node number stable across the transformation */
5661 new_proj->node_nr = node->node_nr;
5666 return be_duplicate_node(node);
5670 * Enters all transform functions into the generic pointer
/*
 * Register every opcode's transform callback with the be transform
 * framework. Ops without a specific handler either keep the default
 * (set up by be_start_transform_setup) or use be_duplicate_node.
 */
5672 static void register_transformers(void)
5674 /* first clear the generic function pointer for all ops */
5675 be_start_transform_setup();
5677 be_set_transform_function(op_Add, gen_Add);
5678 be_set_transform_function(op_And, gen_And);
5679 be_set_transform_function(op_ASM, ia32_gen_ASM);
5680 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5681 be_set_transform_function(op_be_Call, gen_be_Call);
5682 be_set_transform_function(op_be_Copy, gen_be_Copy);
5683 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5684 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5685 be_set_transform_function(op_be_Return, gen_be_Return);
5686 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5687 be_set_transform_function(op_Builtin, gen_Builtin);
5688 be_set_transform_function(op_Cmp, gen_Cmp);
5689 be_set_transform_function(op_Cond, gen_Cond);
5690 be_set_transform_function(op_Const, gen_Const);
5691 be_set_transform_function(op_Conv, gen_Conv);
5692 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5693 be_set_transform_function(op_Div, gen_Div);
5694 be_set_transform_function(op_Eor, gen_Eor);
5695 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5696 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5697 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5698 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5699 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5700 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5701 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5702 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5703 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5704 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5705 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5706 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5707 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5708 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5709 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5710 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5711 be_set_transform_function(op_IJmp, gen_IJmp);
5712 be_set_transform_function(op_Jmp, gen_Jmp);
5713 be_set_transform_function(op_Load, gen_Load);
5714 be_set_transform_function(op_Minus, gen_Minus);
5715 be_set_transform_function(op_Mod, gen_Mod);
5716 be_set_transform_function(op_Mul, gen_Mul);
5717 be_set_transform_function(op_Mulh, gen_Mulh);
5718 be_set_transform_function(op_Mux, gen_Mux);
5719 be_set_transform_function(op_Not, gen_Not);
5720 be_set_transform_function(op_Or, gen_Or);
5721 be_set_transform_function(op_Phi, gen_Phi);
5722 be_set_transform_function(op_Proj, gen_Proj);
5723 be_set_transform_function(op_Rotl, gen_Rotl);
5724 be_set_transform_function(op_Shl, gen_Shl);
5725 be_set_transform_function(op_Shr, gen_Shr);
5726 be_set_transform_function(op_Shrs, gen_Shrs);
5727 be_set_transform_function(op_Store, gen_Store);
5728 be_set_transform_function(op_Sub, gen_Sub);
5729 be_set_transform_function(op_Switch, gen_Switch);
5730 be_set_transform_function(op_SymConst, gen_SymConst);
5731 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5735 * Pre-transform all unknown and noreg nodes.
/*
 * Pre-transform the per-graph singleton nodes (NoReg placeholders, GetEIP,
 * FPU truncation mode) before the main transformation walk, and cache the
 * file-level `nomem`/`noreg_GP` helpers used by the transformers above.
 */
5737 static void ia32_pretransform_node(void)
5739 ir_graph *irg = current_ir_graph;
5740 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5742 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5743 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5744 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5745 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5746 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5748 nomem = get_irg_no_mem(irg);
5749 noreg_GP = ia32_new_NoReg_gp(irg);
5753 * Post-process all calls if we are in SSE mode.
5754 * The ABI requires that the results are in st0, copy them
5755 * to a xmm register.
/*
 * For each recorded call whose method type returns a float, the value
 * arrives in st(0) per the x86 calling convention, but SSE code expects
 * it in an xmm register. Users of the result are either patched directly
 * (xStore becomes vfst) or the value is spilled to the frame with vfst
 * and reloaded into an xmm register with xLoad.
 * NOTE(review): several interior lines (loop variable declarations, the
 * continue/break statements, the `if (res == NULL) continue;` style
 * guards, the vfst/xLoad mode arguments) are missing from this
 * extraction — confirm against the full file.
 */
5757 static void postprocess_fp_call_results(void)
5761 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5762 ir_node *call = call_list[i];
5763 ir_type *mtp = call_types[i];
/* walk all results of the call's method type */
5766 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5767 ir_type *res_tp = get_method_res_type(mtp, j);
5768 ir_node *res, *new_res;
5771 if (! is_atomic_type(res_tp)) {
5772 /* no floating point return */
5775 res_mode = get_type_mode(res_tp);
5776 if (! mode_is_float(res_mode)) {
5777 /* no floating point return */
/* pn_ia32_Call_vf0 + j is the x87 result projection for result j */
5781 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5784 /* now patch the users */
5785 foreach_out_edge_safe(res, edge) {
5786 ir_node *succ = get_edge_src_irn(edge);
5789 if (be_is_Keep(succ))
5792 if (is_ia32_xStore(succ)) {
5793 /* an xStore can be patched into an vfst */
5794 dbg_info *db = get_irn_dbg_info(succ);
5795 ir_node *block = get_nodes_block(succ);
5796 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5797 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5798 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5799 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5800 ir_mode *mode = get_ia32_ls_mode(succ);
5802 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5803 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5804 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5805 if (is_ia32_use_frame(succ))
5806 set_ia32_use_frame(st);
5807 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5808 set_irn_pinned(st, get_irn_pinned(succ));
5809 set_ia32_op_type(st, ia32_AddrModeD);
/* the vfst proj numbers must line up with the xStore ones */
5811 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5812 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5813 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* generic fallback: spill st(0) to the frame, reload into xmm */
5820 if (new_res == NULL) {
5821 dbg_info *db = get_irn_dbg_info(call);
5822 ir_node *block = get_nodes_block(call);
5823 ir_node *frame = get_irg_frame(current_ir_graph);
5824 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5825 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5826 ir_node *vfst, *xld, *new_mem;
5829 /* store st(0) on stack */
5830 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5832 set_ia32_op_type(vfst, ia32_AddrModeD);
5833 set_ia32_use_frame(vfst);
5835 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5837 /* load into SSE register */
5838 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5840 set_ia32_op_type(xld, ia32_AddrModeS);
5841 set_ia32_use_frame(xld);
5843 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5844 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5846 if (old_mem != NULL) {
5847 edges_reroute(old_mem, new_mem);
5851 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5857 /* do the transformation */
/*
 * Main entry: transform the firm graph `irg` into ia32 nodes.
 * Sets up the transformer table and per-run state, runs the transform
 * walk with CSE disabled (node attributes are set after creation, which
 * is unsafe for CSE), and post-processes float call results in SSE mode.
 * NOTE(review): the local variable declarations and the line disabling
 * CSE before the walk are missing from this extraction.
 */
5858 void ia32_transform_graph(ir_graph *irg)
5862 register_transformers();
5863 initial_fpcw = NULL;
5864 ia32_no_pic_adjust = 0;
5866 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* the heights structure is used by address-mode matching below */
5868 be_timer_push(T_HEIGHTS);
5869 ia32_heights = heights_new(irg);
5870 be_timer_pop(T_HEIGHTS);
5871 ia32_calculate_non_address_mode_nodes(irg);
5873 /* the transform phase is not safe for CSE (yet) because several nodes get
5874 * attributes set after their creation */
5875 cse_last = get_opt_cse();
/* call_list/call_types record float-returning calls for SSE fixup */
5878 call_list = NEW_ARR_F(ir_node *, 0);
5879 call_types = NEW_ARR_F(ir_type *, 0);
5880 be_transform_graph(irg, ia32_pretransform_node);
5882 if (ia32_cg_config.use_sse2)
5883 postprocess_fp_call_results();
5884 DEL_ARR_F(call_types);
5885 DEL_ARR_F(call_list);
5887 set_opt_cse(cse_last);
5889 ia32_free_non_address_mode_nodes();
5890 heights_free(ia32_heights);
5891 ia32_heights = NULL;
5894 void ia32_init_transform(void)
5896 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");