2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
54 #include "betranshlp.h"
57 #include "bearch_ia32_t.h"
58 #include "ia32_common_transform.h"
59 #include "ia32_nodes_attr.h"
60 #include "ia32_transform.h"
61 #include "ia32_new_nodes.h"
62 #include "ia32_dbg_stat.h"
63 #include "ia32_optimize.h"
64 #include "ia32_address_mode.h"
65 #include "ia32_architecture.h"
67 #include "gen_ia32_regalloc_if.h"
69 /* define this to construct SSE constants instead of load them */
70 #undef CONSTRUCT_SSE_CONST
/* Modes of the x87 (fp) and SSE (xmm) register classes. */
72 #define mode_fp (ia32_reg_classes[CLASS_ia32_fp].mode)
73 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle for this transformation pass. */
75 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* FPU control word nodes of the graph currently being transformed:
 * old_initial_fpcw is the pre-transform node, initial_fpcw its lazily
 * created transformed counterpart (see get_fpcw()). */
77 static ir_node *old_initial_fpcw = NULL;
78 static ir_node *initial_fpcw = NULL;
/* NOTE(review): presumably disables PIC base adjustment when non-zero --
 * confirm against the code that reads this flag. */
79 int ia32_no_pic_adjust;
/** Constructor for a binary operation with address-mode inputs. */
81 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
82 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/** Constructor for a binary operation that also consumes an eflags input. */
85 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
86 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/** Constructor for a shift/rotate operation (no address mode). */
89 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
90 ir_node *op1, ir_node *op2);
/** Constructor for a binary operation in destination address mode. */
92 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
93 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
/** Constructor for a unary operation in destination address mode. */
95 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
96 ir_node *base, ir_node *index, ir_node *mem);
/** Constructor for an x87 float binop (extra input is the fpcw). */
98 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/** Constructor for a plain unary operation. */
102 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
104 static ir_node *create_immediate_or_transform(ir_node *node);
106 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
107 dbg_info *dbgi, ir_node *block,
108 ir_node *op, ir_node *orig_node);
110 /* it's enough to have those once */
111 static ir_node *nomem, *noreg_GP;
113 /** a list to postprocess all calls */
114 static ir_node **call_list;
115 static ir_type **call_types;
117 /** Return non-zero is a node represents the 0 constant. */
118 static bool is_Const_0(ir_node *node)
120 return is_Const(node) && is_Const_null(node);
123 /** Return non-zero is a node represents the 1 constant. */
124 static bool is_Const_1(ir_node *node)
126 return is_Const(node) && is_Const_one(node);
129 /** Return non-zero is a node represents the -1 constant. */
130 static bool is_Const_Minus_1(ir_node *node)
132 return is_Const(node) && is_Const_all_one(node);
136 * returns true if constant can be created with a simple float command
138 static bool is_simple_x87_Const(ir_node *node)
140 ir_tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 can be materialized directly with fldz/fld1. */
141 if (tarval_is_null(tv) || tarval_is_one(tv))
144 /* TODO: match all the other float constants */
149 * returns true if constant can be created with a simple float command
151 static bool is_simple_sse_Const(ir_node *node)
153 ir_tarval *tv = get_Const_tarval(node);
154 ir_mode *mode = get_tarval_mode(tv);
/* zero is always simple (xorps). */
159 if (tarval_is_null(tv)
160 #ifdef CONSTRUCT_SSE_CONST
165 #ifdef CONSTRUCT_SSE_CONST
166 if (mode == mode_D) {
/* assemble the low 32 bits of the double little-endian byte by byte */
167 unsigned val = get_tarval_sub_bits(tv, 0) |
168 (get_tarval_sub_bits(tv, 1) << 8) |
169 (get_tarval_sub_bits(tv, 2) << 16) |
170 (get_tarval_sub_bits(tv, 3) << 24);
172 /* lower 32bit are zero, really a 32bit constant */
175 #endif /* CONSTRUCT_SSE_CONST */
176 /* TODO: match all the other float constants */
181 * return NoREG or pic_base in case of PIC.
182 * This is necessary as base address for newly created symbols
184 static ir_node *get_symconst_base(void)
186 ir_graph *irg = current_ir_graph;
/* in PIC mode symbol addresses must be relative to the pic base register */
188 if (be_options.pic) {
189 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
190 return arch_env->impl->get_pic_base(irg);
197 * Transforms a Const.
199 static ir_node *gen_Const(ir_node *node)
201 ir_node *old_block = get_nodes_block(node);
202 ir_node *block = be_transform_node(old_block);
203 dbg_info *dbgi = get_irn_dbg_info(node);
204 ir_mode *mode = get_irn_mode(node);
205 ir_tarval *tv = get_Const_tarval(node);
207 assert(is_Const(node));
/* float constants: either synthesize with SSE/x87 instructions or load
 * them from a constant-pool entity */
209 if (mode_is_float(mode)) {
210 ir_graph *irg = get_irn_irg(node);
211 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
212 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
217 if (ia32_cg_config.use_sse2) {
/* +0.0: xorps the register with itself */
218 if (tarval_is_null(tv)) {
219 load = new_bd_ia32_xZero(dbgi, block);
220 set_ia32_ls_mode(load, mode);
222 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build the bit pattern from all-ones via shift left/right */
223 } else if (tarval_is_one(tv)) {
224 int cnst = mode == mode_F ? 26 : 55;
225 ir_node *imm1 = ia32_create_Immediate(irg, NULL, 0, cnst);
226 ir_node *imm2 = ia32_create_Immediate(irg, NULL, 0, 2);
227 ir_node *pslld, *psrld;
229 load = new_bd_ia32_xAllOnes(dbgi, block);
230 set_ia32_ls_mode(load, mode);
231 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
232 set_ia32_ls_mode(pslld, mode);
233 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
234 set_ia32_ls_mode(psrld, mode);
236 #endif /* CONSTRUCT_SSE_CONST */
237 } else if (mode == mode_F) {
238 /* we can place any 32bit constant by using a movd gp, sse */
239 unsigned val = get_tarval_sub_bits(tv, 0) |
240 (get_tarval_sub_bits(tv, 1) << 8) |
241 (get_tarval_sub_bits(tv, 2) << 16) |
242 (get_tarval_sub_bits(tv, 3) << 24);
243 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
244 load = new_bd_ia32_xMovd(dbgi, block, cnst);
245 set_ia32_ls_mode(load, mode);
249 #ifdef CONSTRUCT_SSE_CONST
250 if (mode == mode_D) {
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
256 ir_node *imm32 = ia32_create_Immediate(irg, NULL, 0, 32);
257 ir_node *cnst, *psllq;
259 /* fine, lower 32bit are zero, produce 32bit value */
260 val = get_tarval_sub_bits(tv, 4) |
261 (get_tarval_sub_bits(tv, 5) << 8) |
262 (get_tarval_sub_bits(tv, 6) << 16) |
263 (get_tarval_sub_bits(tv, 7) << 24);
264 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
265 load = new_bd_ia32_xMovd(dbgi, block, cnst);
266 set_ia32_ls_mode(load, mode);
/* shift the upper half into place */
267 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
268 set_ia32_ls_mode(psllq, mode);
273 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: load the constant from a pool entity */
274 floatent = ia32_create_float_const_entity(isa, tv, NULL);
276 base = get_symconst_base();
277 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
279 set_ia32_op_type(load, ia32_AddrModeS);
280 set_ia32_am_sc(load, floatent);
281 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
282 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise load from constant pool */
285 if (tarval_is_null(tv)) {
286 load = new_bd_ia32_fldz(dbgi, block);
288 set_ia32_ls_mode(load, mode);
289 } else if (tarval_is_one(tv)) {
290 load = new_bd_ia32_fld1(dbgi, block);
292 set_ia32_ls_mode(load, mode);
297 floatent = ia32_create_float_const_entity(isa, tv, NULL);
298 /* create_float_const_ent is smart and sometimes creates
300 ls_mode = get_type_mode(get_entity_type(floatent));
301 base = get_symconst_base();
302 load = new_bd_ia32_fld(dbgi, block, base, noreg_GP, nomem,
304 set_ia32_op_type(load, ia32_AddrModeS);
305 set_ia32_am_sc(load, floatent);
306 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
307 res = new_r_Proj(load, mode_fp, pn_ia32_fld_res);
310 #ifdef CONSTRUCT_SSE_CONST
312 #endif /* CONSTRUCT_SSE_CONST */
313 SET_IA32_ORIG_NODE(load, node);
315 } else { /* non-float mode */
/* integer constants are normalized to mode_Iu and emitted as ia32_Const */
319 tv = tarval_convert_to(tv, mode_Iu);
321 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
323 panic("couldn't convert constant tarval (%+F)", node);
325 val = get_tarval_long(tv);
327 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
328 SET_IA32_ORIG_NODE(cnst, node);
335 * Transforms a SymConst.
337 static ir_node *gen_SymConst(ir_node *node)
339 ir_node *old_block = get_nodes_block(node);
340 ir_node *block = be_transform_node(old_block);
341 dbg_info *dbgi = get_irn_dbg_info(node);
342 ir_mode *mode = get_irn_mode(node);
/* float symconsts are materialized via a load from the entity */
345 if (mode_is_float(mode)) {
346 if (ia32_cg_config.use_sse2)
347 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
349 cnst = new_bd_ia32_fld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
350 set_ia32_am_sc(cnst, get_SymConst_entity(node));
351 set_ia32_use_frame(cnst);
355 if (get_SymConst_kind(node) != symconst_addr_ent) {
356 panic("backend only support symconst_addr_ent (at %+F)", node);
358 entity = get_SymConst_entity(node);
/* thread-local entities need the TLS base added via an Lea */
359 if (get_entity_owner(entity) == get_tls_type()) {
360 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
361 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
362 set_ia32_am_sc(lea, entity);
365 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
369 SET_IA32_ORIG_NODE(cnst, node);
/** Create a fixed-layout array type with two elements of type @p tp. */
374 static ir_type *make_array_type(ir_type *tp)
376 unsigned alignment = get_type_alignment_bytes(tp);
377 unsigned size = get_type_size_bytes(tp);
378 ir_type *res = new_type_array(1, tp);
379 set_type_alignment_bytes(res, alignment);
380 set_array_bounds_int(res, 0, 0, 2);
/* pad the element size up to the alignment if necessary */
381 if (alignment > size)
383 set_type_size_bytes(res, 2 * size);
384 set_type_state(res, layout_fixed);
389 * Create a float[2] array type for the given atomic type.
391 * @param tp the atomic type
393 static ir_type *ia32_create_float_array(ir_type *tp)
395 ir_mode *mode = get_type_mode(tp);
/* cache one array type per float mode in function-static variables */
398 if (mode == mode_F) {
399 static ir_type *float_F;
403 arr = float_F = make_array_type(tp);
404 } else if (mode == mode_D) {
405 static ir_type *float_D;
409 arr = float_D = make_array_type(tp);
411 static ir_type *float_E;
415 arr = float_E = make_array_type(tp);
420 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
421 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* table of well-known constants: sign-bit masks, abs masks, ULL bias */
423 static const struct {
425 const char *cnst_str;
427 } names [ia32_known_const_max] = {
428 { "C_sfp_sign", "0x80000000", 0 },
429 { "C_dfp_sign", "0x8000000000000000", 1 },
430 { "C_sfp_abs", "0x7FFFFFFF", 0 },
431 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
432 { "C_ull_bias", "0x10000000000000000", 2 }
/* entities are created once and cached for subsequent calls */
434 static ir_entity *ent_cache[ia32_known_const_max];
436 ir_entity *ent = ent_cache[kct];
439 ir_graph *irg = current_ir_graph;
440 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
441 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
442 const char *cnst_str = names[kct].cnst_str;
443 ident *name = new_id_from_str(names[kct].name);
/* the third table column selects the tarval mode */
446 switch (names[kct].mode) {
447 case 0: mode = mode_Iu; break;
448 case 1: mode = mode_Lu; break;
449 case 2: mode = mode_F; break;
450 default: panic("internal compiler error");
452 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS is a float[2] = { 0.0f, bias } initialized compound entity */
454 if (kct == ia32_ULLBIAS) {
455 ir_type *type = ia32_get_prim_type(mode_F);
456 ir_type *atype = ia32_create_float_array(type);
457 ir_initializer_t *initializer;
459 ent = new_entity(get_glob_type(), name, atype);
461 set_entity_ld_ident(ent, name);
462 set_entity_visibility(ent, ir_visibility_private);
463 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
465 initializer = create_initializer_compound(2);
466 set_initializer_compound_value(initializer, 0,
467 create_initializer_tarval(get_mode_null(mode)));
468 set_initializer_compound_value(initializer, 1,
469 create_initializer_tarval(tv));
470 set_entity_initializer(ent, initializer);
472 ent = ia32_create_float_const_entity(isa, tv, name);
474 /* cache the entry */
475 ent_cache[kct] = ent;
478 return ent_cache[kct];
482 * return true if the node is a Proj(Load) and could be used in source address
483 * mode for another node. Will return only true if the @p other node is not
484 * dependent on the memory of the Load (for binary operations use the other
485 * input here, for unary operations use NULL).
487 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
488 ir_node *other, ir_node *other2,
495 /* float constants are always available */
496 if (is_Const(node)) {
497 mode = get_irn_mode(node);
498 if (mode_is_float(mode)) {
499 ir_tarval *tv = get_Const_tarval(node);
500 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
502 if (ia32_cg_config.use_sse2) {
503 if (is_simple_sse_Const(node))
506 if (is_simple_x87_Const(node))
/* simple constants are cheaper to rematerialize than to fold */
509 if (get_irn_n_edges(node) > 1)
/* otherwise the candidate must be the result Proj of a Load in this block */
518 load = get_Proj_pred(node);
519 pn = get_Proj_proj(node);
520 if (!is_Load(load) || pn != pn_Load_res)
522 if (get_nodes_block(load) != block)
524 mode = get_irn_mode(node);
525 /* we can't fold mode_E AM */
526 if (mode == ia32_mode_E)
528 /* we only use address mode if we're the only user of the load */
529 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
531 /* in some edge cases with address mode we might reach the load normally
532 * and through some AM sequence, if it is already materialized then we
533 * can't create an AM node from it */
534 if (be_is_transformed(node))
537 /* don't do AM if other node inputs depend on the load (via mem-proj) */
538 if (other != NULL && ia32_prevents_AM(block, load, other))
541 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/** Result record of operand matching: the computed address plus operand info. */
547 typedef struct ia32_address_mode_t ia32_address_mode_t;
548 struct ia32_address_mode_t {
553 ia32_op_type_t op_type;        /**< normal or address-mode operation */
557 unsigned commutative : 1;      /**< operands may be swapped */
558 unsigned ins_permuted : 1;     /**< operands were swapped during matching */
561 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
563 /* construct load address */
564 memset(addr, 0, sizeof(addr[0]));
565 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
567 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
568 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
569 addr->mem = be_transform_node(mem);
/** Fill @p am with the address of @p node (a Proj(Load) or float Const). */
572 static void build_address(ia32_address_mode_t *am, ir_node *node,
573 ia32_create_am_flags_t flags)
575 ia32_address_t *addr = &am->addr;
581 /* floating point immediates */
582 if (is_Const(node)) {
/* materialize the float constant as a pool entity and address it */
583 ir_graph *irg = get_irn_irg(node);
584 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
585 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
586 ir_tarval *tv = get_Const_tarval(node);
587 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
588 addr->base = get_symconst_base();
589 addr->index = noreg_GP;
591 addr->symconst_ent = entity;
592 addr->tls_segment = false;
594 am->ls_mode = get_type_mode(get_entity_type(entity));
595 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): fold the load's address */
599 load = get_Proj_pred(node);
600 ptr = get_Load_ptr(load);
601 mem = get_Load_mem(load);
602 new_mem = be_transform_node(mem);
603 am->pinned = get_irn_pinned(load);
604 am->ls_mode = get_Load_mode(load);
605 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
608 /* construct load address */
609 ia32_create_address_mode(addr, ptr, flags);
611 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
612 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copy the attributes of @p addr onto the ia32 node @p node. */
616 static void set_address(ir_node *node, const ia32_address_t *addr)
618 set_ia32_am_scale(node, addr->scale);
619 set_ia32_am_sc(node, addr->symconst_ent);
620 set_ia32_am_offs_int(node, addr->offset);
621 set_ia32_am_tls_segment(node, addr->tls_segment);
622 if (addr->symconst_sign)
623 set_ia32_am_sc_sign(node);
/* frame-relative addresses additionally record the frame entity */
625 set_ia32_use_frame(node);
626 set_ia32_frame_ent(node, addr->frame_entity);
630 * Apply attributes of a given address mode to a node.
632 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
634 set_address(node, &am->addr);
636 set_ia32_op_type(node, am->op_type);
637 set_ia32_ls_mode(node, am->ls_mode);
638 if (am->pinned == op_pin_state_pinned) {
639 /* beware: some nodes are already pinned and did not allow to change the state */
640 if (get_irn_pinned(node) != op_pin_state_pinned)
641 set_irn_pinned(node, op_pin_state_pinned)
644 set_ia32_commutative(node);
648 * Check, if a given node is a Down-Conv, i.e. a integer Conv
649 * from a mode with a mode with more bits to a mode with lesser bits.
650 * Moreover, we return only true if the node has not more than 1 user.
652 * @param node the node
653 * @return non-zero if node is a Down-Conv
655 static int is_downconv(const ir_node *node)
663 src_mode = get_irn_mode(get_Conv_op(node));
664 dest_mode = get_irn_mode(node);
/* both modes must be GP-register integer modes, and the target narrower */
666 ia32_mode_needs_gp_reg(src_mode) &&
667 ia32_mode_needs_gp_reg(dest_mode) &&
668 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
671 /** Skip all Down-Conv's on a given node and return the resulting node. */
672 ir_node *ia32_skip_downconv(ir_node *node)
674 while (is_downconv(node)) {
675 /* we only want to skip the conv when we're the only user
676 * (because this test is used in the context of address-mode selection
677 * and we don't want to use address mode for multiple users) */
678 if (get_irn_n_edges(node) > 1)
681 node = get_Conv_op(node);
/** Return true iff @p node is a float Conv to an equal or narrower float mode. */
687 static bool is_float_downconv(const ir_node *node)
691 ir_node *pred = get_Conv_op(node);
692 ir_mode *pred_mode = get_irn_mode(pred);
693 ir_mode *mode = get_irn_mode(node);
694 return mode_is_float(pred_mode)
695 && get_mode_size_bits(mode) <= get_mode_size_bits(pred_mode);
/** Skip all float Down-Conv's on @p node and return the underlying node. */
698 static ir_node *ia32_skip_float_downconv(ir_node *node)
700 while (is_float_downconv(node)) {
701 node = get_Conv_op(node);
/** Return true iff @p node is a single-user Conv between equally sized GP modes
 * (i.e. a pure signedness change). */
706 static bool is_sameconv(ir_node *node)
714 /* we only want to skip the conv when we're the only user
715 * (because this test is used in the context of address-mode selection
716 * and we don't want to use address mode for multiple users) */
717 if (get_irn_n_edges(node) > 1)
720 src_mode = get_irn_mode(get_Conv_op(node));
721 dest_mode = get_irn_mode(node);
723 ia32_mode_needs_gp_reg(src_mode) &&
724 ia32_mode_needs_gp_reg(dest_mode) &&
725 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
728 /** Skip all signedness convs */
729 static ir_node *ia32_skip_sameconv(ir_node *node)
731 while (is_sameconv(node)) {
732 node = get_Conv_op(node);
738 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
740 ir_mode *mode = get_irn_mode(node);
741 ir_node *block = get_nodes_block(node);
742 dbg_info *dbgi = get_irn_dbg_info(node);
743 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
746 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
748 ir_mode *mode = get_irn_mode(node);
749 ir_node *block = get_nodes_block(node);
750 dbg_info *dbgi = get_irn_dbg_info(node);
751 /* normalize to an unsigned mode */
752 switch (get_mode_size_bits(mode)) {
753 case 8: mode = mode_Bu; break;
754 case 16: mode = mode_Hu; break;
756 panic("ia32: invalid mode in zest: %+F", node);
758 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
761 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
763 ir_mode *mode = get_irn_mode(node);
764 if (mode_is_signed(mode)) {
765 return transform_sext(node, orig_node);
767 return transform_zext(node, orig_node);
/** Return the NoReg node matching @p mode (GP, SSE or x87). */
771 static ir_node *get_noreg(ir_mode *const mode)
773 if (!mode_is_float(mode)) {
775 } else if (ia32_cg_config.use_sse2) {
776 return ia32_new_NoReg_xmm(current_ir_graph);
778 return ia32_new_NoReg_fp(current_ir_graph);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* narrow modes may only use AM when explicitly allowed */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = ia32_try_create_Immediate(op2, 'i');
/* try source address mode on op2 first, then (if commutative) on op1 */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
847 build_address(am, op2, ia32_create_am_normal);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 new_op2 = get_noreg(mode);
850 am->op_type = ia32_AddrModeS;
851 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
853 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
854 build_address(am, op1, ia32_create_am_normal);
856 ir_node *const noreg = get_noreg(mode);
857 if (new_op2 != NULL) {
860 new_op1 = be_transform_node(op2);
862 am->ins_permuted = true;
864 am->op_type = ia32_AddrModeS;
866 am->op_type = ia32_Normal;
868 if (flags & match_try_am) {
/* no AM matched: extend narrow operands to 32 bit as the flags demand */
874 mode = get_irn_mode(op2);
875 if (get_mode_size_bits(mode) != 32
876 && (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
877 if (flags & match_upconv) {
878 new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
880 new_op2 = transform_upconv(op2, op2);
881 } else if (flags & match_zero_ext) {
882 new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
884 new_op2 = transform_zext(op2, op2);
886 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
888 new_op2 = be_transform_node(op2);
889 assert(flags & match_mode_neutral);
893 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
895 new_op2 = be_transform_node(op2);
/* fill in defaults for unused address parts */
899 if (addr->base == NULL)
900 addr->base = noreg_GP;
901 if (addr->index == NULL)
902 addr->index = noreg_GP;
903 if (addr->mem == NULL)
906 am->new_op1 = new_op1;
907 am->new_op2 = new_op2;
908 am->commutative = commutative;
912 * "Fixes" a node that uses address mode by turning it into mode_T
913 * and returning a pn_ia32_res Proj.
915 * @param node the node
916 * @param am its address mode
918 * @return a Proj(pn_ia32_res) if a memory address mode is used,
921 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no load memory was folded */
926 if (am->mem_proj == NULL)
929 /* we have to create a mode_T so the old MemProj can attach to us */
930 mode = get_irn_mode(node);
931 load = get_Proj_pred(am->mem_proj);
933 be_set_transformed_node(load, node);
935 if (mode != mode_T) {
936 set_irn_mode(node, mode_T);
937 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
944 * Construct a standard binary operation, set AM and immediate if required.
946 * @param node The original node for which the binop is created
947 * @param op1 The first operand
948 * @param op2 The second operand
949 * @param func The node constructor function
950 * @return The constructed ia32 node.
952 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
953 construct_binop_func *func, match_flags_t flags)
956 ir_node *block, *new_block, *new_node;
957 ia32_address_mode_t am;
958 ia32_address_t *addr = &am.addr;
960 block = get_nodes_block(node);
961 match_arguments(&am, block, op1, op2, NULL, flags);
963 dbgi = get_irn_dbg_info(node);
964 new_block = be_transform_node(block);
965 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
966 am.new_op1, am.new_op2);
967 set_am_attributes(new_node, &am);
968 /* we can't use source address mode anymore when using immediates */
969 if (!(flags & match_am_and_immediates) &&
970 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
971 set_ia32_am_support(new_node, ia32_am_none);
972 SET_IA32_ORIG_NODE(new_node, node);
974 new_node = fix_mem_proj(new_node, &am);
980 * Generic names for the inputs of an ia32 binary op.
983 n_ia32_l_binop_left, /**< ia32 left input */
984 n_ia32_l_binop_right, /**< ia32 right input */
985 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* verify that the generic input positions match the generated node layouts */
987 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
995 * Construct a binary operation which also consumes the eflags.
997 * @param node The node to transform
998 * @param func The node constructor function
999 * @param flags The match flags
1000 * @return The constructor ia32 node
1002 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1003 match_flags_t flags)
1005 ir_node *src_block = get_nodes_block(node);
1006 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1007 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1008 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1010 ir_node *block, *new_node, *new_eflags;
1011 ia32_address_mode_t am;
1012 ia32_address_t *addr = &am.addr;
/* eflags acts as the unmatched "other" operand */
1014 match_arguments(&am, src_block, op1, op2, eflags, flags);
1016 dbgi = get_irn_dbg_info(node);
1017 block = be_transform_node(src_block);
1018 new_eflags = be_transform_node(eflags);
1019 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1020 am.new_op1, am.new_op2, new_eflags);
1021 set_am_attributes(new_node, &am);
1022 /* we can't use source address mode anymore when using immediates */
1023 if (!(flags & match_am_and_immediates) &&
1024 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1025 set_ia32_am_support(new_node, ia32_am_none);
1026 SET_IA32_ORIG_NODE(new_node, node);
1028 new_node = fix_mem_proj(new_node, &am);
1033 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 initial_fpcw = be_transform_node(old_initial_fpcw);
1039 return initial_fpcw;
/** Skip single-user float Convs that do not narrow the value. */
1042 static ir_node *skip_float_upconv(ir_node *node)
1044 ir_mode *mode = get_irn_mode(node);
1045 assert(mode_is_float(mode));
1047 while (is_Conv(node)) {
1048 ir_node *pred = get_Conv_op(node);
1049 ir_mode *pred_mode = get_irn_mode(pred);
1052 * suboptimal, but without this check the address mode matcher
1053 * can incorrectly think that something has only 1 user
1055 if (get_irn_n_edges(node) > 1)
/* stop at non-float predecessors and at narrowing convs */
1058 if (!mode_is_float(pred_mode)
1059 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1067 static void check_x87_floatmode(ir_mode *mode)
1069 if (mode != ia32_mode_E) {
1070 panic("ia32: x87 only supports x86 extended float mode");
1075 * Construct a standard binary operation, set AM and immediate if required.
1077 * @param op1 The first operand
1078 * @param op2 The second operand
1079 * @param func The node constructor function
1080 * @return The constructed ia32 node.
1082 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1083 construct_binop_float_func *func)
1089 ia32_address_mode_t am;
1090 ia32_address_t *addr = &am.addr;
1091 ia32_x87_attr_t *attr;
1092 /* All operations are considered commutative, because there are reverse
1094 match_flags_t flags = match_commutative | match_am;
/* Div carries its result mode separately */
1096 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1097 check_x87_floatmode(mode);
1099 op1 = skip_float_upconv(op1);
1100 op2 = skip_float_upconv(op2);
1102 block = get_nodes_block(node);
1103 match_arguments(&am, block, op1, op2, NULL, flags);
1105 dbgi = get_irn_dbg_info(node);
1106 new_block = be_transform_node(block);
1107 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1108 am.new_op1, am.new_op2, get_fpcw());
1109 set_am_attributes(new_node, &am);
/* record operand permutation so a reversed instruction can be emitted */
1111 attr = get_ia32_x87_attr(new_node);
1112 attr->attr.data.ins_permuted = am.ins_permuted;
1114 SET_IA32_ORIG_NODE(new_node, node);
1116 new_node = fix_mem_proj(new_node, &am);
1122 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1124 * @param op1 The first operand
1125 * @param op2 The second operand
1126 * @param func The node constructor function
1127 * @return The constructed ia32 node.
1129 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1130 construct_shift_func *func,
1131 match_flags_t flags)
1133 ir_mode *mode = get_irn_mode(node);
1135 assert(! mode_is_float(mode));
1136 assert(flags & match_immediate);
1137 assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);
/* ia32 shift instructions implicitly mask the count to 5 bits */
1139 if (get_mode_modulo_shift(mode) != 32) {
1140 /* TODO: implement special cases for non-modulo shifts */
1141 panic("modulo shift!=32 not supported by ia32 backend");
/* prepare the value to be shifted, extending narrow modes as required */
1146 if (flags & match_mode_neutral) {
1147 op1 = ia32_skip_downconv(op1);
1148 new_op1 = be_transform_node(op1);
1150 op1 = ia32_skip_sameconv(op1);
1151 if (get_mode_size_bits(mode) != 32) {
1152 if (flags & match_upconv) {
1153 new_op1 = transform_upconv(op1, node);
1154 } else if (flags & match_zero_ext) {
1155 new_op1 = transform_zext(op1, node);
1157 /* match_mode_neutral not handled here because it makes no
1158 * sense for shift operations */
1159 panic("ia32 code selection failed for %+F", node);
1162 new_op1 = be_transform_node(op1);
1166 /* the shift amount can be any mode that is bigger than 5 bits, since all
1167 * other bits are ignored anyway */
1168 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1169 ir_node *const op = get_Conv_op(op2);
1170 if (mode_is_float(get_irn_mode(op)))
1173 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1175 new_op2 = create_immediate_or_transform(op2);
1177 dbg_info *dbgi = get_irn_dbg_info(node);
1178 ir_node *block = get_nodes_block(node);
1179 ir_node *new_block = be_transform_node(block);
1180 ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
1181 SET_IA32_ORIG_NODE(new_node, node);
1183 /* lowered shift instruction may have a dependency operand, handle it here */
1184 if (get_irn_arity(node) == 3) {
1185 /* we have a dependency */
1186 ir_node* dep = get_irn_n(node, 2);
1187 if (get_irn_n_edges(dep) > 1) {
1188 /* ... which has at least one user other than 'node' */
1189 ir_node *new_dep = be_transform_node(dep);
1190 add_irn_dep(new_node, new_dep);
1199 * Construct a standard unary operation, set AM and immediate if required.
1201 * @param op The operand
1202 * @param func The node constructor function
1203 * @return The constructed ia32 node.
1205 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1206 match_flags_t flags)
1209 ir_node *block, *new_block, *new_op, *new_node;
1211 assert(flags == 0 || flags == match_mode_neutral);
1212 if (flags & match_mode_neutral) {
1213 op = ia32_skip_downconv(op);
1216 new_op = be_transform_node(op);
1217 dbgi = get_irn_dbg_info(node);
1218 block = get_nodes_block(node);
1219 new_block = be_transform_node(block);
1220 new_node = func(dbgi, new_block, new_op);
1222 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the effective address described by *addr.
 * NOTE(review): some lines of this function are elided in this extract. */
1227 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1228 ia32_address_t *addr)
1238 base = be_transform_node(base)
1245 idx = be_transform_node(idx);
1248 /* segment overrides are ineffective for Leas :-( so we have to patch
1250 if (addr->tls_segment) {
/* materialize the TLS base and fold it into the base register instead,
 * since Lea cannot carry a segment override */
1251 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1252 assert(addr->symconst_ent != NULL);
1253 if (base == noreg_GP)
1256 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1257 addr->tls_segment = false;
1260 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1261 set_address(res, addr);
1267 * Returns non-zero if a given address mode has a symbolic or
1268 * numerical offset != 0.
1270 static int am_has_immediates(const ia32_address_t *addr)
/* any of: numeric displacement, symconst entity, frame entity, or frame use
 * counts as an "immediate" part of the address */
1272 return addr->offset != 0 || addr->symconst_ent != NULL
1273 || addr->frame_entity || addr->use_frame;
/* Constructor signature shared by the double-width shift nodes (ShlD/ShrD). */
1276 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1277 ir_node *high, ir_node *low,
1281 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1282 * op1 - target to be shifted
1283 * op2 - contains bits to be shifted into target
1285 * Only op3 can be an immediate.
1287 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1288 ir_node *high, ir_node *low, ir_node *count,
1289 new_shiftd_func func)
1291 ir_node *new_block = be_transform_node(block);
1292 ir_node *new_high = be_transform_node(high);
1293 ir_node *new_low = be_transform_node(low);
1297 /* the shift amount can be any mode that is bigger than 5 bits, since all
1298 * other bits are ignored anyway */
/* strip single-user integer Convs on the count; only the low 5 bits matter */
1299 while (is_Conv(count) &&
1300 get_irn_n_edges(count) == 1 &&
1301 mode_is_int(get_irn_mode(count))) {
1302 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1303 count = get_Conv_op(count);
1305 new_count = create_immediate_or_transform(count);
1307 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1312 * Tests whether 2 values result in 'x' and '32-x' when interpreted as a shift
1315 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1317 if (is_Const(value1) && is_Const(value2)) {
1318 ir_tarval *tv1 = get_Const_tarval(value1);
1319 ir_tarval *tv2 = get_Const_tarval(value2);
1320 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1321 long v1 = get_tarval_long(tv1);
1322 long v2 = get_tarval_long(tv2);
/* v1 <= v2 additionally rules out negative/oversized shift pairs */
1323 return v1 <= v2 && v2 == 32-v1;
/* Try to recognise an Or/Add of complementary Shl/Shr as a double-width
 * shift (ShlD/ShrD) or a rotate-like pattern produced by lower_dw.
 * Returns the transformed node or NULL if no pattern matched.
 * NOTE(review): lines are elided in this extract (e.g. the early-out when
 * op1/op2 are not Shl/Shr, and the swapped-operand case). */
1329 static ir_node *match_64bit_shift(ir_node *node)
1331 ir_node *op1 = get_binop_left(node);
1332 ir_node *op2 = get_binop_right(node);
1333 assert(is_Or(node) || is_Add(node));
1341 /* match ShlD operation */
1342 if (is_Shl(op1) && is_Shr(op2)) {
1343 ir_node *shl_right = get_Shl_right(op1);
1344 ir_node *shl_left = get_Shl_left(op1);
1345 ir_node *shr_right = get_Shr_right(op2);
1346 ir_node *shr_left = get_Shr_left(op2);
1347 /* constant ShlD operation */
1348 if (is_complementary_shifts(shl_right, shr_right)) {
1349 dbg_info *dbgi = get_irn_dbg_info(node);
1350 ir_node *block = get_nodes_block(node);
1351 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1354 /* constant ShrD operation */
1355 if (is_complementary_shifts(shr_right, shl_right)) {
1356 dbg_info *dbgi = get_irn_dbg_info(node);
1357 ir_node *block = get_nodes_block(node);
1358 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1361 /* lower_dw produces the following for ShlD:
1362 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1363 if (is_Shr(shr_left) && is_Not(shr_right)
1364 && is_Const_1(get_Shr_right(shr_left))
1365 && get_Not_op(shr_right) == shl_right) {
1366 dbg_info *dbgi = get_irn_dbg_info(node);
1367 ir_node *block = get_nodes_block(node);
1368 ir_node *val_h = get_Shr_left(shr_left);
1369 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1372 /* lower_dw produces the following for ShrD:
1373 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1374 if (is_Shl(shl_left) && is_Not(shl_right)
1375 && is_Const_1(get_Shl_right(shl_left))
1376 && get_Not_op(shl_right) == shr_right) {
1377 dbg_info *dbgi = get_irn_dbg_info(node);
1378 ir_node *block = get_nodes_block(node);
1379 ir_node *val_h = get_Shl_left(shl_left);
1380 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1389 * Creates an ia32 Add.
1391 * @return the created ia32 Add node
1393 static ir_node *gen_Add(ir_node *node)
1395 ir_mode *mode = get_irn_mode(node);
1396 ir_node *op1 = get_Add_left(node);
1397 ir_node *op2 = get_Add_right(node);
1399 ir_node *block, *new_block, *new_node, *add_immediate_op;
1400 ia32_address_t addr;
1401 ia32_address_mode_t am;
/* an Add may actually be a lowered 64bit shift pattern -- try that first */
1403 new_node = match_64bit_shift(node);
1404 if (new_node != NULL)
1407 if (mode_is_float(mode)) {
1408 if (ia32_cg_config.use_sse2)
1409 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1410 match_commutative | match_am);
/* fall back to the x87 FPU add when SSE2 is unavailable */
1412 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fadd);
1415 ia32_mark_non_am(node);
1419 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1420 * 1. Add with immediate -> Lea
1421 * 2. Add with possible source address mode -> Add
1422 * 3. Otherwise -> Lea
1424 memset(&addr, 0, sizeof(addr));
/* force the whole Add tree into an address-mode description */
1425 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1426 add_immediate_op = NULL;
1428 dbgi = get_irn_dbg_info(node);
1429 block = get_nodes_block(node);
1430 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a Const */
1433 if (addr.base == NULL && addr.index == NULL) {
1434 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1435 addr.symconst_sign, 0, addr.offset);
1436 SET_IA32_ORIG_NODE(new_node, node);
1439 /* add with immediate? */
1440 if (addr.index == NULL) {
1441 add_immediate_op = addr.base;
1442 } else if (addr.base == NULL && addr.scale == 0) {
1443 add_immediate_op = addr.index;
1446 if (add_immediate_op != NULL) {
/* Add x,0 -- no immediate part left, the operand itself is the result */
1447 if (!am_has_immediates(&addr)) {
1448 #ifdef DEBUG_libfirm
1449 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1452 return be_transform_node(add_immediate_op);
1455 new_node = create_lea_from_address(dbgi, new_block, &addr);
1456 SET_IA32_ORIG_NODE(new_node, node);
1460 /* test if we can use source address mode */
1461 match_arguments(&am, block, op1, op2, NULL, match_commutative
1462 | match_mode_neutral | match_am | match_immediate | match_try_am);
1464 /* construct an Add with source address mode */
1465 if (am.op_type == ia32_AddrModeS) {
1466 ia32_address_t *am_addr = &am.addr;
1467 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1468 am_addr->index, am_addr->mem, am.new_op1,
1470 set_am_attributes(new_node, &am);
1471 SET_IA32_ORIG_NODE(new_node, node);
1473 new_node = fix_mem_proj(new_node, &am);
1478 /* otherwise construct a lea */
1479 new_node = create_lea_from_address(dbgi, new_block, &addr);
1480 SET_IA32_ORIG_NODE(new_node, node);
1485 * Creates an ia32 Mul.
1487 * @return the created ia32 Mul node
1489 static ir_node *gen_Mul(ir_node *node)
1491 ir_node *op1 = get_Mul_left(node);
1492 ir_node *op2 = get_Mul_right(node);
1493 ir_mode *mode = get_irn_mode(node);
1495 if (mode_is_float(mode)) {
1496 if (ia32_cg_config.use_sse2)
1497 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1498 match_commutative | match_am);
/* x87 multiply when SSE2 is not in use */
1500 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fmul);
/* integer multiply: IMul with full matching (AM, immediates, mode-neutral) */
1502 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1503 match_commutative | match_am | match_mode_neutral |
1504 match_immediate | match_am_and_immediates);
1508 * Creates an ia32 Mulh.
1509 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1510 * this result while Mul returns the lower 32 bit.
1512 * @return the created ia32 Mulh node
1514 static ir_node *gen_Mulh(ir_node *node)
1516 dbg_info *dbgi = get_irn_dbg_info(node);
1517 ir_node *op1 = get_Mulh_left(node);
1518 ir_node *op2 = get_Mulh_right(node);
1519 ir_mode *mode = get_irn_mode(node);
1521 ir_node *proj_res_high;
1523 if (get_mode_size_bits(mode) != 32) {
1524 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMUL, unsigned -> MUL; pick the high-result Proj */
1527 if (mode_is_signed(mode)) {
1528 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1529 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1531 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1532 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1534 return proj_res_high;
1538 * Creates an ia32 And.
1540 * @return The created ia32 And node
1542 static ir_node *gen_And(ir_node *node)
1544 ir_node *op1 = get_And_left(node);
1545 ir_node *op2 = get_And_right(node);
1546 assert(! mode_is_float(get_irn_mode(node)));
1548 /* is it a zero extension? */
1549 if (is_Const(op2)) {
1550 ir_tarval *tv = get_Const_tarval(op2);
1551 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF == zero-extend from 8/16 bit: emit a Conv instead */
1553 if (v == 0xFF || v == 0xFFFF) {
1554 dbg_info *dbgi = get_irn_dbg_info(node);
1555 ir_node *block = get_nodes_block(node);
1562 assert(v == 0xFFFF);
1565 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1570 return gen_binop(node, op1, op2, new_bd_ia32_And,
1571 match_commutative | match_mode_neutral | match_am | match_immediate);
1575 * Creates an ia32 Or.
1577 * @return The created ia32 Or node
1579 static ir_node *gen_Or(ir_node *node)
1581 ir_node *op1 = get_Or_left(node);
1582 ir_node *op2 = get_Or_right(node);
/* an Or may be a lowered 64bit shift combination -- try that first */
1585 res = match_64bit_shift(node);
1589 assert (! mode_is_float(get_irn_mode(node)));
1590 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1591 | match_mode_neutral | match_am | match_immediate);
1597 * Creates an ia32 Eor.
1599 * @return The created ia32 Eor node
1601 static ir_node *gen_Eor(ir_node *node)
1603 ir_node *op1 = get_Eor_left(node);
1604 ir_node *op2 = get_Eor_right(node);
1606 assert(! mode_is_float(get_irn_mode(node)));
1607 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1608 | match_mode_neutral | match_am | match_immediate);
1613 * Creates an ia32 Sub.
1615 * @return The created ia32 Sub node
1617 static ir_node *gen_Sub(ir_node *node)
1619 ir_node *op1 = get_Sub_left(node);
1620 ir_node *op2 = get_Sub_right(node);
1621 ir_mode *mode = get_irn_mode(node);
1623 if (mode_is_float(mode)) {
1624 if (ia32_cg_config.use_sse2)
1625 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1627 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fsub);
/* Sub x,C should have been normalized to Add x,-C by the middle-end */
1630 if (is_Const(op2)) {
1631 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: no match_commutative here -- subtraction is not commutative */
1635 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1636 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address mode:
 * combine the original memory (src_mem) with the AM memory (am_mem) while
 * avoiding memory self-loops through the consumed load.
 * NOTE(review): several lines (returns, loop body parts) are elided here. */
1639 static ir_node *transform_AM_mem(ir_node *const block,
1640 ir_node *const src_val,
1641 ir_node *const src_mem,
1642 ir_node *const am_mem)
1644 if (is_NoMem(am_mem)) {
1645 return be_transform_node(src_mem);
1646 } else if (is_Proj(src_val) &&
1648 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1649 /* avoid memory loop */
1651 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1652 ir_node *const ptr_pred = get_Proj_pred(src_val);
1653 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync without the pred that would point back at the load */
1658 NEW_ARR_A(ir_node*, ins, arity + 1);
1660 /* NOTE: This sometimes produces dead-code because the old sync in
1661 * src_mem might not be used anymore, we should detect this case
1662 * and kill the sync... */
1663 for (i = arity - 1; i >= 0; --i) {
1664 ir_node *const pred = get_Sync_pred(src_mem, i);
1666 /* avoid memory loop */
1667 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1670 ins[n++] = be_transform_node(pred);
1673 if (n==1 && ins[0] == am_mem) {
1675 /* creating a new Sync and relying on CSE may fail,
1676 * if am_mem is a ProjM, which does not yet verify. */
1680 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1684 ins[0] = be_transform_node(src_mem);
1686 return new_r_Sync(block, 2, ins);
1691 * Create a 32bit to 64bit signed extension.
1693 * @param dbgi debug info
1694 * @param block the block where node nodes should be placed
1695 * @param val the value to extend
1696 * @param orig the original node
1698 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1699 ir_node *val, const ir_node *orig)
/* two strategies: cltd (short encoding, binds eax/edx) vs sar val,31 */
1704 if (ia32_cg_config.use_short_sex_eax) {
1705 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1706 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1708 ir_graph *const irg = get_Block_irg(block);
/* arithmetic shift right by 31 replicates the sign bit into all bits */
1709 ir_node *const imm31 = ia32_create_Immediate(irg, NULL, 0, 31);
1710 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1712 SET_IA32_ORIG_NODE(res, orig);
1717 * Generates an ia32 Div with additional infrastructure for the
1718 * register allocator if needed.
1720 static ir_node *create_Div(ir_node *node)
1722 dbg_info *dbgi = get_irn_dbg_info(node);
1723 ir_node *block = get_nodes_block(node);
1724 ir_node *new_block = be_transform_node(block);
1725 int throws_exception = ir_throws_exception(node);
1732 ir_node *sign_extension;
1733 ia32_address_mode_t am;
1734 ia32_address_t *addr = &am.addr;
1736 /* the upper bits have random contents for smaller modes */
/* shared transform for both Div and Mod nodes -- dispatch on the opcode */
1737 switch (get_irn_opcode(node)) {
1739 op1 = get_Div_left(node);
1740 op2 = get_Div_right(node);
1741 mem = get_Div_mem(node);
1742 mode = get_Div_resmode(node);
1745 op1 = get_Mod_left(node);
1746 op2 = get_Mod_right(node);
1747 mem = get_Mod_mem(node);
1748 mode = get_Mod_resmode(node);
1751 panic("invalid divmod node %+F", node);
1754 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1756 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1757 is the memory of the consumed address. We can have only the second op as address
1758 in Div nodes, so check only op2. */
1759 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* IDIV needs the dividend sign-extended into edx; DIV wants edx zeroed */
1761 if (mode_is_signed(mode)) {
1762 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1763 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1764 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1766 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1768 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1769 addr->index, new_mem, am.new_op2,
1770 am.new_op1, sign_extension);
1772 ir_set_throws_exception(new_node, throws_exception);
1774 set_irn_pinned(new_node, get_irn_pinned(node));
1776 set_am_attributes(new_node, &am);
1777 SET_IA32_ORIG_NODE(new_node, node);
1779 new_node = fix_mem_proj(new_node, &am);
1785 * Generates an ia32 Mod.
1787 static ir_node *gen_Mod(ir_node *node)
/* Div and Mod map to the same x86 instruction; only the result Proj differs */
1789 return create_Div(node);
1793 * Generates an ia32 Div.
1795 static ir_node *gen_Div(ir_node *node)
1797 ir_mode *mode = get_Div_resmode(node);
1798 if (mode_is_float(mode)) {
1799 ir_node *op1 = get_Div_left(node);
1800 ir_node *op2 = get_Div_right(node);
1802 if (ia32_cg_config.use_sse2) {
1803 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1805 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fdiv);
/* integer division shares all its machinery with Mod */
1809 return create_Div(node);
1813 * Creates an ia32 Shl.
1815 * @return The created ia32 Shl node
1817 static ir_node *gen_Shl(ir_node *node)
1819 ir_node *left = get_Shl_left(node);
1820 ir_node *right = get_Shl_right(node);
/* shift left is mode-neutral: upper bits of the input are irrelevant */
1822 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1823 match_mode_neutral | match_immediate);
1827 * Creates an ia32 Shr.
1829 * @return The created ia32 Shr node
1831 static ir_node *gen_Shr(ir_node *node)
1833 ir_node *left = get_Shr_left(node);
1834 ir_node *right = get_Shr_right(node);
/* logical shift right needs a zero-extended input for sub-32bit modes */
1836 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1837 match_immediate | match_zero_ext);
1841 * Creates an ia32 Sar.
1843 * @return The created ia32 Shrs node
1845 static ir_node *gen_Shrs(ir_node *node)
1847 ir_node *left = get_Shrs_left(node);
1848 ir_node *right = get_Shrs_right(node);
/* special case: Shrs by 31 (presumably -- the guard is elided in this
 * extract) is a plain sign extension; use cltd/sar 31 directly */
1850 if (is_Const(right)) {
1851 ir_tarval *tv = get_Const_tarval(right);
1852 long val = get_tarval_long(tv);
1854 /* this is a sign extension */
1855 dbg_info *dbgi = get_irn_dbg_info(node);
1856 ir_node *block = be_transform_node(get_nodes_block(node));
1857 ir_node *new_op = be_transform_node(left);
1859 return create_sex_32_64(dbgi, block, new_op, node);
1863 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x,C),C) with C==16 or 24 is sign-extension from 16/8 bit:
 * replace the shift pair by a single sign-extending Conv */
1864 if (is_Const(right) && is_Shl(left)) {
1865 ir_node *shl_left = get_Shl_left(left);
1866 ir_node *shl_right = get_Shl_right(left);
1867 if (is_Const(shl_right)) {
1868 ir_tarval *tv1 = get_Const_tarval(right);
1869 ir_tarval *tv2 = get_Const_tarval(shl_right);
1870 if (tv1 == tv2 && tarval_is_long(tv1)) {
1871 long val = get_tarval_long(tv1);
1872 if (val == 16 || val == 24) {
1873 dbg_info *dbgi = get_irn_dbg_info(node);
1874 ir_node *block = get_nodes_block(node);
1884 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: emit Sar; input must be sign-extended (match_upconv) */
1893 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1894 match_immediate | match_upconv);
1900 * Creates an ia32 Rol.
1902 * @param op1 The first operator
1903 * @param op2 The second operator
1904 * @return The created ia32 RotL node
1906 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1908 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1914 * Creates an ia32 Ror.
1915 * NOTE: There is no RotR with immediate because this would always be a RotL
1916 * "imm-mode_size_bits" which can be pre-calculated.
1918 * @param op1 The first operator
1919 * @param op2 The second operator
1920 * @return The created ia32 RotR node
1922 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1924 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1930 * Creates an ia32 RotR or RotL (depending on the found pattern).
1932 * @return The created ia32 RotL or RotR node
1934 static ir_node *gen_Rotl(ir_node *node)
1936 ir_node *op1 = get_Rotl_left(node);
1937 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is a rotate right by n -- strip the Minus and emit Ror */
1939 if (is_Minus(op2)) {
1940 return gen_Ror(node, op1, get_Minus_op(op2));
1943 return gen_Rol(node, op1, op2);
1949 * Transforms a Minus node.
1951 * @return The created ia32 Minus node
1953 static ir_node *gen_Minus(ir_node *node)
1955 ir_node *op = get_Minus_op(node);
1956 ir_node *block = be_transform_node(get_nodes_block(node));
1957 dbg_info *dbgi = get_irn_dbg_info(node);
1958 ir_mode *mode = get_irn_mode(node);
1963 if (mode_is_float(mode)) {
1964 ir_node *new_op = be_transform_node(op);
1965 if (ia32_cg_config.use_sse2) {
1966 /* TODO: non-optimal... if we have many xXors, then we should
1967 * rather create a load for the const and use that instead of
1968 * several AM nodes... */
/* SSE has no fneg: xor the sign bit using a known constant from memory */
1969 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1971 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1972 noreg_GP, nomem, new_op, noreg_xmm);
1974 size = get_mode_size_bits(mode);
/* pick the 32bit or 64bit sign-mask constant entity */
1975 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1977 set_ia32_am_sc(new_node, ent);
1978 set_ia32_op_type(new_node, ia32_AddrModeS);
1979 set_ia32_ls_mode(new_node, mode);
/* x87 path: fchs flips the sign directly */
1981 new_node = new_bd_ia32_fchs(dbgi, block, new_op);
/* integer path: plain neg, mode-neutral */
1984 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1987 SET_IA32_ORIG_NODE(new_node, node);
1993 * Transforms a Not node.
1995 * @return The created ia32 Not node
1997 static ir_node *gen_Not(ir_node *node)
1999 ir_node *op = get_Not_op(node);
2001 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
2002 assert(!mode_is_float(get_irn_mode(node)));
/* bitwise not is mode-neutral: upper bits of the input don't matter */
2004 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Create a float absolute value (optionally negated afterwards, i.e. -|x|).
 * SSE: and with a known sign-clearing mask; x87: fabs (+ fchs if negate). */
2007 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
2008 bool negate, ir_node *node)
2010 ir_node *new_block = be_transform_node(block);
2011 ir_mode *mode = get_irn_mode(op);
2012 ir_node *new_op = be_transform_node(op);
2017 assert(mode_is_float(mode));
2019 if (ia32_cg_config.use_sse2) {
/* clear the sign bit by ANDing with an abs-mask constant from memory */
2020 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2021 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2022 noreg_GP, nomem, new_op, noreg_fp);
2024 size = get_mode_size_bits(mode);
2025 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2027 set_ia32_am_sc(new_node, ent);
2029 SET_IA32_ORIG_NODE(new_node, node);
2031 set_ia32_op_type(new_node, ia32_AddrModeS);
2032 set_ia32_ls_mode(new_node, mode);
2034 /* TODO, implement -Abs case */
2037 check_x87_floatmode(mode);
2038 new_node = new_bd_ia32_fabs(dbgi, new_block, new_op);
2039 SET_IA32_ORIG_NODE(new_node, node);
/* negate after abs for the -|x| case (x87 path only, see TODO above) */
2041 new_node = new_bd_ia32_fchs(dbgi, new_block, new_node);
2042 SET_IA32_ORIG_NODE(new_node, node);
2050 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2052 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2054 dbg_info *dbgi = get_irn_dbg_info(cmp);
2055 ir_node *block = get_nodes_block(cmp);
2056 ir_node *new_block = be_transform_node(block);
2057 ir_node *op1 = be_transform_node(x);
2058 ir_node *op2 = be_transform_node(n);
/* BT copies bit n of op1 into the carry flag */
2060 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation onto the ia32 condition code to test after a
 * comparison.  Three regimes: float compares (parity-aware codes), signed
 * integer compares, and unsigned/pointer compares.
 * @param overflow_possible  if false, signed </>= can use the cheaper
 *                           sign-flag-only codes (sign / not-sign). */
2063 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2065 bool overflow_possible)
2067 if (mode_is_float(mode)) {
2069 case ir_relation_equal: return ia32_cc_float_equal;
2070 case ir_relation_less: return ia32_cc_float_below;
2071 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2072 case ir_relation_greater: return ia32_cc_float_above;
2073 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2074 case ir_relation_less_greater: return ia32_cc_not_equal;
2075 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2076 case ir_relation_unordered: return ia32_cc_parity;
2077 case ir_relation_unordered_equal: return ia32_cc_equal;
2078 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2079 case ir_relation_unordered_less_equal:
2080 return ia32_cc_float_unordered_below_equal;
2081 case ir_relation_unordered_greater:
2082 return ia32_cc_float_unordered_above;
2083 case ir_relation_unordered_greater_equal:
2084 return ia32_cc_float_unordered_above_equal;
2085 case ir_relation_unordered_less_greater:
2086 return ia32_cc_float_not_equal;
2087 case ir_relation_false:
2088 case ir_relation_true:
2089 /* should we introduce a jump always/jump never? */
2092 panic("Unexpected float pnc");
2093 } else if (mode_is_signed(mode)) {
2095 case ir_relation_unordered_equal:
2096 case ir_relation_equal: return ia32_cc_equal;
2097 case ir_relation_unordered_less:
2098 case ir_relation_less:
/* without overflow, "less than 0-ish" reduces to testing the sign flag */
2099 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2100 case ir_relation_unordered_less_equal:
2101 case ir_relation_less_equal: return ia32_cc_less_equal;
2102 case ir_relation_unordered_greater:
2103 case ir_relation_greater: return ia32_cc_greater;
2104 case ir_relation_unordered_greater_equal:
2105 case ir_relation_greater_equal:
2106 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2107 case ir_relation_unordered_less_greater:
2108 case ir_relation_less_greater: return ia32_cc_not_equal;
2109 case ir_relation_less_equal_greater:
2110 case ir_relation_unordered:
2111 case ir_relation_false:
2112 case ir_relation_true:
2113 /* introduce jump always/jump never? */
2116 panic("Unexpected pnc");
/* unsigned / pointer compares: below/above family */
2119 case ir_relation_unordered_equal:
2120 case ir_relation_equal: return ia32_cc_equal;
2121 case ir_relation_unordered_less:
2122 case ir_relation_less: return ia32_cc_below;
2123 case ir_relation_unordered_less_equal:
2124 case ir_relation_less_equal: return ia32_cc_below_equal;
2125 case ir_relation_unordered_greater:
2126 case ir_relation_greater: return ia32_cc_above;
2127 case ir_relation_unordered_greater_equal:
2128 case ir_relation_greater_equal: return ia32_cc_above_equal;
2129 case ir_relation_unordered_less_greater:
2130 case ir_relation_less_greater: return ia32_cc_not_equal;
2131 case ir_relation_less_equal_greater:
2132 case ir_relation_unordered:
2133 case ir_relation_false:
2134 case ir_relation_true:
2135 /* introduce jump always/jump never? */
2138 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing ia32 node and report (via *cc_out)
 * the condition code a consumer has to test. */
2142 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2144 /* must have a Cmp as input */
2145 ir_relation relation = get_Cmp_relation(cmp);
2146 ir_node *l = get_Cmp_left(cmp);
2147 ir_node *r = get_Cmp_right(cmp);
2148 ir_mode *mode = get_irn_mode(l);
2149 bool overflow_possible;
2152 /* check for bit-test */
/* pattern: Cmp(And(Shl(1,n), x), 0) under ==/!= -> BT instruction */
2153 if (ia32_cg_config.use_bt
2154 && (relation == ir_relation_equal
2155 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2156 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2158 ir_node *la = get_And_left(l);
2159 ir_node *ra = get_And_right(l);
2166 ir_node *c = get_Shl_left(la);
2167 if (is_Const_1(c) && is_Const_0(r)) {
2168 /* (1 << n) & ra) */
2169 ir_node *n = get_Shl_right(la);
2170 flags = gen_bt(cmp, ra, n);
2171 /* the bit is copied into the CF flag */
2172 if (relation & ir_relation_equal)
2173 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2175 *cc_out = ia32_cc_below; /* test for CF=1 */
2181 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2182 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2183 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2184 * a predecessor node). So add the < bit.
2185 * (Note that we do not want to produce <=> (which can happen for
2186 * unoptimized code), because no x86 flag can represent that */
2187 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2188 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparing against constant 0 cannot overflow -> cheaper sign-flag codes */
2190 overflow_possible = true;
2191 if (is_Const(r) && is_Const_null(r))
2192 overflow_possible = false;
2194 /* just do a normal transformation of the Cmp */
2195 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2196 flags = be_transform_node(cmp);
2201 * Transforms a Load.
2203 * @return the created ia32 Load node
2205 static ir_node *gen_Load(ir_node *node)
2207 ir_node *old_block = get_nodes_block(node);
2208 ir_node *block = be_transform_node(old_block);
2209 ir_node *ptr = get_Load_ptr(node);
2210 ir_node *mem = get_Load_mem(node);
2211 ir_node *new_mem = be_transform_node(mem);
2212 dbg_info *dbgi = get_irn_dbg_info(node);
2213 ir_mode *mode = get_Load_mode(node);
2214 int throws_exception = ir_throws_exception(node);
2218 ia32_address_t addr;
2220 /* construct load address */
2221 memset(&addr, 0, sizeof(addr));
2222 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2229 base = be_transform_node(base);
2235 idx = be_transform_node(idx);
/* float loads: xLoad (SSE2) or fld (x87) */
2238 if (mode_is_float(mode)) {
2239 if (ia32_cg_config.use_sse2) {
2240 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2243 new_node = new_bd_ia32_fld(dbgi, block, base, idx, new_mem,
2247 assert(mode != mode_b);
2249 /* create a conv node with address mode for smaller modes */
2250 if (get_mode_size_bits(mode) < 32) {
2251 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2252 new_mem, noreg_GP, mode);
2254 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2257 ir_set_throws_exception(new_node, throws_exception);
2259 set_irn_pinned(new_node, get_irn_pinned(node));
2260 set_ia32_op_type(new_node, ia32_AddrModeS);
2261 set_ia32_ls_mode(new_node, mode);
2262 set_address(new_node, &addr);
/* floating (unpinned) loads may be duplicated by the spiller */
2264 if (get_irn_pinned(node) == op_pin_state_floats) {
2265 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
2266 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
2267 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2268 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2271 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a value (a Proj of a Load) may be folded into a
 * destination-address-mode operation for a Store in the given block.
 * @param other  the second operand of the binop (NULL for unary ops);
 *               must not depend on the load through memory. */
2276 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2277 ir_node *ptr, ir_node *other)
2284 /* we only use address mode if we're the only user of the load */
2285 if (get_irn_n_edges(node) > 1)
2288 load = get_Proj_pred(node);
/* load must live in the same block as the store */
2291 if (get_nodes_block(load) != block)
2294 /* store should have the same pointer as the load */
2295 if (get_Load_ptr(load) != ptr)
2298 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2299 if (other != NULL &&
2300 get_nodes_block(other) == block &&
2301 heights_reachable_in_block(ia32_heights, other, load)) {
2305 if (ia32_prevents_AM(block, load, mem))
2307 /* Store should be attached to the load via mem */
2308 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binop (read-modify-write on memory) for a
 * Store(binop(Load(p), x), p) pattern.  Returns NULL when no operand
 * qualifies for destination AM. */
2313 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2314 ir_node *mem, ir_node *ptr, ir_mode *mode,
2315 construct_binop_dest_func *func,
2316 construct_binop_dest_func *func8bit,
2317 match_flags_t flags)
2319 ir_node *src_block = get_nodes_block(node);
2327 ia32_address_mode_t am;
2328 ia32_address_t *addr = &am.addr;
2329 memset(&am, 0, sizeof(am));
2331 assert(flags & match_immediate); /* there is no destam node without... */
2332 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the matching load; for commutative ops try both */
2334 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2335 build_address(&am, op1, ia32_create_am_double_use);
2336 new_op = create_immediate_or_transform(op2);
2337 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2338 build_address(&am, op2, ia32_create_am_double_use);
2339 new_op = create_immediate_or_transform(op1);
2344 if (addr->base == NULL)
2345 addr->base = noreg_GP;
2346 if (addr->index == NULL)
2347 addr->index = noreg_GP;
2348 if (addr->mem == NULL)
2351 dbgi = get_irn_dbg_info(node);
2352 block = be_transform_node(src_block);
2353 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations need the dedicated 8bit constructor (register constraints) */
2355 if (get_mode_size_bits(mode) == 8) {
2356 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2358 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2360 set_address(new_node, addr);
2361 set_ia32_op_type(new_node, ia32_AddrModeD);
2362 set_ia32_ls_mode(new_node, mode);
2363 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new RMW node */
2365 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2366 mem_proj = be_transform_node(am.mem_proj);
2367 be_set_transformed_node(am.mem_proj, new_node);
2368 be_set_transformed_node(mem_proj, new_node);
/* Build a destination-address-mode unop (read-modify-write on memory) for a
 * Store(unop(Load(p)), p) pattern; NULL if the pattern does not qualify. */
2373 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2374 ir_node *ptr, ir_mode *mode,
2375 construct_unop_dest_func *func)
2377 ir_node *src_block = get_nodes_block(node);
2383 ia32_address_mode_t am;
2384 ia32_address_t *addr = &am.addr;
2386 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2389 memset(&am, 0, sizeof(am));
2390 build_address(&am, op, ia32_create_am_double_use);
2392 dbgi = get_irn_dbg_info(node);
2393 block = be_transform_node(src_block);
2394 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2395 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2396 set_address(new_node, addr);
2397 set_ia32_op_type(new_node, ia32_AddrModeD);
2398 set_ia32_ls_mode(new_node, mode);
2399 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory users to the new RMW node */
2401 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2402 mem_proj = be_transform_node(am.mem_proj);
2403 be_set_transformed_node(am.mem_proj, new_node);
2404 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem (setCC directly to
 * memory).  Only 8bit stores of boolean-like Muxes qualify; returns NULL
 * otherwise. */
2409 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2411 ir_mode *mode = get_irn_mode(node);
2412 ir_node *mux_true = get_Mux_true(node);
2413 ir_node *mux_false = get_Mux_false(node);
2421 ia32_condition_code_t cc;
2422 ia32_address_t addr;
/* setCC writes exactly one byte */
2424 if (get_mode_size_bits(mode) != 8)
2427 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2429 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2435 cond = get_Mux_sel(node);
2436 flags = get_flags_node(cond, &cc);
2437 /* we can't handle the float special cases with SetM */
2438 if (cc & ia32_cc_additional_float_cases)
/* Mux(sel,0,1) stores the negated condition -- invert the cc.
 * NOTE(review): guard lines are elided; presumably this runs only on the
 * 0/1 branch above -- confirm against the full source. */
2441 cc = ia32_negate_condition_code(cc);
2443 build_address_ptr(&addr, ptr, mem);
2445 dbgi = get_irn_dbg_info(node);
2446 block = get_nodes_block(node);
2447 new_block = be_transform_node(block);
2448 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2449 addr.index, addr.mem, flags, cc);
2450 set_address(new_node, &addr);
2451 set_ia32_op_type(new_node, ia32_AddrModeD);
2452 set_ia32_ls_mode(new_node, mode);
2453 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform Store(op(Load(ptr), x)) into a destination-address-mode
 * ia32 node (read-modify-write on memory).  Returns the created node or
 * (presumably) NULL when no pattern matched — case labels and some returns
 * are elided in this view. */
2458 static ir_node *try_create_dest_am(ir_node *node)
2460 ir_node *val = get_Store_value(node);
2461 ir_node *mem = get_Store_mem(node);
2462 ir_node *ptr = get_Store_ptr(node);
2463 ir_mode *mode = get_irn_mode(val);
2464 unsigned bits = get_mode_size_bits(mode);
2469 /* handle only GP modes for now... */
2470 if (!ia32_mode_needs_gp_reg(mode))
2474 /* store must be the only user of the val node */
2475 if (get_irn_n_edges(val) > 1)
2477 /* skip pointless convs */
2479 ir_node *conv_op = get_Conv_op(val);
2480 ir_mode *pred_mode = get_irn_mode(conv_op);
2481 if (!ia32_mode_needs_gp_reg(pred_mode))
2483 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2491 /* value must be in the same block */
2492 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation producing the stored value */
2495 switch (get_irn_opcode(val)) {
2497 op1 = get_Add_left(val);
2498 op2 = get_Add_right(val);
/* Add +/-1 becomes IncMem/DecMem when the target prefers inc/dec */
2499 if (ia32_cg_config.use_incdec) {
2500 if (is_Const_1(op2)) {
2501 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2503 } else if (is_Const_Minus_1(op2)) {
2504 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2508 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2509 new_bd_ia32_AddMem, new_bd_ia32_AddMem_8bit,
2510 match_commutative | match_immediate);
2513 op1 = get_Sub_left(val);
2514 op2 = get_Sub_right(val);
/* Sub(x, Const) should have been normalized to Add(x, -Const) earlier */
2515 if (is_Const(op2)) {
2516 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2518 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2519 new_bd_ia32_SubMem, new_bd_ia32_SubMem_8bit,
2523 op1 = get_And_left(val);
2524 op2 = get_And_right(val);
2525 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2526 new_bd_ia32_AndMem, new_bd_ia32_AndMem_8bit,
2527 match_commutative | match_immediate);
2530 op1 = get_Or_left(val);
2531 op2 = get_Or_right(val);
2532 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2533 new_bd_ia32_OrMem, new_bd_ia32_OrMem_8bit,
2534 match_commutative | match_immediate);
2537 op1 = get_Eor_left(val);
2538 op2 = get_Eor_right(val);
2539 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2540 new_bd_ia32_XorMem, new_bd_ia32_XorMem_8bit,
2541 match_commutative | match_immediate);
/* shifts/rotates: same constructor passed twice, no 8bit variant */
2544 op1 = get_Shl_left(val);
2545 op2 = get_Shl_right(val);
2546 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2547 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2551 op1 = get_Shr_left(val);
2552 op2 = get_Shr_right(val);
2553 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2554 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2558 op1 = get_Shrs_left(val);
2559 op2 = get_Shrs_right(val);
2560 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2561 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2565 op1 = get_Rotl_left(val);
2566 op2 = get_Rotl_right(val);
2567 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2568 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2571 /* TODO: match ROR patterns... */
2573 new_node = try_create_SetMem(val, ptr, mem);
2577 op1 = get_Minus_op(val);
2578 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2581 /* should be lowered already */
2582 assert(mode != mode_b);
2583 op1 = get_Not_op(val);
2584 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate pinned state from the Store to the combined node */
2590 if (new_node != NULL) {
2591 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2592 get_irn_pinned(node) == op_pin_state_pinned) {
2593 set_irn_pinned(new_node, op_pin_state_pinned);
/* Check whether an integer mode can be handled directly by the x87
 * fild/fist family: signed and 16 or 32 bits wide.
 * (return statements are elided in this view) */
2600 static bool possible_int_mode_for_fp(ir_mode *mode)
2604 if (!mode_is_signed(mode))
2606 size = get_mode_size_bits(mode);
2607 if (size != 16 && size != 32)
/* Test whether @p node is a Conv from a float mode to an integer mode that
 * fist can store directly (see possible_int_mode_for_fp). */
2612 static int is_float_to_int_conv(const ir_node *node)
2614 ir_mode *mode = get_irn_mode(node);
2618 if (!possible_int_mode_for_fp(mode))
2623 conv_op = get_Conv_op(node);
2624 conv_mode = get_irn_mode(conv_op);
2626 if (!mode_is_float(conv_mode))
2633 * Transform a Store(floatConst) into a sequence of
/* integer stores of the constant's raw bits (4 or 2 bytes per store). */
2636 * @return the created ia32 Store node
2638 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2640 ir_mode *mode = get_irn_mode(cns);
2641 unsigned size = get_mode_size_bytes(mode);
2642 ir_tarval *tv = get_Const_tarval(cns);
2643 ir_node *block = get_nodes_block(node);
2644 ir_node *new_block = be_transform_node(block);
2645 ir_node *ptr = get_Store_ptr(node);
2646 ir_node *mem = get_Store_mem(node);
2647 dbg_info *dbgi = get_irn_dbg_info(node);
2650 int throws_exception = ir_throws_exception(node);
2652 ia32_address_t addr;
2654 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32bit chunk of the constant, little-endian */
2661 val= get_tarval_sub_bits(tv, ofs) |
2662 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2663 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2664 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2667 } else if (size >= 2) {
/* remaining 16bit chunk */
2668 val= get_tarval_sub_bits(tv, ofs) |
2669 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2673 panic("invalid size of Store float to mem (%+F)", node);
2675 ir_graph *const irg = get_Block_irg(new_block);
2676 ir_node *const imm = ia32_create_Immediate(irg, NULL, 0, val);
2678 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2679 addr.index, addr.mem, imm);
2680 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2682 ir_set_throws_exception(new_node, throws_exception);
2683 set_irn_pinned(new_node, get_irn_pinned(node));
2684 set_ia32_op_type(new_node, ia32_AddrModeD);
2685 set_ia32_ls_mode(new_node, mode);
2686 set_address(new_node, &addr);
2687 SET_IA32_ORIG_NODE(new_node, node);
/* advance to the next chunk until the whole constant is written */
2694 addr.offset += delta;
2695 } while (size != 0);
/* several partial stores: combine their memory outputs with a Sync */
2698 return new_rd_Sync(dbgi, new_block, i, ins);
2700 return get_Proj_pred(ins[0]);
2705 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without fiddling with the FPU control word;
 * otherwise fall back to fist with an explicit truncating fpcw. */
2707 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2708 ir_node *index, ir_node *mem, ir_node *val)
2710 if (ia32_cg_config.use_fisttp) {
2711 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2712 if other users exists */
2713 ir_node *vfisttp = new_bd_ia32_fisttp(dbgi, block, base, index, mem, val);
2714 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_fisttp_res);
/* keep the popped value alive so the x87 simulator can copy it if needed */
2715 be_new_Keep(block, 1, &value);
2719 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2722 ir_node *vfist = new_bd_ia32_fist(dbgi, block, base, index, mem, val, trunc_mode);
2728 * Transforms a general (no special case) Store.
2730 * @return the created ia32 Store node
2732 static ir_node *gen_general_Store(ir_node *node)
2734 ir_node *val = get_Store_value(node);
2735 ir_mode *mode = get_irn_mode(val);
2736 ir_node *block = get_nodes_block(node);
2737 ir_node *new_block = be_transform_node(block);
2738 ir_node *ptr = get_Store_ptr(node);
2739 ir_node *mem = get_Store_mem(node);
2740 dbg_info *dbgi = get_irn_dbg_info(node);
2741 int throws_exception = ir_throws_exception(node);
2744 ia32_address_t addr;
2746 /* check for destination address mode */
2747 new_node = try_create_dest_am(node);
2748 if (new_node != NULL)
2751 /* construct store address */
2752 memset(&addr, 0, sizeof(addr));
2753 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2755 if (addr.base == NULL) {
2756 addr.base = noreg_GP;
2758 addr.base = be_transform_node(addr.base);
2761 if (addr.index == NULL) {
2762 addr.index = noreg_GP;
2764 addr.index = be_transform_node(addr.index);
2766 addr.mem = be_transform_node(mem);
2768 if (mode_is_float(mode)) {
/* float store: SSE uses xStore, x87 uses fst */
2769 if (ia32_cg_config.use_sse2) {
2770 new_val = be_transform_node(val);
2771 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2772 addr.index, addr.mem, new_val);
/* x87 stores can narrow on the fly, so skip float down-convs */
2774 val = ia32_skip_float_downconv(val);
2775 new_val = be_transform_node(val);
2776 new_node = new_bd_ia32_fst(dbgi, new_block, addr.base,
2777 addr.index, addr.mem, new_val, mode);
/* float->int conversion feeding a store: use fist(tp) directly */
2779 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2780 val = get_Conv_op(val);
2781 new_val = be_transform_node(val);
2782 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2784 unsigned dest_bits = get_mode_size_bits(mode);
/* integer store: down-convs are implicit, the store just writes fewer bytes */
2785 while (is_downconv(val)
2786 && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2787 val = get_Conv_op(val);
2789 new_val = create_immediate_or_transform(val);
2790 assert(mode != mode_b);
2792 new_node = dest_bits == 8
2793 ? new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, new_val)
2794 : new_bd_ia32_Store (dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2796 ir_set_throws_exception(new_node, throws_exception);
2798 set_irn_pinned(new_node, get_irn_pinned(node));
2799 set_ia32_op_type(new_node, ia32_AddrModeD);
2800 set_ia32_ls_mode(new_node, mode);
2802 set_address(new_node, &addr);
2803 SET_IA32_ORIG_NODE(new_node, node);
2809 * Transforms a Store.
2811 * @return the created ia32 Store node
2813 static ir_node *gen_Store(ir_node *node)
2815 ir_node *val = get_Store_value(node);
2816 ir_mode *mode = get_irn_mode(val);
2818 if (mode_is_float(mode) && is_Const(val)) {
2819 /* We can transform every floating const store
2820 into a sequence of integer stores.
2821 If the constant is already in a register,
2822 it would be better to use it, but we don't
2823 have this information here. */
2824 return gen_float_const_Store(node, val);
2826 return gen_general_Store(node);
2830 * Transforms a Switch.
2832 * @return the created ia32 SwitchJmp node
2834 static ir_node *gen_Switch(ir_node *node)
2836 dbg_info *dbgi = get_irn_dbg_info(node);
2837 ir_graph *irg = get_irn_irg(node);
2838 ir_node *block = be_transform_node(get_nodes_block(node));
2839 ir_node *sel = get_Switch_selector(node);
2840 ir_node *new_sel = be_transform_node(sel);
2841 ir_mode *sel_mode = get_irn_mode(sel);
2842 const ir_switch_table *table = get_Switch_table(node);
2843 unsigned n_outs = get_Switch_n_outs(node);
2847 assert(get_mode_size_bits(sel_mode) <= 32);
2848 assert(!mode_is_float(sel_mode));
2849 sel = ia32_skip_sameconv(sel);
/* narrow selectors must be widened to 32bit for the table index */
2850 if (get_mode_size_bits(sel_mode) < 32)
2851 new_sel = transform_upconv(sel, node);
/* jump table entity; filled in later by the emitter (presumably) */
2853 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2854 set_entity_visibility(entity, ir_visibility_private);
2855 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2857 table = ir_switch_table_duplicate(irg, table);
2859 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
/* scale 2 = index * 4: each table entry is a 32bit address */
2860 set_ia32_am_scale(new_node, 2);
2861 set_ia32_am_sc(new_node, entity);
2862 set_ia32_op_type(new_node, ia32_AddrModeS);
2863 set_ia32_ls_mode(new_node, mode_Iu);
2864 SET_IA32_ORIG_NODE(new_node, node);
2865 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2866 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2872 * Transform a Cond node.
2874 static ir_node *gen_Cond(ir_node *node)
2876 ir_node *block = get_nodes_block(node);
2877 ir_node *new_block = be_transform_node(block);
2878 dbg_info *dbgi = get_irn_dbg_info(node);
2879 ir_node *sel = get_Cond_selector(node);
2880 ir_node *flags = NULL;
2882 ia32_condition_code_t cc;
2884 /* we get flags from a Cmp */
2885 flags = get_flags_node(sel, &cc);
2887 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2888 SET_IA32_ORIG_NODE(new_node, node);
2894 * Transform a be_Copy.
2896 static ir_node *gen_be_Copy(ir_node *node)
2898 ir_node *new_node = be_duplicate_node(node);
2899 ir_mode *mode = get_irn_mode(new_node);
2901 if (ia32_mode_needs_gp_reg(mode)) {
2902 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare.  With P6+ fucomi the flags are set directly;
 * otherwise use fucom/ftst + fnstsw and copy the status word into the flags
 * register via sahf. */
2908 static ir_node *create_Fucom(ir_node *node)
2910 dbg_info *dbgi = get_irn_dbg_info(node);
2911 ir_node *block = get_nodes_block(node);
2912 ir_node *new_block = be_transform_node(block);
2913 ir_node *left = get_Cmp_left(node);
2914 ir_node *new_left = be_transform_node(left);
2915 ir_node *right = get_Cmp_right(node);
2916 ir_mode *cmp_mode = get_irn_mode(left);
2919 check_x87_floatmode(cmp_mode);
2921 if (ia32_cg_config.use_fucomi) {
2922 new_right = be_transform_node(right);
2923 new_node = new_bd_ia32_Fucomi(dbgi, new_block, new_left,
2925 set_ia32_commutative(new_node);
2926 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst */
2928 if (is_Const_0(right)) {
2929 new_node = new_bd_ia32_FtstFnstsw(dbgi, new_block, new_left, 0);
2931 new_right = be_transform_node(right);
2932 new_node = new_bd_ia32_FucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2933 set_ia32_commutative(new_node);
2936 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into EFLAGS */
2938 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2939 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 float compare (ucomisd/ucomiss), with source address mode
 * matching for one operand. */
2945 static ir_node *create_Ucomi(ir_node *node)
2947 dbg_info *dbgi = get_irn_dbg_info(node);
2948 ir_node *src_block = get_nodes_block(node);
2949 ir_node *new_block = be_transform_node(src_block);
2950 ir_node *left = get_Cmp_left(node);
2951 ir_node *right = get_Cmp_right(node);
2953 ia32_address_mode_t am;
2954 ia32_address_t *addr = &am.addr;
2956 match_arguments(&am, src_block, left, right, NULL,
2957 match_commutative | match_am);
2959 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2960 addr->mem, am.new_op1, am.new_op2,
2962 set_am_attributes(new_node, &am);
2964 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possibly folded Load's memory Proj to the new node */
2966 new_node = fix_mem_proj(new_node, &am);
/* Upper-bits-clean callback for Mux nodes: a Mux that becomes a setcc
 * (two 8bit constants) produces a full byte register and is therefore not
 * clean; otherwise both values must be clean. */
2971 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2973 ir_node *mux_true = get_Mux_true(node);
2974 ir_node *mux_false = get_Mux_false(node);
2975 ir_mode *mux_mode = get_irn_mode(node);
2976 /* mux nodes which get transformed to the set instruction are not clean */
2977 if (is_Const(mux_true) && is_Const(mux_false)
2978 && get_mode_size_bits(mux_mode) == 8) {
2981 return be_upper_bits_clean(mux_true, mode)
2982 && be_upper_bits_clean(mux_false, mode);
2986 * Generate code for a Cmp.
/* Floats go to Ucomi (SSE2) or Fucom (x87); integer compares become Test
 * for the (x & y) ==/!= 0 pattern, otherwise Cmp. */
2988 static ir_node *gen_Cmp(ir_node *node)
2990 dbg_info *dbgi = get_irn_dbg_info(node);
2991 ir_node *block = get_nodes_block(node);
2992 ir_node *new_block = be_transform_node(block);
2993 ir_node *left = get_Cmp_left(node);
2994 ir_node *right = get_Cmp_right(node);
2995 ir_mode *cmp_mode = get_irn_mode(left);
2997 ia32_address_mode_t am;
2998 ia32_address_t *addr = &am.addr;
3000 if (mode_is_float(cmp_mode)) {
3001 if (ia32_cg_config.use_sse2) {
3002 return create_Ucomi(node);
3004 return create_Fucom(node);
3008 assert(ia32_mode_needs_gp_reg(cmp_mode));
3010 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3011 if (is_Const_0(right) &&
3013 get_irn_n_edges(left) == 1) {
3014 /* Test(and_left, and_right) */
3015 ir_node *and_left = get_And_left(left);
3016 ir_node *and_right = get_And_right(left);
3018 /* matze: code here used mode instead of cmd_mode, I think it is always
3019 * the same as cmp_mode, but I leave this here to see if this is really
3022 assert(get_irn_mode(and_left) == cmp_mode);
3024 match_arguments(&am, block, and_left, and_right, NULL,
3026 match_am | match_8bit_am | match_16bit_am |
3027 match_am_and_immediates | match_immediate);
3029 /* use 32bit compare mode if possible since the opcode is smaller */
3030 if (am.op_type == ia32_Normal &&
3031 be_upper_bits_clean(and_left, cmp_mode) &&
3032 be_upper_bits_clean(and_right, cmp_mode)) {
3033 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3036 new_node = get_mode_size_bits(cmp_mode) == 8
3037 ? new_bd_ia32_Test_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3038 : new_bd_ia32_Test (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3040 /* Cmp(left, right) */
3041 match_arguments(&am, block, left, right, NULL,
3043 match_am | match_8bit_am | match_16bit_am |
3044 match_am_and_immediates | match_immediate);
3045 /* use 32bit compare mode if possible since the opcode is smaller */
3046 if (am.op_type == ia32_Normal &&
3047 be_upper_bits_clean(left, cmp_mode) &&
3048 be_upper_bits_clean(right, cmp_mode)) {
3049 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3052 new_node = get_mode_size_bits(cmp_mode) == 8
3053 ? new_bd_ia32_Cmp_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3054 : new_bd_ia32_Cmp (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3056 set_am_attributes(new_node, &am);
3057 set_ia32_ls_mode(new_node, cmp_mode);
3059 SET_IA32_ORIG_NODE(new_node, node);
3061 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc from a Mux: selects val_false/val_true depending on the
 * condition code.  Requires CMOV support and a GP mode. */
3066 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3067 ia32_condition_code_t cc)
3069 dbg_info *dbgi = get_irn_dbg_info(node);
3070 ir_node *block = get_nodes_block(node);
3071 ir_node *new_block = be_transform_node(block);
3072 ir_node *val_true = get_Mux_true(node);
3073 ir_node *val_false = get_Mux_false(node);
3075 ia32_address_mode_t am;
3076 ia32_address_t *addr;
3078 assert(ia32_cg_config.use_cmov);
3079 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3083 match_arguments(&am, block, val_false, val_true, flags,
3084 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operands were swapped by the matcher: invert the condition */
3086 if (am.ins_permuted)
3087 cc = ia32_negate_condition_code(cc);
3089 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3090 addr->mem, am.new_op1, am.new_op2, new_flags,
3092 set_am_attributes(new_node, &am);
3094 SET_IA32_ORIG_NODE(new_node, node);
3096 new_node = fix_mem_proj(new_node, &am);
3102 * Creates a ia32 Setcc instruction.
/* Setcc only writes an 8bit register; zero-extend to the requested mode
 * when it is wider than a byte. */
3104 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3105 ir_node *flags, ia32_condition_code_t cc,
3108 ir_mode *mode = get_irn_mode(orig_node);
3111 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3112 SET_IA32_ORIG_NODE(new_node, orig_node);
3114 /* we might need to conv the result up */
3115 if (get_mode_size_bits(mode) > 8) {
3116 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
3117 SET_IA32_ORIG_NODE(new_node, orig_node);
3124 * Create instruction for an unsigned Difference or Zero.
/* doz(a,b) = a > b ? a - b : 0, computed branch-free as
 * (a - b) & ~sbb(0,0,carry): the Sbb0 materializes the borrow as a mask. */
3126 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3128 ir_mode *mode = get_irn_mode(psi);
3138 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3139 match_mode_neutral | match_am | match_immediate | match_two_users);
3141 block = get_nodes_block(new_node);
3143 if (is_Proj(new_node)) {
3144 sub = get_Proj_pred(new_node);
/* turn the Sub into a multi-result node so we can tap its flags */
3147 set_irn_mode(sub, mode_T);
3148 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3150 assert(is_ia32_Sub(sub));
3151 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3153 dbgi = get_irn_dbg_info(psi);
3154 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3155 set_ia32_ls_mode(sbb, mode_Iu);
3156 notn = new_bd_ia32_Not(dbgi, block, sbb);
3158 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3159 set_ia32_ls_mode(new_node, mode_Iu);
3160 set_ia32_commutative(new_node);
3165 * Create an const array of two float consts.
3167 * @param c0 the first constant
3168 * @param c1 the second constant
3169 * @param new_mode IN/OUT for the mode of the constants, if NULL
3170 * smallest possible mode will be used
/* @return the (private, constant) global entity holding the two values */
3172 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3175 ir_mode *mode = *new_mode;
3177 ir_initializer_t *initializer;
3178 ir_tarval *tv0 = get_Const_tarval(c0);
3179 ir_tarval *tv1 = get_Const_tarval(c1);
3182 /* detect the best mode for the constants */
3183 mode = get_tarval_mode(tv0);
/* try to shrink to single precision, then double, when lossless */
3185 if (mode != mode_F) {
3186 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3187 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3189 tv0 = tarval_convert_to(tv0, mode);
3190 tv1 = tarval_convert_to(tv1, mode);
3191 } else if (mode != mode_D) {
3192 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3193 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3195 tv0 = tarval_convert_to(tv0, mode);
3196 tv1 = tarval_convert_to(tv1, mode);
3203 tp = ia32_get_prim_type(mode);
3204 tp = ia32_create_float_array(tp);
3206 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3208 set_entity_ld_ident(ent, get_entity_ident(ent));
3209 set_entity_visibility(ent, ir_visibility_private);
3210 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3212 initializer = create_initializer_compound(2);
3214 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3215 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3217 set_entity_initializer(ent, initializer);
3224 * Possible transformations for creating a Setcc.
/* enum values (SETCC_TR_SET, _ADD, _LEA, _SHL, _NEG, _AND, ...) are elided
 * in this view; see find_const_transform for their use. */
3226 enum setcc_transform_insn {
/* Recipe produced by find_const_transform: condition code plus a short
 * sequence of transform steps. */
3238 typedef struct setcc_transform {
3240 ia32_condition_code_t cc;
3242 enum setcc_transform_insn transform;
3246 } setcc_transform_t;
3249 * Setcc can only handle 0 and 1 result.
3250 * Find a transformation that creates 0 and 1 from
/* the given true/false constants t and f: normalize so f == 0, then reduce
 * t to 1 via add/lea/shl/neg/and steps, recorded in res->steps. */
3253 static void find_const_transform(ia32_condition_code_t cc,
3254 ir_tarval *t, ir_tarval *f,
3255 setcc_transform_t *res)
/* ensure t is the non-zero / larger constant; negating cc swaps roles */
3261 if (tarval_is_null(t)) {
3265 cc = ia32_negate_condition_code(cc);
3266 } else if (tarval_cmp(t, f) == ir_relation_less) {
3267 // now, t is the bigger one
3271 cc = ia32_negate_condition_code(cc);
/* if f != 0, emit a final ADD f and continue with t-f and 0 */
3275 if (! tarval_is_null(f)) {
3276 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3279 res->steps[step].transform = SETCC_TR_ADD;
3281 if (t == tarval_bad)
3282 panic("constant subtract failed");
3283 if (! tarval_is_long(f))
3284 panic("tarval is not long");
3286 res->steps[step].val = get_tarval_long(f);
3288 f = tarval_sub(f, f, NULL);
3289 assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices */
3292 if (tarval_is_one(t)) {
3293 res->steps[step].transform = SETCC_TR_SET;
3294 res->num_steps = ++step;
/* t == -1: Setcc then negate */
3298 if (tarval_is_minus_one(t)) {
3299 res->steps[step].transform = SETCC_TR_NEG;
3301 res->steps[step].transform = SETCC_TR_SET;
3302 res->num_steps = ++step;
/* small factors map to lea scales (x*9, x*8, x*5, x*4, x*3, x*2),
 * merging with a pending ADD where possible */
3305 if (tarval_is_long(t)) {
3306 long v = get_tarval_long(t);
3308 res->steps[step].val = 0;
3311 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3313 res->steps[step].transform = SETCC_TR_LEAxx;
3314 res->steps[step].scale = 3; /* (a << 3) + a */
3317 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3319 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3320 res->steps[step].scale = 3; /* (a << 3) */
3323 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3325 res->steps[step].transform = SETCC_TR_LEAxx;
3326 res->steps[step].scale = 2; /* (a << 2) + a */
3329 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3331 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3332 res->steps[step].scale = 2; /* (a << 2) */
3335 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3337 res->steps[step].transform = SETCC_TR_LEAxx;
3338 res->steps[step].scale = 1; /* (a << 1) + a */
3341 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3343 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3344 res->steps[step].scale = 1; /* (a << 1) */
3347 res->num_steps = step;
/* general case: mask with AND (multi-bit) or shift a single bit into place */
3350 if (! tarval_is_single_bit(t)) {
3351 res->steps[step].transform = SETCC_TR_AND;
3352 res->steps[step].val = v;
3354 res->steps[step].transform = SETCC_TR_NEG;
3356 int val = get_tarval_lowest_bit(t);
3359 res->steps[step].transform = SETCC_TR_SHL;
3360 res->steps[step].scale = val;
3364 res->steps[step].transform = SETCC_TR_SET;
3365 res->num_steps = ++step;
3368 panic("tarval is not long");
3372 * Transforms a Mux node into some code sequence.
/* Handles, in order: float abs patterns, SSE min/max, float const-pair loads,
 * unsigned difference-or-zero, integer const/const via Setcc recipe, and the
 * general case via CMov.  Several branch heads are elided in this view. */
3374 * @return The transformed node.
3376 static ir_node *gen_Mux(ir_node *node)
3378 dbg_info *dbgi = get_irn_dbg_info(node);
3379 ir_node *block = get_nodes_block(node);
3380 ir_node *new_block = be_transform_node(block);
3381 ir_node *mux_true = get_Mux_true(node);
3382 ir_node *mux_false = get_Mux_false(node);
3383 ir_node *sel = get_Mux_sel(node);
3384 ir_mode *mode = get_irn_mode(node);
3388 ia32_condition_code_t cc;
3390 assert(get_irn_mode(sel) == mode_b);
/* abs pattern: only transformed for float modes */
3392 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3394 if (ia32_mode_needs_gp_reg(mode)) {
3395 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3398 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3399 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3403 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3404 if (mode_is_float(mode)) {
3405 ir_node *cmp_left = get_Cmp_left(sel);
3406 ir_node *cmp_right = get_Cmp_right(sel);
3407 ir_relation relation = get_Cmp_relation(sel);
3409 if (ia32_cg_config.use_sse2) {
3410 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3411 if (cmp_left == mux_true && cmp_right == mux_false) {
3412 /* Mux(a <= b, a, b) => MIN */
3413 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3414 match_commutative | match_am | match_two_users);
3415 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3416 /* Mux(a <= b, b, a) => MAX */
3417 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3418 match_commutative | match_am | match_two_users);
3420 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3421 if (cmp_left == mux_true && cmp_right == mux_false) {
3422 /* Mux(a >= b, a, b) => MAX */
3423 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3424 match_commutative | match_am | match_two_users);
3425 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3426 /* Mux(a >= b, b, a) => MIN */
3427 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3428 match_commutative | match_am | match_two_users);
/* float Mux of two constants: build a 2-element const array and load
 * entry [setcc] from it */
3433 if (is_Const(mux_true) && is_Const(mux_false)) {
3434 ia32_address_mode_t am;
3439 flags = get_flags_node(sel, &cc);
3440 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3442 if (ia32_cg_config.use_sse2) {
3443 /* cannot load from different mode on SSE */
3446 /* x87 can load any mode */
3450 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the boolean index by the element size */
3452 if (new_mode == mode_F) {
3454 } else if (new_mode == mode_D) {
3456 } else if (new_mode == ia32_mode_E) {
3457 /* arg, shift 16 NOT supported */
3459 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3461 panic("Unsupported constant size");
3464 am.ls_mode = new_mode;
3465 am.addr.base = get_symconst_base();
3466 am.addr.index = new_node;
3467 am.addr.mem = nomem;
3469 am.addr.scale = scale;
3470 am.addr.use_frame = 0;
3471 am.addr.tls_segment = false;
3472 am.addr.frame_entity = NULL;
3473 am.addr.symconst_sign = 0;
3474 am.mem_proj = am.addr.mem;
3475 am.op_type = ia32_AddrModeS;
3478 am.pinned = op_pin_state_floats;
3480 am.ins_permuted = false;
3482 if (ia32_cg_config.use_sse2)
3483 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3485 load = new_bd_ia32_fld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3486 set_am_attributes(load, &am);
3488 return new_rd_Proj(NULL, load, mode_fp, pn_ia32_res);
3490 panic("cannot transform floating point Mux");
3493 assert(ia32_mode_needs_gp_reg(mode));
3496 ir_node *cmp_left = get_Cmp_left(sel);
3497 ir_node *cmp_right = get_Cmp_right(sel);
3498 ir_relation relation = get_Cmp_relation(sel);
3499 ir_node *val_true = mux_true;
3500 ir_node *val_false = mux_false;
/* normalize: a constant 0 goes to the false side */
3502 if (is_Const(val_true) && is_Const_null(val_true)) {
3503 ir_node *tmp = val_false;
3504 val_false = val_true;
3506 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b / b - a, 0) => unsigned difference-or-zero */
3508 if (is_Const_0(val_false) && is_Sub(val_true)) {
3509 if ((relation & ir_relation_greater)
3510 && get_Sub_left(val_true) == cmp_left
3511 && get_Sub_right(val_true) == cmp_right) {
3512 return create_doz(node, cmp_left, cmp_right);
3514 if ((relation & ir_relation_less)
3515 && get_Sub_left(val_true) == cmp_right
3516 && get_Sub_right(val_true) == cmp_left) {
3517 return create_doz(node, cmp_right, cmp_left);
3522 flags = get_flags_node(sel, &cc);
3524 if (is_Const(mux_true) && is_Const(mux_false)) {
3525 /* both are const, good */
3526 ir_tarval *tv_true = get_Const_tarval(mux_true);
3527 ir_tarval *tv_false = get_Const_tarval(mux_false);
3528 setcc_transform_t res;
3531 find_const_transform(cc, tv_true, tv_false, &res);
/* emit the recipe steps back-to-front */
3533 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3536 switch (res.steps[step].transform) {
3538 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3539 add_ia32_am_offs_int(new_node, res.steps[step].val);
3541 case SETCC_TR_ADDxx:
3542 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3545 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3546 set_ia32_am_scale(new_node, res.steps[step].scale);
3547 set_ia32_am_offs_int(new_node, res.steps[step].val);
3549 case SETCC_TR_LEAxx:
3550 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3551 set_ia32_am_scale(new_node, res.steps[step].scale);
3552 set_ia32_am_offs_int(new_node, res.steps[step].val);
3555 imm = ia32_immediate_from_long(res.steps[step].scale);
3556 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3559 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3562 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3565 imm = ia32_immediate_from_long(res.steps[step].val);
3566 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3569 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3572 panic("unknown setcc transform");
/* general case: conditional move */
3576 new_node = create_CMov(node, sel, flags, cc);
3583 * Create a conversion from x87 state register to general purpose.
/* Implemented as fist (store int to a stack slot) followed by an integer
 * Load of the slot. */
3585 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3587 ir_node *block = be_transform_node(get_nodes_block(node));
3588 ir_node *op = get_Conv_op(node);
3589 ir_node *new_op = be_transform_node(op);
3590 ir_graph *irg = current_ir_graph;
3591 dbg_info *dbgi = get_irn_dbg_info(node);
3592 ir_mode *mode = get_irn_mode(node);
3593 ir_node *frame = get_irg_frame(irg);
3594 ir_node *fist, *load, *mem;
3596 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3597 set_irn_pinned(fist, op_pin_state_floats);
3598 set_ia32_use_frame(fist);
3599 set_ia32_op_type(fist, ia32_AddrModeD);
3600 arch_add_irn_flags(fist, arch_irn_flags_spill);
/* fist and fisttp share the memory Proj number */
3602 assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
3603 mem = new_r_Proj(fist, mode_M, pn_ia32_fist_M);
3605 assert(get_mode_size_bits(mode) <= 32);
3606 /* exception we can only store signed 32 bit integers, so for unsigned
3607 we store a 64bit (signed) integer and load the lower bits */
3608 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3609 set_ia32_ls_mode(fist, mode_Ls);
3611 set_ia32_ls_mode(fist, mode_Is);
3613 SET_IA32_ORIG_NODE(fist, node);
/* load the integer result back from the stack slot */
3616 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3618 set_irn_pinned(load, op_pin_state_floats);
3619 set_ia32_use_frame(load);
3620 set_ia32_op_type(load, ia32_AddrModeS);
3621 set_ia32_ls_mode(load, mode_Is);
/* the spill slot must match the fist's store size */
3622 if (get_ia32_ls_mode(fist) == mode_Ls) {
3623 ia32_attr_t *attr = get_ia32_attr(load);
3624 attr->data.need_64bit_stackent = 1;
3626 ia32_attr_t *attr = get_ia32_attr(load);
3627 attr->data.need_32bit_stackent = 1;
3629 SET_IA32_ORIG_NODE(load, node);
3631 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3635 * Creates a x87 Conv by placing a Store and a Load
/* The round-trip through memory in tgt_mode performs the precision change;
 * the reload yields an extended-precision value again. */
3637 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3639 ir_node *block = get_nodes_block(node);
3640 ir_graph *irg = get_Block_irg(block);
3641 dbg_info *dbgi = get_irn_dbg_info(node);
3642 ir_node *frame = get_irg_frame(irg);
3644 ir_node *store, *load;
3647 store = new_bd_ia32_fst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3648 set_ia32_use_frame(store);
3649 set_ia32_op_type(store, ia32_AddrModeD);
3650 arch_add_irn_flags(store, arch_irn_flags_spill);
3651 SET_IA32_ORIG_NODE(store, node);
3653 store_mem = new_r_Proj(store, mode_M, pn_ia32_fst_M);
3655 load = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3656 set_ia32_use_frame(load);
3657 set_ia32_op_type(load, ia32_AddrModeS);
3658 SET_IA32_ORIG_NODE(load, node);
3660 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_fld_res);
/* Build an integer-to-integer conversion node, picking the dedicated 8bit
 * variant when the target mode is 8 bits wide (8bit sources need special
 * register constraints on ia32). */
3664 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3665 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3667 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3669 func = get_mode_size_bits(mode) == 8 ?
3670 new_bd_ia32_Conv_I2I_8bit : new_bd_ia32_Conv_I2I;
3671 return func(dbgi, block, base, index, mem, val, mode);
3675 * Create a conversion from general purpose to x87 register
3677 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3679 ir_node *src_block = get_nodes_block(node);
3680 ir_node *block = be_transform_node(src_block);
3681 ir_graph *irg = get_Block_irg(block);
3682 dbg_info *dbgi = get_irn_dbg_info(node);
3683 ir_node *op = get_Conv_op(node);
3684 ir_node *new_op = NULL;
3686 ir_mode *store_mode;
3692 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3693 if (possible_int_mode_for_fp(src_mode)) {
3694 ia32_address_mode_t am;
3696 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3697 if (am.op_type == ia32_AddrModeS) {
3698 ia32_address_t *addr = &am.addr;
/* operand is already in memory: fild loads it directly via address mode */
3700 fild = new_bd_ia32_fild(dbgi, block, addr->base, addr->index, addr->mem);
3701 new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3703 set_am_attributes(fild, &am);
3704 SET_IA32_ORIG_NODE(fild, node);
3706 fix_mem_proj(fild, &am);
/* NOTE(review): an early return for the AM case was presumably here —
 * elided in this view; the code below handles the non-AM fallback. */
3711 if (new_op == NULL) {
3712 new_op = be_transform_node(op);
3715 mode = get_irn_mode(op);
3717 /* first convert to 32 bit signed if necessary */
3718 if (get_mode_size_bits(src_mode) < 32) {
3719 if (!be_upper_bits_clean(op, src_mode)) {
3720 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3721 SET_IA32_ORIG_NODE(new_op, node);
3726 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can load it from memory */
3729 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3731 set_ia32_use_frame(store);
3732 set_ia32_op_type(store, ia32_AddrModeD);
3733 set_ia32_ls_mode(store, mode_Iu);
3734 arch_add_irn_flags(store, arch_irn_flags_spill);
3736 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3738 /* exception for 32bit unsigned, do a 64bit spill+load */
3739 if (!mode_is_signed(mode)) {
/* fild only reads signed integers: write a zero high word at offset 4 and
 * load the pair as a signed 64bit value, which equals the unsigned 32bit. */
3742 ir_node *zero_const = ia32_create_Immediate(irg, NULL, 0, 0);
3744 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3745 noreg_GP, nomem, zero_const);
3746 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3748 set_ia32_use_frame(zero_store);
3749 set_ia32_op_type(zero_store, ia32_AddrModeD);
3750 add_ia32_am_offs_int(zero_store, 4);
3751 set_ia32_ls_mode(zero_store, mode_Iu);
3752 arch_add_irn_flags(zero_store, arch_irn_flags_spill);
/* fild must see both stores: merge their memory Projs with a Sync */
3754 in[0] = zero_store_mem;
3757 store_mem = new_rd_Sync(dbgi, block, 2, in);
3758 store_mode = mode_Ls;
3760 store_mode = mode_Is;
/* load the spilled integer into the x87 stack */
3764 fild = new_bd_ia32_fild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3766 set_ia32_use_frame(fild);
3767 set_ia32_op_type(fild, ia32_AddrModeS);
3768 set_ia32_ls_mode(fild, store_mode);
3770 new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3776 * Create a conversion from one integer mode into another one
3778 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3779 dbg_info *dbgi, ir_node *block, ir_node *op,
3782 ir_node *new_block = be_transform_node(block);
3784 ia32_address_mode_t am;
3785 ia32_address_t *addr = &am.addr;
/* only widening conversions reach this helper */
3788 assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3790 #ifdef DEBUG_libfirm
/* a Conv fed by a constant should have been folded by the optimizer */
3792 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3797 if (be_upper_bits_clean(op, src_mode)) {
/* upper bits already zero/sign-correct: the conversion is a no-op */
3798 return be_transform_node(op);
3801 match_arguments(&am, block, NULL, op, NULL,
3802 match_am | match_8bit_am | match_16bit_am);
3804 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3805 addr->mem, am.new_op2, src_mode);
3806 set_am_attributes(new_node, &am);
3807 /* match_arguments assume that out-mode = in-mode, this isn't true here
3809 set_ia32_ls_mode(new_node, src_mode);
3810 SET_IA32_ORIG_NODE(new_node, node);
3811 new_node = fix_mem_proj(new_node, &am);
3816 * Transforms a Conv node.
3818 * @return The created ia32 Conv node
3820 static ir_node *gen_Conv(ir_node *node)
3822 ir_node *block = get_nodes_block(node);
3823 ir_node *new_block = be_transform_node(block);
3824 ir_node *op = get_Conv_op(node);
3825 ir_node *new_op = NULL;
3826 dbg_info *dbgi = get_irn_dbg_info(node);
3827 ir_mode *src_mode = get_irn_mode(op);
3828 ir_mode *tgt_mode = get_irn_mode(node);
3829 int src_bits = get_mode_size_bits(src_mode);
3830 int tgt_bits = get_mode_size_bits(tgt_mode);
3831 ir_node *res = NULL;
/* 64bit integer ops must have been lowered to 32bit before this phase */
3833 assert(!mode_is_int(src_mode) || src_bits <= 32);
3834 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3836 /* modeB -> X should already be lowered by the lower_mode_b pass */
3837 if (src_mode == mode_b) {
3838 panic("ConvB not lowered %+F", node);
3841 if (src_mode == tgt_mode) {
3842 /* this should be optimized already, but who knows... */
3843 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3844 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3845 return be_transform_node(op);
3848 if (mode_is_float(src_mode)) {
3849 new_op = be_transform_node(op);
3850 /* we convert from float ... */
3851 if (mode_is_float(tgt_mode)) {
/* float -> float */
3853 if (ia32_cg_config.use_sse2) {
3854 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3855 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3857 set_ia32_ls_mode(res, tgt_mode);
3859 if (src_bits < tgt_bits) {
/* widening is exact on x87; no instruction needed */
3860 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* narrowing: force rounding through a store/load round-trip */
3863 res = gen_x87_conv(tgt_mode, new_op);
3864 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* float -> int */
3870 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3871 if (ia32_cg_config.use_sse2) {
3872 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3874 set_ia32_ls_mode(res, src_mode);
/* x87 path goes through fist + reload */
3876 return gen_x87_fp_to_gp(node);
3880 /* we convert from int ... */
3881 if (mode_is_float(tgt_mode)) {
/* int -> float */
3883 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3884 if (ia32_cg_config.use_sse2) {
3885 new_op = be_transform_node(op);
3886 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3888 set_ia32_ls_mode(res, tgt_mode);
3890 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3891 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3892 res = gen_x87_gp_to_fp(node, src_mode);
3894 /* we need a float-conv, if the int mode has more bits than the
3896 if (float_mantissa < int_mantissa) {
3897 res = gen_x87_conv(tgt_mode, res);
3898 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3902 } else if (tgt_mode == mode_b) {
3903 /* mode_b lowering already took care that we only have 0/1 values */
3904 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3905 src_mode, tgt_mode));
3906 return be_transform_node(op);
/* int -> int */
3909 if (src_bits >= tgt_bits) {
/* narrowing int convs are no-ops on 32bit registers */
3910 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3911 src_mode, tgt_mode));
3912 return be_transform_node(op);
3915 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 immediate ('i' = any 32bit constant);
 * fall back to normally transforming it into a register value. */
3923 static ir_node *create_immediate_or_transform(ir_node *const node)
3925 ir_node *new_node = ia32_try_create_Immediate(node, 'i');
3926 if (new_node == NULL) {
3927 new_node = be_transform_node(node);
3933 * Transforms a FrameAddr into an ia32 Add.
3935 static ir_node *gen_be_FrameAddr(ir_node *node)
3937 ir_node *block = be_transform_node(get_nodes_block(node));
3938 ir_node *op = be_get_FrameAddr_frame(node);
3939 ir_node *new_op = be_transform_node(op);
3940 dbg_info *dbgi = get_irn_dbg_info(node);
/* a Lea with the frame entity as symbolic offset computes the address;
 * the concrete offset is filled in after stack-frame layout */
3943 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3944 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3945 set_ia32_use_frame(new_node);
3947 SET_IA32_ORIG_NODE(new_node, node);
3953 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3955 static ir_node *gen_be_Return(ir_node *node)
3957 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3958 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3959 ir_node *new_ret_val = be_transform_node(ret_val);
3960 ir_node *new_ret_mem = be_transform_node(ret_mem);
3961 dbg_info *dbgi = get_irn_dbg_info(node);
3962 ir_node *block = be_transform_node(get_nodes_block(node));
3963 ir_graph *irg = get_Block_irg(block);
3964 ir_entity *ent = get_irg_entity(irg);
3965 ir_type *tp = get_entity_type(ent);
3979 assert(ret_val != NULL);
/* only the SSE2 + float-result case needs special treatment; everything
 * else is a plain copy of the be_Return node */
3980 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3981 return be_duplicate_node(node);
3984 res_type = get_method_res_type(tp, 0);
3986 if (! is_Primitive_type(res_type)) {
3987 return be_duplicate_node(node);
3990 mode = get_type_mode(res_type);
3991 if (! mode_is_float(mode)) {
3992 return be_duplicate_node(node);
3995 assert(get_method_n_ress(tp) == 1);
3997 frame = get_irg_frame(irg);
3999 /* store xmm0 onto stack */
4000 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4001 new_ret_mem, new_ret_val);
4002 set_ia32_ls_mode(sse_store, mode);
4003 set_ia32_op_type(sse_store, ia32_AddrModeD);
4004 set_ia32_use_frame(sse_store);
4005 arch_add_irn_flags(sse_store, arch_irn_flags_spill);
4006 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4008 /* load into x87 register */
4009 fld = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, mode);
4010 set_ia32_op_type(fld, ia32_AddrModeS);
4011 set_ia32_use_frame(fld);
4013 mproj = new_r_Proj(fld, mode_M, pn_ia32_fld_M);
4014 fld = new_r_Proj(fld, mode_fp, pn_ia32_fld_res);
4016 /* create a new return */
4017 arity = get_irn_arity(node);
4018 in = ALLOCAN(ir_node*, arity);
4019 pop = be_Return_get_pop(node);
/* rebuild the operand list, substituting the x87 value for the old
 * return value and the fld's memory Proj for the old memory */
4020 for (i = 0; i < arity; ++i) {
4021 ir_node *op = get_irn_n(node, i);
4022 if (op == ret_val) {
4024 } else if (op == ret_mem) {
4027 in[i] = be_transform_node(op);
4030 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4031 copy_node_attr(irg, node, new_node);
4037 * Transform a be_AddSP into an ia32_SubSP.
4039 static ir_node *gen_be_AddSP(ir_node *node)
4041 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4042 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
/* the ia32 stack grows downwards, so enlarging the frame (AddSP) is a
 * subtraction from esp */
4044 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4045 match_am | match_immediate);
4046 assert(is_ia32_SubSP(new_node));
4047 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4048 &ia32_registers[REG_ESP]);
4053 * Transform a be_SubSP into an ia32_AddSP
4055 static ir_node *gen_be_SubSP(ir_node *node)
4057 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4058 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
/* shrinking the frame on a downwards-growing stack means adding to esp */
4060 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4061 match_am | match_immediate);
4062 assert(is_ia32_AddSP(new_node));
4063 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4064 &ia32_registers[REG_ESP]);
/* Transform a Phi: keep it as a Phi but attach the register-class
 * requirement matching its mode (gp, xmm for SSE2 floats, fp for x87). */
4068 static ir_node *gen_Phi(ir_node *node)
4070 ir_mode *mode = get_irn_mode(node);
4071 const arch_register_req_t *req;
4072 if (ia32_mode_needs_gp_reg(mode)) {
4073 /* we shouldn't have any 64bit stuff around anymore */
4074 assert(get_mode_size_bits(mode) <= 32);
4075 /* all integer operations are on 32bit registers now */
4077 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4078 } else if (mode_is_float(mode)) {
4079 if (ia32_cg_config.use_sse2) {
4081 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4084 req = ia32_reg_classes[CLASS_ia32_fp].class_req;
/* memory/other Phis need no register */
4087 req = arch_no_register_req;
4090 return be_transform_phi(node, req);
/* Transform an unconditional Jmp into an ia32 Jmp. */
4093 static ir_node *gen_Jmp(ir_node *node)
4095 ir_node *block = get_nodes_block(node);
4096 ir_node *new_block = be_transform_node(block);
4097 dbg_info *dbgi = get_irn_dbg_info(node);
4100 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4101 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp): the target may come from a register,
 * an immediate, or directly from memory via address mode. */
4109 static ir_node *gen_IJmp(ir_node *node)
4111 ir_node *block = get_nodes_block(node);
4112 ir_node *new_block = be_transform_node(block);
4113 dbg_info *dbgi = get_irn_dbg_info(node);
4114 ir_node *op = get_IJmp_target(node);
4116 ia32_address_mode_t am;
4117 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4119 assert(get_irn_mode(op) == mode_P);
4121 match_arguments(&am, block, NULL, op, NULL,
4122 match_am | match_immediate | match_upconv);
4124 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4125 addr->mem, am.new_op2);
4126 set_am_attributes(new_node, &am);
4127 SET_IA32_ORIG_NODE(new_node, node);
4129 new_node = fix_mem_proj(new_node, &am);
/* Transform the lowered 64bit-helper l_Add into a real ia32 Add whose
 * flags output feeds the matching Adc of the high word. */
4134 static ir_node *gen_ia32_l_Add(ir_node *node)
4136 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4137 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4138 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4139 match_commutative | match_am | match_immediate |
4140 match_mode_neutral);
/* gen_binop may hand back a Proj of the Add; we need the Add itself in
 * mode_T so callers can attach result and flags Projs */
4142 if (is_Proj(lowered)) {
4143 lowered = get_Proj_pred(lowered);
4145 assert(is_ia32_Add(lowered));
4146 set_irn_mode(lowered, mode_T);
/* Transform the lowered l_Adc (add with carry, high word of a 64bit add)
 * into an ia32 Adc consuming the flags of the preceding Add. */
4152 static ir_node *gen_ia32_l_Adc(ir_node *node)
4154 return gen_binop_flags(node, new_bd_ia32_Adc,
4155 match_commutative | match_am | match_immediate |
4156 match_mode_neutral);
4160 * Transforms a l_MulS into a "real" MulS node.
4162 * @return the created ia32 Mul node
4164 static ir_node *gen_ia32_l_Mul(ir_node *node)
4166 ir_node *left = get_binop_left(node);
4167 ir_node *right = get_binop_right(node);
/* unsigned widening multiply (edx:eax result) used by 64bit lowering */
4169 return gen_binop(node, left, right, new_bd_ia32_Mul,
4170 match_commutative | match_am | match_mode_neutral);
4174 * Transforms a l_IMulS into a "real" IMul1OPS node.
4176 * @return the created ia32 IMul1OP node
4178 static ir_node *gen_ia32_l_IMul(ir_node *node)
4180 ir_node *left = get_binop_left(node);
4181 ir_node *right = get_binop_right(node);
/* one-operand signed widening multiply (edx:eax result) */
4183 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4184 match_commutative | match_am | match_mode_neutral);
/* Transform the lowered l_Sub into a real ia32 Sub whose flags output
 * feeds the matching Sbb of the high word. */
4187 static ir_node *gen_ia32_l_Sub(ir_node *node)
4189 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4190 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4191 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4192 match_am | match_immediate | match_mode_neutral);
/* unwrap a possible result Proj and switch the Sub to mode_T so both
 * result and flags Projs can be attached */
4194 if (is_Proj(lowered)) {
4195 lowered = get_Proj_pred(lowered);
4197 assert(is_ia32_Sub(lowered));
4198 set_irn_mode(lowered, mode_T);
/* Transform the lowered l_Sbb (subtract with borrow, high word of a 64bit
 * sub) into an ia32 Sbb consuming the flags of the preceding Sub. */
4204 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4206 return gen_binop_flags(node, new_bd_ia32_Sbb,
4207 match_am | match_immediate | match_mode_neutral);
/* Transform the lowered l_LLtoFloat (64bit integer -> float): spill the
 * two 32bit halves to adjacent frame slots and fild them as one 64bit
 * integer; unsigned values additionally get a bias correction. */
4210 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4212 ir_node *src_block = get_nodes_block(node);
4213 ir_node *block = be_transform_node(src_block);
4214 ir_graph *irg = get_Block_irg(block);
4215 dbg_info *dbgi = get_irn_dbg_info(node);
4216 ir_node *frame = get_irg_frame(irg);
4217 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4218 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4219 ir_node *new_val_low = be_transform_node(val_low);
4220 ir_node *new_val_high = be_transform_node(val_high);
4222 ir_node *sync, *fild, *res;
4224 ir_node *store_high;
4228 if (ia32_cg_config.use_sse2) {
4229 panic("not implemented for SSE2");
/* write low word at offset 0 and high word at offset 4 of the slot */
4233 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4235 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4237 SET_IA32_ORIG_NODE(store_low, node);
4238 SET_IA32_ORIG_NODE(store_high, node);
4240 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4241 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4243 set_ia32_use_frame(store_low);
4244 set_ia32_use_frame(store_high);
4245 set_ia32_op_type(store_low, ia32_AddrModeD);
4246 set_ia32_op_type(store_high, ia32_AddrModeD);
4247 set_ia32_ls_mode(store_low, mode_Iu);
4248 set_ia32_ls_mode(store_high, mode_Is);
4249 arch_add_irn_flags(store_low, arch_irn_flags_spill);
4250 arch_add_irn_flags(store_high, arch_irn_flags_spill);
4251 add_ia32_am_offs_int(store_high, 4);
/* the fild must see both stores: Sync their memory Projs */
4255 sync = new_rd_Sync(dbgi, block, 2, in);
/* do the 64bit signed int load into the x87 stack */
4258 fild = new_bd_ia32_fild(dbgi, block, frame, noreg_GP, sync);
4260 set_ia32_use_frame(fild);
4261 set_ia32_op_type(fild, ia32_AddrModeS);
4262 set_ia32_ls_mode(fild, mode_Ls);
4264 SET_IA32_ORIG_NODE(fild, node);
4266 res = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
4268 if (! mode_is_signed(get_irn_mode(val_high))) {
/* unsigned with sign bit set was read as a negative signed value:
 * add 2^64 back in. The correction constant is selected from a
 * two-entry table (0.0, 2^64) indexed by the former sign bit
 * (high word shifted right by 31). */
4269 ia32_address_mode_t am;
4271 ir_node *count = ia32_create_Immediate(irg, NULL, 0, 31);
4274 am.addr.base = get_symconst_base();
4275 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4276 am.addr.mem = nomem;
4279 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4280 am.addr.tls_segment = false;
4281 am.addr.use_frame = 0;
4282 am.addr.frame_entity = NULL;
4283 am.addr.symconst_sign = 0;
4284 am.ls_mode = mode_F;
4285 am.mem_proj = nomem;
4286 am.op_type = ia32_AddrModeS;
4288 am.new_op2 = ia32_new_NoReg_fp(irg);
4289 am.pinned = op_pin_state_floats;
4291 am.ins_permuted = false;
4293 fadd = new_bd_ia32_fadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4294 am.new_op1, am.new_op2, get_fpcw());
4295 set_am_attributes(fadd, &am);
4297 set_irn_mode(fadd, mode_T);
4298 res = new_rd_Proj(NULL, fadd, mode_fp, pn_ia32_res);
/* Transform the lowered l_FloattoLL (float -> 64bit integer): fist the
 * value as a 64bit integer into a frame slot; the two 32bit halves are
 * read back later by gen_Proj_l_FloattoLL. Returns the memory Proj. */
4303 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4305 ir_node *src_block = get_nodes_block(node);
4306 ir_node *block = be_transform_node(src_block);
4307 ir_graph *irg = get_Block_irg(block);
4308 dbg_info *dbgi = get_irn_dbg_info(node);
4309 ir_node *frame = get_irg_frame(irg);
4310 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4311 ir_node *new_val = be_transform_node(val);
4314 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4315 SET_IA32_ORIG_NODE(fist, node);
4316 set_ia32_use_frame(fist);
4317 set_ia32_op_type(fist, ia32_AddrModeD);
4318 set_ia32_ls_mode(fist, mode_Ls);
4319 arch_add_irn_flags(fist, arch_irn_flags_spill);
/* gen_vfist may produce either fist or fisttp; their M Proj numbers match */
4321 assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
4322 return new_r_Proj(fist, mode_M, pn_ia32_fist_M);
/* Transform a Proj of l_FloattoLL: load one 32bit half (low or high,
 * selected by the Proj number) of the 64bit integer the fist stored. */
4325 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4327 ir_node *block = be_transform_node(get_nodes_block(node));
4328 ir_graph *irg = get_Block_irg(block);
4329 ir_node *pred = get_Proj_pred(node);
4330 ir_node *new_pred = be_transform_node(pred);
4331 ir_node *frame = get_irg_frame(irg);
4332 dbg_info *dbgi = get_irn_dbg_info(node);
4333 long pn = get_Proj_proj(node);
4338 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4339 SET_IA32_ORIG_NODE(load, node);
4340 set_ia32_use_frame(load);
4341 set_ia32_op_type(load, ia32_AddrModeS);
4342 set_ia32_ls_mode(load, mode_Iu);
4343 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4344 * 32 bit from it with this particular load */
4345 attr = get_ia32_attr(load);
4346 attr->data.need_64bit_stackent = 1;
4348 if (pn == pn_ia32_l_FloattoLL_res_high) {
/* high half lives 4 bytes above the low half */
4349 add_ia32_am_offs_int(load, 4);
4351 assert(pn == pn_ia32_l_FloattoLL_res_low);
4354 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4360 * Transform the Projs of an AddSP.
4362 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4364 ir_node *pred = get_Proj_pred(node);
4365 ir_node *new_pred = be_transform_node(pred);
4366 dbg_info *dbgi = get_irn_dbg_info(node);
4367 long proj = get_Proj_proj(node);
/* be_AddSP became an ia32_SubSP (stack grows down), so renumber the Projs
 * onto the SubSP's outputs */
4369 if (proj == pn_be_AddSP_sp) {
4370 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4371 pn_ia32_SubSP_stack);
4372 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4374 } else if (proj == pn_be_AddSP_res) {
4375 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4376 pn_ia32_SubSP_addr);
4377 } else if (proj == pn_be_AddSP_M) {
4378 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4381 panic("No idea how to transform proj->AddSP");
4385 * Transform the Projs of a SubSP.
4387 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4389 ir_node *pred = get_Proj_pred(node);
4390 ir_node *new_pred = be_transform_node(pred);
4391 dbg_info *dbgi = get_irn_dbg_info(node);
4392 long proj = get_Proj_proj(node);
/* be_SubSP became an ia32_AddSP; renumber the Projs accordingly */
4394 if (proj == pn_be_SubSP_sp) {
4395 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4396 pn_ia32_AddSP_stack);
4397 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4399 } else if (proj == pn_be_SubSP_M) {
4400 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4403 panic("No idea how to transform proj->SubSP");
4407 * Transform and renumber the Projs from a Load.
4409 static ir_node *gen_Proj_Load(ir_node *node)
4412 ir_node *pred = get_Proj_pred(node);
4413 dbg_info *dbgi = get_irn_dbg_info(node);
4414 long proj = get_Proj_proj(node);
4416 /* loads might be part of source address mode matches, so we don't
4417 * transform the ProjMs yet (with the exception of loads whose result is
/* multi-user ProjM: the load may still be folded into another node as
 * source AM, so defer by keeping an untransformed Proj for now */
4420 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4423 /* this is needed, because sometimes we have loops that are only
4424 reachable through the ProjM */
4425 be_enqueue_preds(node);
4426 /* do it in 2 steps, to silence firm verifier */
4427 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4428 set_Proj_proj(res, pn_ia32_mem);
4432 /* renumber the proj */
4433 new_pred = be_transform_node(pred);
/* map the firm Proj numbers onto the output numbers of whichever ia32
 * node the Load was transformed into */
4434 if (is_ia32_Load(new_pred)) {
4435 switch ((pn_Load)proj) {
4437 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4439 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4440 case pn_Load_X_except:
4441 /* This Load might raise an exception. Mark it. */
4442 set_ia32_exc_label(new_pred, 1);
4443 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4444 case pn_Load_X_regular:
4445 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4447 } else if (is_ia32_Conv_I2I(new_pred)) {
/* load was folded into a conversion; give it a mode_T so Projs attach */
4448 set_irn_mode(new_pred, mode_T);
4449 switch ((pn_Load)proj) {
4451 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4453 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4454 case pn_Load_X_except:
4455 /* This Load might raise an exception. Mark it. */
4456 set_ia32_exc_label(new_pred, 1);
4457 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4458 case pn_Load_X_regular:
4459 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4461 } else if (is_ia32_xLoad(new_pred)) {
4462 switch ((pn_Load)proj) {
4464 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4466 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4467 case pn_Load_X_except:
4468 /* This Load might raise an exception. Mark it. */
4469 set_ia32_exc_label(new_pred, 1);
4470 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4471 case pn_Load_X_regular:
4472 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4474 } else if (is_ia32_fld(new_pred)) {
4475 switch ((pn_Load)proj) {
4477 return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fld_res);
4479 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fld_M);
4480 case pn_Load_X_except:
4481 /* This Load might raise an exception. Mark it. */
4482 set_ia32_exc_label(new_pred, 1);
4483 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_except);
4484 case pn_Load_X_regular:
4485 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_regular);
4488 /* can happen for ProJMs when source address mode happened for the
4491 /* however it should not be the result proj, as that would mean the
4492 load had multiple users and should not have been used for
4494 if (proj != pn_Load_M) {
4495 panic("internal error: transformed node not a Load");
4497 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4500 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs from a Store, dispatching on which ia32
 * node the Store was actually transformed into (Store, fist, fisttp, fst,
 * xStore, a Sync from float-const lowering, or a destination-AM node). */
4503 static ir_node *gen_Proj_Store(ir_node *node)
4505 ir_node *pred = get_Proj_pred(node);
4506 ir_node *new_pred = be_transform_node(pred);
4507 dbg_info *dbgi = get_irn_dbg_info(node);
4508 long pn = get_Proj_proj(node);
4510 if (is_ia32_Store(new_pred)) {
4511 switch ((pn_Store)pn) {
4513 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4514 case pn_Store_X_except:
4515 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4516 case pn_Store_X_regular:
4517 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4519 } else if (is_ia32_fist(new_pred)) {
4520 switch ((pn_Store)pn) {
4522 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fist_M);
4523 case pn_Store_X_except:
4524 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_except);
4525 case pn_Store_X_regular:
4526 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_regular);
4528 } else if (is_ia32_fisttp(new_pred)) {
4529 switch ((pn_Store)pn) {
4531 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fisttp_M);
4532 case pn_Store_X_except:
4533 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_except);
4534 case pn_Store_X_regular:
4535 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_regular);
4537 } else if (is_ia32_fst(new_pred)) {
4538 switch ((pn_Store)pn) {
4540 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fst_M);
4541 case pn_Store_X_except:
4542 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_except);
4543 case pn_Store_X_regular:
4544 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_regular);
4546 } else if (is_ia32_xStore(new_pred)) {
4547 switch ((pn_Store)pn) {
4549 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4550 case pn_Store_X_except:
4551 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4552 case pn_Store_X_regular:
4553 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4555 } else if (is_Sync(new_pred)) {
4556 /* hack for the case that gen_float_const_Store produced a Sync */
4557 if (pn == pn_Store_M) {
4560 panic("exception control flow not implemented yet");
4561 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4562 /* destination address mode */
4563 if (pn == pn_Store_M) {
4566 panic("exception control flow for destination AM not implemented yet");
4569 panic("No idea how to transform Proj(Store) %+F", node);
4573 * Transform and renumber the Projs from a Div or Mod instruction.
4575 static ir_node *gen_Proj_Div(ir_node *node)
4577 ir_node *pred = get_Proj_pred(node);
4578 ir_node *new_pred = be_transform_node(pred);
4579 dbg_info *dbgi = get_irn_dbg_info(node);
4580 long proj = get_Proj_proj(node);
/* Div and IDiv share output numbering, so one set of Proj numbers works
 * for both */
4582 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4583 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4585 switch ((pn_Div)proj) {
4587 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4588 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4589 } else if (is_ia32_xDiv(new_pred)) {
4590 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4591 } else if (is_ia32_fdiv(new_pred)) {
4592 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fdiv_M);
4594 panic("Div transformed to unexpected thing %+F", new_pred);
4597 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4598 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4599 } else if (is_ia32_xDiv(new_pred)) {
4600 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4601 } else if (is_ia32_fdiv(new_pred)) {
4602 return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fdiv_res);
4604 panic("Div transformed to unexpected thing %+F", new_pred);
4606 case pn_Div_X_except:
/* mark the Div as potentially throwing so emit keeps the exception edge */
4607 set_ia32_exc_label(new_pred, 1);
4608 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4609 case pn_Div_X_regular:
4610 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4613 panic("No idea how to transform proj->Div");
4617 * Transform and renumber the Projs from a Div or Mod instruction.
4619 static ir_node *gen_Proj_Mod(ir_node *node)
4621 ir_node *pred = get_Proj_pred(node);
4622 ir_node *new_pred = be_transform_node(pred);
4623 dbg_info *dbgi = get_irn_dbg_info(node);
4624 long proj = get_Proj_proj(node);
/* Mod only exists for integers, so the pred must be a (I)Div; both share
 * output numbering */
4626 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4627 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4628 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4630 switch ((pn_Mod)proj) {
4632 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4634 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4635 case pn_Mod_X_except:
4636 set_ia32_exc_label(new_pred, 1);
4637 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4638 case pn_Mod_X_regular:
4639 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4641 panic("No idea how to transform proj->Mod");
4645 * Transform and renumber the Projs from a CopyB.
4647 static ir_node *gen_Proj_CopyB(ir_node *node)
4649 ir_node *pred = get_Proj_pred(node);
4650 ir_node *new_pred = be_transform_node(pred);
4651 dbg_info *dbgi = get_irn_dbg_info(node);
4652 long proj = get_Proj_proj(node);
/* a CopyB becomes either CopyB_i (known size, inlined) or CopyB (rep movs);
 * pick the matching output numbers */
4654 switch ((pn_CopyB)proj) {
4656 if (is_ia32_CopyB_i(new_pred)) {
4657 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4658 } else if (is_ia32_CopyB(new_pred)) {
4659 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4662 case pn_CopyB_X_regular:
4663 if (is_ia32_CopyB_i(new_pred)) {
4664 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4665 } else if (is_ia32_CopyB(new_pred)) {
4666 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4669 case pn_CopyB_X_except:
4670 if (is_ia32_CopyB_i(new_pred)) {
4671 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4672 } else if (is_ia32_CopyB(new_pred)) {
4673 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4678 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call: match the callee address as
 * address mode/immediate, wire up the fixed register inputs (esp, fpcw,
 * eax/ecx/edx register parameters) and the memory. */
4681 static ir_node *gen_be_Call(ir_node *node)
4683 dbg_info *const dbgi = get_irn_dbg_info(node);
4684 ir_node *const src_block = get_nodes_block(node);
4685 ir_node *const block = be_transform_node(src_block);
4686 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4687 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4688 ir_node *const sp = be_transform_node(src_sp);
4689 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4690 ia32_address_mode_t am;
4691 ia32_address_t *const addr = &am.addr;
4696 ir_node * eax = noreg_GP;
4697 ir_node * ecx = noreg_GP;
4698 ir_node * edx = noreg_GP;
4699 unsigned const pop = be_Call_get_pop(node);
4700 ir_type *const call_tp = be_Call_get_type(node);
4701 int old_no_pic_adjust;
4702 int throws_exception = ir_throws_exception(node);
4704 /* Run the x87 simulator if the call returns a float value */
4705 if (get_method_n_ress(call_tp) > 0) {
4706 ir_type *const res_type = get_method_res_type(call_tp, 0);
4707 ir_mode *const res_mode = get_type_mode(res_type);
4709 if (res_mode != NULL && mode_is_float(res_mode)) {
4710 ir_graph *const irg = get_Block_irg(block);
4711 ia32_request_x87_sim(irg);
4715 /* We do not want be_Call direct calls */
4716 assert(be_Call_get_entity(node) == NULL);
4718 /* special case for PIC trampoline calls */
4719 old_no_pic_adjust = ia32_no_pic_adjust;
4720 ia32_no_pic_adjust = be_options.pic;
/* match the call target; the flag above suppresses PIC base adjustment
 * while matching and is restored immediately afterwards */
4722 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4723 match_am | match_immediate | match_upconv);
4725 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; walk the remaining register parameters
 * backwards and sort them into the eax/ecx/edx slots */
4727 i = get_irn_arity(node) - 1;
4728 fpcw = be_transform_node(get_irn_n(node, i--));
4729 for (; i >= n_be_Call_first_arg; --i) {
4730 arch_register_req_t const *const req
4731 = arch_get_irn_register_req_in(node, i);
4732 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4734 assert(req->type == arch_register_req_type_limited);
4735 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4737 switch (*req->limited) {
4738 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4739 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4740 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4741 default: panic("Invalid GP register for register parameter");
4745 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4746 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4747 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4748 ir_set_throws_exception(call, throws_exception);
4749 set_am_attributes(call, &am);
4750 call = fix_mem_proj(call, &am);
4752 if (get_irn_pinned(node) == op_pin_state_pinned)
4753 set_irn_pinned(call, op_pin_state_pinned);
4755 SET_IA32_ORIG_NODE(call, node);
4757 if (ia32_cg_config.use_sse2) {
4758 /* remember this call for post-processing */
4759 ARR_APP1(ir_node *, call_list, call);
4760 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4767 * Transform Builtin trap
/* Transform a Builtin trap: emit an ia32 UD2 instruction threaded through
 * the transformed memory edge. */
4769 static ir_node *gen_trap(ir_node *node)
4771 dbg_info *dbgi = get_irn_dbg_info(node);
4772 ir_node *block = be_transform_node(get_nodes_block(node));
4773 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4775 return new_bd_ia32_UD2(dbgi, block, mem);
4779 * Transform Builtin debugbreak
/* Transform a Builtin debugbreak: emit an ia32 Breakpoint node, keeping the
 * memory dependency so it stays ordered with surrounding memory ops. */
4781 static ir_node *gen_debugbreak(ir_node *node)
4783 dbg_info *dbgi = get_irn_dbg_info(node);
4784 ir_node *block = be_transform_node(get_nodes_block(node));
4785 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4787 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4791 * Transform Builtin return_address
/* Transform Builtin return_address(level, frame): walk `level` frames up via
 * ClimbFrame, then load the return address slot of that frame.
 * param 0 must be a Const (frame level), param 1 is the frame pointer. */
4793 static ir_node *gen_return_address(ir_node *node)
4795 ir_node *param = get_Builtin_param(node, 0);
4796 ir_node *frame = get_Builtin_param(node, 1);
4797 dbg_info *dbgi = get_irn_dbg_info(node);
4798 ir_tarval *tv = get_Const_tarval(param);
4799 ir_graph *irg = get_irn_irg(node);
4800 unsigned long value = get_tarval_long(tv);
4802 ir_node *block = be_transform_node(get_nodes_block(node));
4803 ir_node *ptr = be_transform_node(frame);
/* NOTE(review): extraction dropped lines here; presumably ClimbFrame is only
 * built for value > 0 — confirm against upstream source. */
4807 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4808 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4809 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4812 /* load the return address from this frame */
4813 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4815 set_irn_pinned(load, get_irn_pinned(node));
4816 set_ia32_op_type(load, ia32_AddrModeS);
4817 set_ia32_ls_mode(load, mode_Iu);
/* offset 0 from the return-address entity in the frame */
4819 set_ia32_am_offs_int(load, 0);
4820 set_ia32_use_frame(load);
4821 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* an unpinned load may be rematerialized; result proj numbers of all ia32
 * load variants must agree for that to be safe */
4823 if (get_irn_pinned(node) == op_pin_state_floats) {
4824 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4825 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4826 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4827 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4830 SET_IA32_ORIG_NODE(load, node);
4831 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4835 * Transform Builtin frame_address
/* Transform Builtin frame_address(level, frame): like gen_return_address, but
 * loads the saved frame-pointer slot of the selected frame instead. */
4837 static ir_node *gen_frame_address(ir_node *node)
4839 ir_node *param = get_Builtin_param(node, 0);
4840 ir_node *frame = get_Builtin_param(node, 1);
4841 dbg_info *dbgi = get_irn_dbg_info(node);
4842 ir_tarval *tv = get_Const_tarval(param);
4843 ir_graph *irg = get_irn_irg(node);
4844 unsigned long value = get_tarval_long(tv);
4846 ir_node *block = be_transform_node(get_nodes_block(node));
4847 ir_node *ptr = be_transform_node(frame);
/* NOTE(review): extraction dropped lines here; presumably ClimbFrame is only
 * built for value > 0 — confirm against upstream source. */
4852 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4853 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4854 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4857 /* load the frame address from this frame */
4858 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4860 set_irn_pinned(load, get_irn_pinned(node));
4861 set_ia32_op_type(load, ia32_AddrModeS);
4862 set_ia32_ls_mode(load, mode_Iu);
/* frame-address entity may be absent; code between these lines was dropped
 * in extraction — the NULL-entity fallback path is not visible here */
4864 ent = ia32_get_frame_address_entity(irg);
4866 set_ia32_am_offs_int(load, 0);
4867 set_ia32_use_frame(load);
4868 set_ia32_frame_ent(load, ent);
4870 /* will fail anyway, but gcc does this: */
4871 set_ia32_am_offs_int(load, 0);
/* an unpinned load may be rematerialized (see gen_return_address) */
4874 if (get_irn_pinned(node) == op_pin_state_floats) {
4875 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4876 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4877 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4878 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4881 SET_IA32_ORIG_NODE(load, node);
4882 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4886 * Transform Builtin frame_address
/* Transform Builtin prefetch(ptr, rw, locality): select an SSE PREFETCHh /
 * 3DNow! PREFETCH(W) instruction, or drop the prefetch entirely when neither
 * feature is available (routing only the memory edge through). */
4888 static ir_node *gen_prefetch(ir_node *node)
4891 ir_node *ptr, *block, *mem, *base, *idx;
4892 ir_node *param, *new_node;
4895 ia32_address_t addr;
4897 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4898 /* no prefetch at all, route memory */
4899 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag; must be a Const */
4902 param = get_Builtin_param(node, 1);
4903 tv = get_Const_tarval(param);
4904 rw = get_tarval_long(tv);
4906 /* construct load address */
4907 memset(&addr, 0, sizeof(addr));
4908 ptr = get_Builtin_param(node, 0);
4909 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* NOTE(review): the NULL->noreg_GP guards around base/idx were dropped by
 * extraction — confirm against upstream source */
4916 base = be_transform_node(base);
4922 idx = be_transform_node(idx);
4925 dbgi = get_irn_dbg_info(node);
4926 block = be_transform_node(get_nodes_block(node));
4927 mem = be_transform_node(get_Builtin_mem(node));
4929 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4930 /* we have 3DNow!, this was already checked above */
4931 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4932 } else if (ia32_cg_config.use_sse_prefetch) {
4933 /* note: rw == 1 is IGNORED in that case */
4934 param = get_Builtin_param(node, 2);
4935 tv = get_Const_tarval(param);
4936 locality = get_tarval_long(tv);
4938 /* SSE style prefetch */
/* NOTE(review): the switch(locality) statement and its case labels
 * (0..3 -> NTA/2/1/0) were dropped by extraction — confirm upstream */
4941 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4944 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4947 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4950 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4954 assert(ia32_cg_config.use_3dnow_prefetch);
4955 /* 3DNow! style prefetch */
4956 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
/* attach the computed address mode to the selected prefetch node */
4959 set_irn_pinned(new_node, get_irn_pinned(node));
4960 set_ia32_op_type(new_node, ia32_AddrModeS);
4961 set_ia32_ls_mode(new_node, mode_Bu);
4962 set_address(new_node, &addr);
4964 SET_IA32_ORIG_NODE(new_node, node);
4966 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4970 * Transform bsf like node
/* Helper for bsf/bsr-like builtins: transform the single operand with
 * address-mode matching and build the node via the given constructor.
 * Returns the (possibly mem-proj-fixed) new node. */
4972 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4974 ir_node *param = get_Builtin_param(node, 0);
4975 dbg_info *dbgi = get_irn_dbg_info(node);
4977 ir_node *block = get_nodes_block(node);
4978 ir_node *new_block = be_transform_node(block);
4980 ia32_address_mode_t am;
4981 ia32_address_t *addr = &am.addr;
4984 match_arguments(&am, block, NULL, param, NULL, match_am);
4986 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4987 set_am_attributes(cnt, &am);
4988 set_ia32_ls_mode(cnt, get_irn_mode(param));
4990 SET_IA32_ORIG_NODE(cnt, node);
4991 return fix_mem_proj(cnt, &am);
4995 * Transform builtin ffs.
4997 static ir_node *gen_ffs(ir_node *node)
4999 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5000 ir_node *real = skip_Proj(bsf);
5001 dbg_info *dbgi = get_irn_dbg_info(real);
5002 ir_node *block = get_nodes_block(real);
5003 ir_node *flag, *set, *conv, *neg, *orn, *add;
5006 if (get_irn_mode(real) != mode_T) {
5007 set_irn_mode(real, mode_T);
5008 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5011 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
5014 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5015 SET_IA32_ORIG_NODE(set, node);
5018 conv = new_bd_ia32_Conv_I2I_8bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5019 SET_IA32_ORIG_NODE(conv, node);
5022 neg = new_bd_ia32_Neg(dbgi, block, conv);
5025 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5026 set_ia32_ls_mode(orn, mode_Iu);
5027 set_ia32_commutative(orn);
5030 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5031 add_ia32_am_offs_int(add, 1);
5036 * Transform builtin clz.
/* Transform builtin clz: clz(x) = bsr(x) ^ 31 (BSR yields the index of the
 * highest set bit; XOR with 31 converts it to a leading-zero count). */
5038 static ir_node *gen_clz(ir_node *node)
5040 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5041 ir_node *real = skip_Proj(bsr);
5042 dbg_info *dbgi = get_irn_dbg_info(real);
5043 ir_node *block = get_nodes_block(real);
5044 ir_graph *irg = get_Block_irg(block);
5045 ir_node *imm = ia32_create_Immediate(irg, NULL, 0, 31);
5047 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5051 * Transform builtin ctz.
/* Transform builtin ctz: BSF directly yields the index of the lowest set
 * bit, which equals the trailing-zero count. */
5053 static ir_node *gen_ctz(ir_node *node)
5055 return gen_unop_AM(node, new_bd_ia32_Bsf);
5059 * Transform builtin parity.
/* Transform builtin parity: the x86 parity flag only covers the low byte, so
 * fold the 32-bit value down first (shr 16 + xor, then XorHighLow for the
 * remaining two bytes) before reading PF via Setcc. */
5061 static ir_node *gen_parity(ir_node *node)
5063 dbg_info *dbgi = get_irn_dbg_info(node);
5064 ir_node *block = get_nodes_block(node);
5065 ir_node *new_block = be_transform_node(block);
5066 ir_node *param = get_Builtin_param(node, 0);
5067 ir_node *new_param = be_transform_node(param);
5070 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5071 * so we have to do complicated xoring first.
5072 * (we should also better lower this before the backend so we still have a
5073 * chance for CSE, constant folding and other goodies for some of these
5076 ir_graph *const irg = get_Block_irg(new_block);
5077 ir_node *const count = ia32_create_Immediate(irg, NULL, 0, 16);
5078 ir_node *const shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5079 ir_node *const xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem, shr, new_param);
5080 ir_node *const xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5083 set_ia32_ls_mode(xorn, mode_Iu);
5084 set_ia32_commutative(xorn);
/* take the flags output of the final xor; PF holds the low-byte parity */
5086 set_irn_mode(xor2, mode_T);
5087 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5090 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5091 SET_IA32_ORIG_NODE(new_node, node);
/* widen the 8-bit Setcc result back to a full register */
5094 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
5095 SET_IA32_ORIG_NODE(new_node, node);
5100 * Transform builtin popcount
/* Transform builtin popcount: use the POPCNT instruction when available
 * (SSE4.2/SSE4a), otherwise expand the classic divide-and-conquer bit-count
 * from Hacker's Delight (pairwise adds at widths 1/2/4/8/16). */
5102 static ir_node *gen_popcount(ir_node *node)
5104 ir_node *param = get_Builtin_param(node, 0);
5105 dbg_info *dbgi = get_irn_dbg_info(node);
5107 ir_node *block = get_nodes_block(node);
5108 ir_node *new_block = be_transform_node(block);
5111 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5113 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5114 if (ia32_cg_config.use_popcnt) {
5115 ia32_address_mode_t am;
5116 ia32_address_t *addr = &am.addr;
5119 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);
5121 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5122 set_am_attributes(cnt, &am);
5123 set_ia32_ls_mode(cnt, get_irn_mode(param));
5125 SET_IA32_ORIG_NODE(cnt, node);
5126 return fix_mem_proj(cnt, &am);
5129 new_param = be_transform_node(param);
5131 /* do the standard popcount algo */
5132 /* TODO: This is stupid, we should transform this before the backend,
5133 * to get CSE, localopts, etc. for the operations
5134 * TODO: This is also not the optimal algorithm (it is just the starting
5135 * example in hackers delight, they optimize it more on the following page)
5136 * But I'm too lazy to fix this now, as the code should get lowered before
5137 * the backend anyway.
5139 ir_graph *const irg = get_Block_irg(new_block);
5141 /* m1 = x & 0x55555555 */
5142 imm = ia32_create_Immediate(irg, NULL, 0, 0x55555555);
5143 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5146 simm = ia32_create_Immediate(irg, NULL, 0, 1);
5147 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5149 /* m2 = s1 & 0x55555555 */
5150 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as a flag-free add) */
5153 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5155 /* m4 = m3 & 0x33333333 */
5156 imm = ia32_create_Immediate(irg, NULL, 0, 0x33333333);
5157 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5160 simm = ia32_create_Immediate(irg, NULL, 0, 2);
5161 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5163 /* m5 = s2 & 0x33333333 */
5164 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5167 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5169 /* m7 = m6 & 0x0F0F0F0F */
5170 imm = ia32_create_Immediate(irg, NULL, 0, 0x0F0F0F0F);
5171 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5174 simm = ia32_create_Immediate(irg, NULL, 0, 4);
5175 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5177 /* m8 = s3 & 0x0F0F0F0F */
5178 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5181 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5183 /* m10 = m9 & 0x00FF00FF */
5184 imm = ia32_create_Immediate(irg, NULL, 0, 0x00FF00FF);
5185 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5188 simm = ia32_create_Immediate(irg, NULL, 0, 8);
5189 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5191 /* m11 = s4 & 0x00FF00FF */
5192 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5194 /* m12 = m10 + m11 */
5195 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5197 /* m13 = m12 & 0x0000FFFF */
5198 imm = ia32_create_Immediate(irg, NULL, 0, 0x0000FFFF);
5199 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5201 /* s5 = m12 >> 16 */
5202 simm = ia32_create_Immediate(irg, NULL, 0, 16);
5203 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5205 /* res = m13 + s5 */
5206 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5210 * Transform builtin byte swap.
/* Transform builtin bswap: use the BSWAP instruction for 32 bit when the CPU
 * supports it, otherwise synthesize it from three rotates (8/16/8 bits); for
 * 16-bit values a Bswap16 is always available.
 * NOTE(review): the switch over `size` and its case labels were dropped by
 * extraction — confirm the 32/16-bit dispatch against upstream source. */
5212 static ir_node *gen_bswap(ir_node *node)
5214 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5215 dbg_info *dbgi = get_irn_dbg_info(node);
5217 ir_node *block = get_nodes_block(node);
5218 ir_node *new_block = be_transform_node(block);
5219 ir_mode *mode = get_irn_mode(param);
5220 unsigned size = get_mode_size_bits(mode);
5224 if (ia32_cg_config.use_bswap) {
5225 /* swap available */
5226 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* fallback: rol16 within the low word, rol32, rol16 again swaps all bytes */
5228 ir_graph *const irg = get_Block_irg(new_block);
5229 ir_node *const i8 = ia32_create_Immediate(irg, NULL, 0, 8);
5230 ir_node *const rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5231 ir_node *const i16 = ia32_create_Immediate(irg, NULL, 0, 16);
5232 ir_node *const rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5233 ir_node *const rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5234 set_ia32_ls_mode(rol1, mode_Hu);
5235 set_ia32_ls_mode(rol2, mode_Iu);
5236 set_ia32_ls_mode(rol3, mode_Hu);
5241 /* swap16 always available */
5242 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5245 panic("Invalid bswap size (%d)", size);
5250 * Transform builtin outport.
/* Transform builtin outport(port, value): emit an ia32 Outport; the port may
 * become an immediate, the ls mode records the value's width. */
5252 static ir_node *gen_outport(ir_node *node)
5254 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5255 ir_node *oldv = get_Builtin_param(node, 1);
5256 ir_mode *mode = get_irn_mode(oldv);
5257 ir_node *value = be_transform_node(oldv);
5258 ir_node *block = be_transform_node(get_nodes_block(node));
5259 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5260 dbg_info *dbgi = get_irn_dbg_info(node);
5262 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5263 set_ia32_ls_mode(res, mode);
5268 * Transform builtin inport.
/* Transform builtin inport(port): emit an ia32 Inport; the ls mode is taken
 * from the builtin's declared result type. */
5270 static ir_node *gen_inport(ir_node *node)
5272 ir_type *tp = get_Builtin_type(node);
5273 ir_type *rstp = get_method_res_type(tp, 0);
5274 ir_mode *mode = get_type_mode(rstp);
5275 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5276 ir_node *block = be_transform_node(get_nodes_block(node));
5277 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5278 dbg_info *dbgi = get_irn_dbg_info(node);
5280 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5281 set_ia32_ls_mode(res, mode);
5283 /* check for missing Result Proj */
5288 * Transform a builtin inner trampoline
/* Transform builtin inner_trampoline(ptr, callee, env): materialize a GCC
 * style nested-function trampoline at `ptr` by storing the byte sequence
 *   B9 <env>        mov ecx, env
 *   E9 <rel32>      jmp rel32 to callee
 * where rel32 = callee - (ptr + 10). Returns a Tuple of (memory, code ptr).
 * NOTE(review): extraction dropped the address-increment stores between the
 * byte/word stores (offsets into the 10-byte sequence) — confirm upstream. */
5290 static ir_node *gen_inner_trampoline(ir_node *node)
5292 ir_node *ptr = get_Builtin_param(node, 0);
5293 ir_node *callee = get_Builtin_param(node, 1);
5294 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5295 ir_node *mem = get_Builtin_mem(node);
5296 ir_node *block = get_nodes_block(node);
5297 ir_node *new_block = be_transform_node(block);
5301 ir_node *trampoline;
5303 dbg_info *dbgi = get_irn_dbg_info(node);
5304 ia32_address_t addr;
5306 /* construct store address */
5307 memset(&addr, 0, sizeof(addr));
5308 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5310 if (addr.base == NULL) {
5311 addr.base = noreg_GP;
5313 addr.base = be_transform_node(addr.base);
5316 if (addr.index == NULL) {
5317 addr.index = noreg_GP;
5319 addr.index = be_transform_node(addr.index);
5321 addr.mem = be_transform_node(mem);
5323 ir_graph *const irg = get_Block_irg(new_block);
5324 /* mov ecx, <env> */
5325 val = ia32_create_Immediate(irg, NULL, 0, 0xB9);
5326 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5327 set_irn_pinned(store, get_irn_pinned(node));
5328 set_ia32_op_type(store, ia32_AddrModeD);
5329 set_ia32_ls_mode(store, mode_Bu);
5330 set_address(store, &addr);
/* store the 32-bit environment pointer (mov ecx immediate operand) */
5334 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5335 addr.index, addr.mem, env);
5336 set_irn_pinned(store, get_irn_pinned(node));
5337 set_ia32_op_type(store, ia32_AddrModeD);
5338 set_ia32_ls_mode(store, mode_Iu);
5339 set_address(store, &addr);
5343 /* jmp rel <callee> */
5344 val = ia32_create_Immediate(irg, NULL, 0, 0xE9);
5345 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5346 set_irn_pinned(store, get_irn_pinned(node));
5347 set_ia32_op_type(store, ia32_AddrModeD);
5348 set_ia32_ls_mode(store, mode_Bu);
5349 set_address(store, &addr);
5353 trampoline = be_transform_node(ptr);
5355 /* the callee is typically an immediate */
5356 if (is_SymConst(callee)) {
/* -10: the jmp displacement is relative to the end of the 10-byte stub */
5357 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5359 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5360 add_ia32_am_offs_int(rel, -10);
5362 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the 32-bit jmp displacement */
5364 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5365 addr.index, addr.mem, rel);
5366 set_irn_pinned(store, get_irn_pinned(node));
5367 set_ia32_op_type(store, ia32_AddrModeD);
5368 set_ia32_ls_mode(store, mode_Iu);
5369 set_address(store, &addr);
5374 return new_r_Tuple(new_block, 2, in);
5378 * Transform Builtin node.
/* Dispatch a Builtin node to the matching gen_* transformer by kind; panics
 * on unimplemented kinds.
 * NOTE(review): several `case ir_bk_...:` labels were dropped by extraction
 * (e.g. trap, ffs, clz, ctz, parity, bswap, outport, inport) — each return
 * below belongs to one of them; confirm against upstream source. */
5380 static ir_node *gen_Builtin(ir_node *node)
5382 ir_builtin_kind kind = get_Builtin_kind(node);
5386 return gen_trap(node);
5387 case ir_bk_debugbreak:
5388 return gen_debugbreak(node);
5389 case ir_bk_return_address:
5390 return gen_return_address(node);
5391 case ir_bk_frame_address:
5392 return gen_frame_address(node);
5393 case ir_bk_prefetch:
5394 return gen_prefetch(node);
5396 return gen_ffs(node);
5398 return gen_clz(node);
5400 return gen_ctz(node);
5402 return gen_parity(node);
5403 case ir_bk_popcount:
5404 return gen_popcount(node);
5406 return gen_bswap(node);
5408 return gen_outport(node);
5410 return gen_inport(node);
5411 case ir_bk_inner_trampoline:
5412 return gen_inner_trampoline(node);
5414 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5418 * Transform Proj(Builtin) node.
/* Transform a Proj of a Builtin: most builtins produce a single value or a
 * single memory result; Inport has both, inner_trampoline returns a Tuple
 * whose preds are (M, result). */
5420 static ir_node *gen_Proj_Builtin(ir_node *proj)
5422 ir_node *node = get_Proj_pred(proj);
5423 ir_node *new_node = be_transform_node(node);
5424 ir_builtin_kind kind = get_Builtin_kind(node);
5427 case ir_bk_return_address:
5428 case ir_bk_frame_address:
5433 case ir_bk_popcount:
/* value-producing builtins: only the result Proj (pn_Builtin_max+1) exists */
5435 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5438 case ir_bk_debugbreak:
5439 case ir_bk_prefetch:
/* memory-only builtins */
5441 assert(get_Proj_proj(proj) == pn_Builtin_M);
5444 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5445 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5447 assert(get_Proj_proj(proj) == pn_Builtin_M);
5448 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5450 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: pred 0 = memory, pred 1 = code ptr */
5451 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5452 return get_Tuple_pred(new_node, 1);
5454 assert(get_Proj_proj(proj) == pn_Builtin_M);
5455 return get_Tuple_pred(new_node, 0);
5458 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, but mark it as flag-clobbering since the
 * underlying add/sub of esp modifies the eflags register. */
5461 static ir_node *gen_be_IncSP(ir_node *node)
5463 ir_node *res = be_duplicate_node(node);
5464 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5470 * Transform the Projs from a be_Call.
/* Transform a Proj of a be_Call: map be_Call proj numbers onto ia32_Call
 * proj numbers; for register results, locate the matching limited output
 * requirement on the new call. */
5472 static ir_node *gen_Proj_be_Call(ir_node *node)
5474 ir_node *call = get_Proj_pred(node);
5475 ir_node *new_call = be_transform_node(call);
5476 dbg_info *dbgi = get_irn_dbg_info(node);
5477 long proj = get_Proj_proj(node);
5478 ir_mode *mode = get_irn_mode(node);
5481 if (proj == pn_be_Call_M) {
5482 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5484 /* transform call modes */
5485 if (mode_is_data(mode)) {
5486 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5490 /* Map from be_Call to ia32_Call proj number */
5491 if (proj == pn_be_Call_sp) {
5492 proj = pn_ia32_Call_stack;
5493 } else if (proj == pn_be_Call_M) {
5494 proj = pn_ia32_Call_M;
5495 } else if (proj == pn_be_Call_X_except) {
5496 proj = pn_ia32_Call_X_except;
5497 } else if (proj == pn_be_Call_X_regular) {
5498 proj = pn_ia32_Call_X_regular;
/* register result: find the ia32_Call output constrained to the same
 * single register as this Proj's requirement */
5500 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5502 assert(proj >= pn_be_Call_first_res);
5503 assert(arch_register_req_is(req, limited));
5505 be_foreach_out(new_call, i) {
5506 arch_register_req_t const *const new_req = arch_get_irn_register_req_out(new_call, i);
5507 if (!arch_register_req_is(new_req, limited) ||
5508 new_req->cls != req->cls ||
5509 *new_req->limited != *req->limited)
/* NOTE(review): the loop body (continue/`proj = i; goto found;` style) was
 * dropped by extraction — confirm against upstream source */
5515 panic("no matching out requirement found");
5519 res = new_rd_Proj(dbgi, new_call, mode, proj);
5521 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5523 case pn_ia32_Call_stack:
5524 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5527 case pn_ia32_Call_fpcw:
5528 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/* Transform a Proj of an ASM node: the memory Proj is renumbered to the last
 * output of the new ASM node; int/reference and float Projs keep (or adjust)
 * their position — the adjustment lines were dropped by extraction. */
5535 static ir_node *gen_Proj_ASM(ir_node *node)
5537 ir_mode *mode = get_irn_mode(node);
5538 ir_node *pred = get_Proj_pred(node);
5539 ir_node *new_pred = be_transform_node(pred);
5540 long pos = get_Proj_proj(node);
5542 if (mode == mode_M) {
5543 pos = arch_get_irn_n_outs(new_pred)-1;
5544 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5546 } else if (mode_is_float(mode)) {
5549 panic("unexpected proj mode at ASM");
5552 return new_r_Proj(new_pred, mode, pos);
5556 * Transform and potentially renumber Proj nodes.
/* Transform and potentially renumber Proj nodes by dispatching on the
 * predecessor's opcode; Start's X_initial_exec Proj becomes a plain Jmp.
 * NOTE(review): the `case iro_...:` labels before each return were dropped by
 * extraction — confirm against upstream source. */
5558 static ir_node *gen_Proj(ir_node *node)
5560 ir_node *pred = get_Proj_pred(node);
5563 switch (get_irn_opcode(pred)) {
5565 return gen_Proj_Load(node);
5567 return gen_Proj_Store(node);
5569 return gen_Proj_ASM(node);
5571 return gen_Proj_Builtin(node);
5573 return gen_Proj_Div(node);
5575 return gen_Proj_Mod(node);
5577 return gen_Proj_CopyB(node);
5579 return gen_Proj_be_SubSP(node);
5581 return gen_Proj_be_AddSP(node);
5583 return gen_Proj_be_Call(node);
5585 proj = get_Proj_proj(node);
5587 case pn_Start_X_initial_exec: {
5588 ir_node *block = get_nodes_block(pred);
5589 ir_node *new_block = be_transform_node(block);
5590 dbg_info *dbgi = get_irn_dbg_info(node);
5591 /* we exchange the ProjX with a jump */
5592 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5600 if (is_ia32_l_FloattoLL(pred)) {
5601 return gen_Proj_l_FloattoLL(node);
5603 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* default: Projs of non-ia32 preds in a GP-register mode are rebuilt in
 * mode_Iu, preserving the original node number for debugging */
5607 ir_mode *mode = get_irn_mode(node);
5608 if (ia32_mode_needs_gp_reg(mode)) {
5609 ir_node *new_pred = be_transform_node(pred);
5610 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5611 get_Proj_proj(node));
5612 new_proj->node_nr = node->node_nr;
5617 return be_duplicate_node(node);
5621 * Enters all transform functions into the generic pointer
/* Register all ia32 transform callbacks with the generic backend transform
 * driver; ops without a specific transformer either keep the default set by
 * be_start_transform_setup() or are plainly duplicated. */
5623 static void register_transformers(void)
5625 /* first clear the generic function pointer for all ops */
5626 be_start_transform_setup();
5628 be_set_transform_function(op_Add, gen_Add);
5629 be_set_transform_function(op_And, gen_And);
5630 be_set_transform_function(op_ASM, ia32_gen_ASM);
5631 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5632 be_set_transform_function(op_be_Call, gen_be_Call);
5633 be_set_transform_function(op_be_Copy, gen_be_Copy);
5634 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5635 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5636 be_set_transform_function(op_be_Return, gen_be_Return);
5637 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5638 be_set_transform_function(op_Builtin, gen_Builtin);
5639 be_set_transform_function(op_Cmp, gen_Cmp);
5640 be_set_transform_function(op_Cond, gen_Cond);
5641 be_set_transform_function(op_Const, gen_Const);
5642 be_set_transform_function(op_Conv, gen_Conv);
5643 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5644 be_set_transform_function(op_Div, gen_Div);
5645 be_set_transform_function(op_Eor, gen_Eor);
5646 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5647 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5648 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5649 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5650 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5651 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5652 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5653 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5654 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5655 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5656 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5657 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5658 be_set_transform_function(op_ia32_NoReg_FP, be_duplicate_node);
5659 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5660 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5661 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5662 be_set_transform_function(op_IJmp, gen_IJmp);
5663 be_set_transform_function(op_Jmp, gen_Jmp);
5664 be_set_transform_function(op_Load, gen_Load);
5665 be_set_transform_function(op_Minus, gen_Minus);
5666 be_set_transform_function(op_Mod, gen_Mod);
5667 be_set_transform_function(op_Mul, gen_Mul);
5668 be_set_transform_function(op_Mulh, gen_Mulh);
5669 be_set_transform_function(op_Mux, gen_Mux);
5670 be_set_transform_function(op_Not, gen_Not);
5671 be_set_transform_function(op_Or, gen_Or);
5672 be_set_transform_function(op_Phi, gen_Phi);
5673 be_set_transform_function(op_Proj, gen_Proj);
5674 be_set_transform_function(op_Rotl, gen_Rotl);
5675 be_set_transform_function(op_Shl, gen_Shl);
5676 be_set_transform_function(op_Shr, gen_Shr);
5677 be_set_transform_function(op_Shrs, gen_Shrs);
5678 be_set_transform_function(op_Store, gen_Store);
5679 be_set_transform_function(op_Sub, gen_Sub);
5680 be_set_transform_function(op_Switch, gen_Switch);
5681 be_set_transform_function(op_SymConst, gen_SymConst);
5682 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
/* Mux needs an extra hook so mode_b users know its upper bits are clean */
5684 be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
5688 * Pre-transform all unknown and noreg nodes.
/* Pre-transform the per-graph singleton nodes (noreg placeholders, GetEIP,
 * fpu trunc mode) and cache the graph's NoMem/NoReg_GP in file-level globals
 * used throughout the transformers. */
5690 static void ia32_pretransform_node(void)
5692 ir_graph *irg = current_ir_graph;
5693 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
5695 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5696 irg_data->noreg_fp = be_pre_transform_node(irg_data->noreg_fp);
5697 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5698 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5699 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5701 nomem = get_irg_no_mem(irg);
5702 noreg_GP = ia32_new_NoReg_gp(irg);
5706 * Post-process all calls if we are in SSE mode.
5707 * The ABI requires that the results are in st0, copy them
5708 * to a xmm register.
/* Post-process all recorded calls in SSE mode: the ABI returns floats in
 * st(0), so each float call result is either patched directly into an fst
 * (when the only user is an xStore) or spilled to the frame with fst and
 * reloaded into an xmm register with xLoad, rerouting all users. */
5710 static void postprocess_fp_call_results(void)
5714 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5715 ir_node *call = call_list[i];
5716 ir_type *mtp = call_types[i];
5719 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5720 ir_type *res_tp = get_method_res_type(mtp, j);
5721 ir_node *res, *new_res;
5724 if (! is_atomic_type(res_tp)) {
5725 /* no floating point return */
5728 res_mode = get_type_mode(res_tp);
5729 if (! mode_is_float(res_mode)) {
5730 /* no floating point return */
/* st0 results follow pn_ia32_Call_st0 consecutively per result index */
5734 res = be_get_Proj_for_pn(call, pn_ia32_Call_st0 + j);
5737 /* now patch the users */
5738 foreach_out_edge_safe(res, edge) {
5739 ir_node *succ = get_edge_src_irn(edge);
5742 if (be_is_Keep(succ))
5745 if (is_ia32_xStore(succ)) {
5746 /* an xStore can be patched into an vfst */
5747 dbg_info *db = get_irn_dbg_info(succ);
5748 ir_node *block = get_nodes_block(succ);
5749 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5750 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5751 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5752 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5753 ir_mode *mode = get_ia32_ls_mode(succ);
5755 ir_node *st = new_bd_ia32_fst(db, block, base, idx, mem, value, mode);
5756 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_fst_M);
5757 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5758 if (is_ia32_use_frame(succ))
5759 set_ia32_use_frame(st);
5760 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5761 set_irn_pinned(st, get_irn_pinned(succ));
5762 set_ia32_op_type(st, ia32_AddrModeD);
/* the exchange below relies on matching proj numbers of xStore and fst */
5764 assert((long)pn_ia32_xStore_M == (long)pn_ia32_fst_M);
5765 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_fst_X_regular);
5766 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_fst_X_except);
/* generic user: build the store/load round trip lazily, once per result */
5773 if (new_res == NULL) {
5774 dbg_info *db = get_irn_dbg_info(call);
5775 ir_node *block = get_nodes_block(call);
5776 ir_node *frame = get_irg_frame(current_ir_graph);
5777 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5778 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5779 ir_node *vfst, *xld, *new_mem;
5782 /* store st(0) on stack */
5783 vfst = new_bd_ia32_fst(db, block, frame, noreg_GP, call_mem,
5785 set_ia32_op_type(vfst, ia32_AddrModeD);
5786 set_ia32_use_frame(vfst);
5787 arch_add_irn_flags(vfst, arch_irn_flags_spill);
5789 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_fst_M);
5791 /* load into SSE register */
5792 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5794 set_ia32_op_type(xld, ia32_AddrModeS);
5795 set_ia32_use_frame(xld);
5797 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5798 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute memory users of the call behind the new store/load chain */
5800 if (old_mem != NULL) {
5801 edges_reroute(old_mem, new_mem);
5805 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5811 /* do the transformation */
/* Entry point of the ia32 transform phase: set up transformers and globals,
 * compute heights and non-address-mode info, run the generic transform with
 * CSE disabled (attributes are set after node creation), post-process SSE
 * float call results, and tear the auxiliary data down again. */
5812 void ia32_transform_graph(ir_graph *irg)
5816 register_transformers();
5817 initial_fpcw = NULL;
5818 ia32_no_pic_adjust = 0;
5820 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5822 be_timer_push(T_HEIGHTS);
5823 ia32_heights = heights_new(irg);
5824 be_timer_pop(T_HEIGHTS);
5825 ia32_calculate_non_address_mode_nodes(irg);
5827 /* the transform phase is not safe for CSE (yet) because several nodes get
5828 * attributes set after their creation */
5829 cse_last = get_opt_cse();
5832 call_list = NEW_ARR_F(ir_node *, 0);
5833 call_types = NEW_ARR_F(ir_type *, 0);
5834 be_transform_graph(irg, ia32_pretransform_node);
5836 if (ia32_cg_config.use_sse2)
5837 postprocess_fp_call_results();
5838 DEL_ARR_F(call_types);
5839 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5841 set_opt_cse(cse_last);
5843 ia32_free_non_address_mode_nodes();
5844 heights_free(ia32_heights);
5845 ia32_heights = NULL;
5848 void ia32_init_transform(void)
5850 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");