2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
54 #include "betranshlp.h"
57 #include "bearch_ia32_t.h"
58 #include "ia32_common_transform.h"
59 #include "ia32_nodes_attr.h"
60 #include "ia32_transform.h"
61 #include "ia32_new_nodes.h"
62 #include "ia32_dbg_stat.h"
63 #include "ia32_optimize.h"
64 #include "ia32_address_mode.h"
65 #include "ia32_architecture.h"
67 #include "gen_ia32_regalloc_if.h"
69 /* define this to construct SSE constants instead of load them */
70 #undef CONSTRUCT_SSE_CONST
/* Result modes of the x87 (fp) and SSE (xmm) register classes. */
72 #define mode_fp (ia32_reg_classes[CLASS_ia32_fp].mode)
73 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
75 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Untransformed and lazily transformed initial x87 FPU control word;
 * see get_fpcw() below for the lazy transformation. */
77 static ir_node *old_initial_fpcw = NULL;
78 static ir_node *initial_fpcw = NULL;
/* NOTE(review): externally visible flag; its consumers are not visible in
 * this chunk — confirm semantics against the rest of the backend. */
79 int ia32_no_pic_adjust;
/* Constructor-function types used by the generic binop/unop helpers below
 * (gen_binop, gen_binop_flags, gen_shift_binop, gen_unop, ...).
 * NOTE(review): several parameter lists appear truncated by the extraction
 * (trailing parameters such as op2/flags lines are missing from this view). */
81 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
82 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
85 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
86 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
89 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
90 ir_node *op1, ir_node *op2);
92 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
93 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
95 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
96 ir_node *base, ir_node *index, ir_node *mem);
98 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
102 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
104 static ir_node *create_immediate_or_transform(ir_node *node);
106 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
107 dbg_info *dbgi, ir_node *block,
108 ir_node *op, ir_node *orig_node);
110 /* it's enough to have those once */
111 static ir_node *nomem, *noreg_GP;
113 /** a list to postprocess all calls */
114 static ir_node **call_list;
115 static ir_type **call_types;
117 /** Return non-zero is a node represents the 0 constant. */
118 static bool is_Const_0(ir_node *node)
120 return is_Const(node) && is_Const_null(node);
123 /** Return non-zero is a node represents the 1 constant. */
124 static bool is_Const_1(ir_node *node)
126 return is_Const(node) && is_Const_one(node);
129 /** Return non-zero is a node represents the -1 constant. */
130 static bool is_Const_Minus_1(ir_node *node)
132 return is_Const(node) && is_Const_all_one(node);
/**
136 * returns true if constant can be created with a simple float command
 * (the x87 path in gen_Const uses fldz/fld1 for 0.0 and 1.0).
 * NOTE(review): the return statements are missing from this extraction —
 * presumably `return true;` inside the if and `return false;` at the end.
 */
138 static bool is_simple_x87_Const(ir_node *node)
140 ir_tarval *tv = get_Const_tarval(node);
141 if (tarval_is_null(tv) || tarval_is_one(tv))
144 /* TODO: match all the other float constants */
/**
149 * returns true if constant can be created with a simple float command
 * (SSE path: xZero always; more constants when CONSTRUCT_SSE_CONST is set).
 * NOTE(review): this extraction has gaps — the condition continuation after
 * tarval_is_null, the returns, and parts of the #ifdef block are missing.
 */
151 static bool is_simple_sse_Const(ir_node *node)
153 ir_tarval *tv = get_Const_tarval(node);
154 ir_mode *mode = get_tarval_mode(tv);
159 if (tarval_is_null(tv)
160 #ifdef CONSTRUCT_SSE_CONST
165 #ifdef CONSTRUCT_SSE_CONST
166 if (mode == mode_D) {
/* assemble the low 32 bit of the double little-endian byte-wise */
167 unsigned val = get_tarval_sub_bits(tv, 0) |
168 (get_tarval_sub_bits(tv, 1) << 8) |
169 (get_tarval_sub_bits(tv, 2) << 16) |
170 (get_tarval_sub_bits(tv, 3) << 24);
172 /* lower 32bit are zero, really a 32bit constant */
175 #endif /* CONSTRUCT_SSE_CONST */
176 /* TODO: match all the other float constants */
/**
181 * return NoREG or pic_base in case of PIC.
182 * This is necessary as base address for newly created symbols
 * NOTE(review): the non-PIC return path (presumably `return noreg_GP;`)
 * is missing from this extraction.
 */
184 static ir_node *get_symconst_base(void)
186 ir_graph *irg = current_ir_graph;
188 if (be_options.pic) {
189 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
190 return arch_env->impl->get_pic_base(irg);
/**
197 * Transforms a Const.
 * Float constants become either cheap SSE/x87 materializations (xZero,
 * fldz/fld1) or loads from a float-constant entity; integer constants become
 * ia32_Const immediates.
 * NOTE(review): this extraction is gappy — local declarations (load, res,
 * floatent, base, ls_mode, cnst, val) and several else/closing lines are
 * missing from this view.
 */
199 static ir_node *gen_Const(ir_node *node)
201 ir_node *old_block = get_nodes_block(node);
202 ir_node *block = be_transform_node(old_block);
203 dbg_info *dbgi = get_irn_dbg_info(node);
204 ir_mode *mode = get_irn_mode(node);
205 ir_tarval *tv = get_Const_tarval(node);
207 assert(is_Const(node));
209 if (mode_is_float(mode)) {
210 ir_graph *irg = get_irn_irg(node);
211 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
212 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
/* SSE2 path: materialize simple constants directly in xmm registers. */
217 if (ia32_cg_config.use_sse2) {
218 if (tarval_is_null(tv)) {
219 load = new_bd_ia32_xZero(dbgi, block);
220 set_ia32_ls_mode(load, mode);
222 #ifdef CONSTRUCT_SSE_CONST
/* build 1.0 from all-ones via shift left/right (no memory access) */
223 } else if (tarval_is_one(tv)) {
224 int cnst = mode == mode_F ? 26 : 55;
225 ir_node *imm1 = ia32_create_Immediate(irg, NULL, 0, cnst);
226 ir_node *imm2 = ia32_create_Immediate(irg, NULL, 0, 2);
227 ir_node *pslld, *psrld;
229 load = new_bd_ia32_xAllOnes(dbgi, block);
230 set_ia32_ls_mode(load, mode);
231 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
232 set_ia32_ls_mode(pslld, mode);
233 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
234 set_ia32_ls_mode(psrld, mode);
236 #endif /* CONSTRUCT_SSE_CONST */
237 } else if (mode == mode_F) {
238 /* we can place any 32bit constant by using a movd gp, sse */
239 unsigned val = get_tarval_sub_bits(tv, 0) |
240 (get_tarval_sub_bits(tv, 1) << 8) |
241 (get_tarval_sub_bits(tv, 2) << 16) |
242 (get_tarval_sub_bits(tv, 3) << 24);
243 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
244 load = new_bd_ia32_xMovd(dbgi, block, cnst);
245 set_ia32_ls_mode(load, mode);
249 #ifdef CONSTRUCT_SSE_CONST
250 if (mode == mode_D) {
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
256 ir_node *imm32 = ia32_create_Immediate(irg, NULL, 0, 32);
257 ir_node *cnst, *psllq;
259 /* fine, lower 32bit are zero, produce 32bit value */
260 val = get_tarval_sub_bits(tv, 4) |
261 (get_tarval_sub_bits(tv, 5) << 8) |
262 (get_tarval_sub_bits(tv, 6) << 16) |
263 (get_tarval_sub_bits(tv, 7) << 24);
264 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
265 load = new_bd_ia32_xMovd(dbgi, block, cnst);
266 set_ia32_ls_mode(load, mode);
267 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
268 set_ia32_ls_mode(psllq, mode);
273 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: load the constant from a private float-constant entity */
274 floatent = ia32_create_float_const_entity(isa, tv, NULL);
276 base = get_symconst_base();
277 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
279 set_ia32_op_type(load, ia32_AddrModeS);
280 set_ia32_am_sc(load, floatent);
281 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
282 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, otherwise fld from entity. */
285 if (tarval_is_null(tv)) {
286 load = new_bd_ia32_fldz(dbgi, block);
288 set_ia32_ls_mode(load, mode);
289 } else if (tarval_is_one(tv)) {
290 load = new_bd_ia32_fld1(dbgi, block);
292 set_ia32_ls_mode(load, mode);
297 floatent = ia32_create_float_const_entity(isa, tv, NULL);
298 /* create_float_const_ent is smart and sometimes creates
300 ls_mode = get_type_mode(get_entity_type(floatent));
301 base = get_symconst_base();
302 load = new_bd_ia32_fld(dbgi, block, base, noreg_GP, nomem,
304 set_ia32_op_type(load, ia32_AddrModeS);
305 set_ia32_am_sc(load, floatent);
306 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
307 res = new_r_Proj(load, mode_fp, pn_ia32_fld_res);
310 #ifdef CONSTRUCT_SSE_CONST
312 #endif /* CONSTRUCT_SSE_CONST */
313 SET_IA32_ORIG_NODE(load, node);
315 } else { /* non-float mode */
/* integer constants are converted to mode_Iu and emitted as immediates */
319 tv = tarval_convert_to(tv, mode_Iu);
321 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
323 panic("couldn't convert constant tarval (%+F)", node);
325 val = get_tarval_long(tv);
327 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
328 SET_IA32_ORIG_NODE(cnst, node);
/**
335 * Transforms a SymConst.
 * Float SymConsts become loads of the referenced entity; integer address
 * SymConsts become an immediate Const (with a special Lea-based path for
 * thread-local entities).
 * NOTE(review): gaps in this extraction — the else between SSE/x87 loads
 * and the returns of the TLS branch are missing from this view.
 */
337 static ir_node *gen_SymConst(ir_node *node)
339 ir_node *old_block = get_nodes_block(node);
340 ir_node *block = be_transform_node(old_block);
341 dbg_info *dbgi = get_irn_dbg_info(node);
342 ir_mode *mode = get_irn_mode(node);
345 if (mode_is_float(mode)) {
346 if (ia32_cg_config.use_sse2)
347 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
349 cnst = new_bd_ia32_fld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
350 set_ia32_am_sc(cnst, get_SymConst_entity(node));
351 set_ia32_use_frame(cnst);
/* only address SymConsts are supported by the backend */
355 if (get_SymConst_kind(node) != symconst_addr_ent) {
356 panic("backend only support symconst_addr_ent (at %+F)", node);
358 entity = get_SymConst_entity(node);
/* thread-local entities are addressed relative to the TLS base register */
359 if (get_entity_owner(entity) == get_tls_type()) {
360 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
361 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
362 set_ia32_am_sc(lea, entity);
365 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
369 SET_IA32_ORIG_NODE(cnst, node);
/* Builds a fixed-layout 2-element array type of @p tp (used for the
 * float[2] constants below).
 * NOTE(review): the if-body after the alignment comparison is missing from
 * this extraction — presumably it rounds `size` up to `alignment`. */
374 static ir_type *make_array_type(ir_type *tp)
376 unsigned alignment = get_type_alignment_bytes(tp);
377 unsigned size = get_type_size_bytes(tp);
378 ir_type *res = new_type_array(1, tp);
379 set_type_alignment_bytes(res, alignment);
380 set_array_bounds_int(res, 0, 0, 2);
381 if (alignment > size)
383 set_type_size_bytes(res, 2 * size);
384 set_type_state(res, layout_fixed);
/**
389 * Create a float[2] array type for the given atomic type.
391 * @param tp the atomic type
 * The result is cached per element mode in function-local statics.
 * NOTE(review): the cache-hit checks (early returns when float_F/float_D/
 * float_E are already set) and the final return are missing from this view.
 */
393 static ir_type *ia32_create_float_array(ir_type *tp)
395 ir_mode *mode = get_type_mode(tp);
398 if (mode == mode_F) {
399 static ir_type *float_F;
403 arr = float_F = make_array_type(tp);
404 } else if (mode == mode_D) {
405 static ir_type *float_D;
409 arr = float_D = make_array_type(tp);
411 static ir_type *float_E;
415 arr = float_E = make_array_type(tp);
420 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* The table maps each ia32_known_const_t to its entity name, the constant's
 * bit pattern as a string, and a mode selector (0=Iu, 1=Lu, 2=F).
 * Results are cached in ent_cache so each entity is created only once. */
421 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
423 static const struct {
425 const char *cnst_str;
427 } names [ia32_known_const_max] = {
428 { "C_sfp_sign", "0x80000000", 0 },
429 { "C_dfp_sign", "0x8000000000000000", 1 },
430 { "C_sfp_abs", "0x7FFFFFFF", 0 },
431 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
432 { "C_ull_bias", "0x10000000000000000", 2 }
434 static ir_entity *ent_cache[ia32_known_const_max];
436 ir_entity *ent = ent_cache[kct];
439 ir_graph *irg = current_ir_graph;
440 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
441 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
442 const char *cnst_str = names[kct].cnst_str;
443 ident *name = new_id_from_str(names[kct].name);
446 switch (names[kct].mode) {
447 case 0: mode = mode_Iu; break;
448 case 1: mode = mode_Lu; break;
449 case 2: mode = mode_F; break;
450 default: panic("internal compiler error");
452 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS (2^64) is emitted as a float[2] = { 0.0f, 2^64 } compound */
454 if (kct == ia32_ULLBIAS) {
455 ir_type *type = ia32_get_prim_type(mode_F);
456 ir_type *atype = ia32_create_float_array(type);
457 ir_initializer_t *initializer;
459 ent = new_entity(get_glob_type(), name, atype);
461 set_entity_ld_ident(ent, name);
462 set_entity_visibility(ent, ir_visibility_private);
463 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
465 initializer = create_initializer_compound(2);
466 set_initializer_compound_value(initializer, 0,
467 create_initializer_tarval(get_mode_null(mode)));
468 set_initializer_compound_value(initializer, 1,
469 create_initializer_tarval(tv));
470 set_entity_initializer(ent, initializer);
472 ent = ia32_create_float_const_entity(isa, tv, name);
474 /* cache the entry */
475 ent_cache[kct] = ent;
478 return ent_cache[kct];
/**
482 * return true if the node is a Proj(Load) and could be used in source address
483 * mode for another node. Will return only true if the @p other node is not
484 * dependent on the memory of the Load (for binary operations use the other
485 * input here, for unary operations use NULL).
 * NOTE(review): gappy extraction — the return statements after the guard
 * conditions are missing from this view.
 */
487 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
488 ir_node *other, ir_node *other2,
495 /* float constants are always available */
496 if (is_Const(node)) {
497 mode = get_irn_mode(node);
498 if (mode_is_float(mode)) {
499 ir_tarval *tv = get_Const_tarval(node);
500 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
502 if (ia32_cg_config.use_sse2) {
503 if (is_simple_sse_Const(node))
506 if (is_simple_x87_Const(node))
509 if (get_irn_n_edges(node) > 1)
/* from here on: the node must be a Proj(Load, pn_Load_res) in this block */
518 load = get_Proj_pred(node);
519 pn = get_Proj_proj(node);
520 if (!is_Load(load) || pn != pn_Load_res)
522 if (get_nodes_block(load) != block)
524 mode = get_irn_mode(node);
525 /* we can't fold mode_E AM */
526 if (mode == ia32_mode_E)
528 /* we only use address mode if we're the only user of the load */
529 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
531 /* in some edge cases with address mode we might reach the load normally
532 * and through some AM sequence, if it is already materialized then we
533 * can't create an AM node from it */
534 if (be_is_transformed(node))
537 /* don't do AM if other node inputs depend on the load (via mem-proj) */
538 if (other != NULL && ia32_prevents_AM(block, load, other))
541 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Aggregated result of match_arguments(): the chosen addressing mode plus
 * the transformed operands.
 * NOTE(review): further fields used elsewhere in this file (addr, ls_mode,
 * mem_proj, pinned, new_op1, new_op2) are missing from this extraction. */
547 typedef struct ia32_address_mode_t ia32_address_mode_t;
548 struct ia32_address_mode_t {
553 ia32_op_type_t op_type;
557 unsigned commutative : 1;
558 unsigned ins_permuted : 1;
561 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
563 /* construct load address */
564 memset(addr, 0, sizeof(addr[0]));
565 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
567 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
568 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
569 addr->mem = be_transform_node(mem);
/* Fills @p am with an addressing mode for @p node: float Const nodes are
 * turned into loads of a constant entity, Proj(Load) nodes into source
 * address mode over the load's address.
 * NOTE(review): gappy extraction — local declarations (load, ptr, mem,
 * new_mem) and the early return after the Const branch are missing. */
572 static void build_address(ia32_address_mode_t *am, ir_node *node,
573 ia32_create_am_flags_t flags)
575 ia32_address_t *addr = &am->addr;
581 /* floating point immediates */
582 if (is_Const(node)) {
583 ir_graph *irg = get_irn_irg(node);
584 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
585 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
586 ir_tarval *tv = get_Const_tarval(node);
587 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
588 addr->base = get_symconst_base();
589 addr->index = noreg_GP;
591 addr->symconst_ent = entity;
592 addr->tls_segment = false;
594 am->ls_mode = get_type_mode(get_entity_type(entity));
595 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): reuse the load's address and memory */
599 load = get_Proj_pred(node);
600 ptr = get_Load_ptr(load);
601 mem = get_Load_mem(load);
602 new_mem = be_transform_node(mem);
603 am->pinned = get_irn_pinned(load);
604 am->ls_mode = get_Load_mode(load);
605 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
608 /* construct load address */
609 ia32_create_address_mode(addr, ptr, flags);
611 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
612 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copies all address attributes from @p addr onto the ia32 node.
 * NOTE(review): a guard line (presumably `if (addr->use_frame)`) appears to
 * be missing before set_ia32_use_frame in this extraction. */
616 static void set_address(ir_node *node, const ia32_address_t *addr)
618 set_ia32_am_scale(node, addr->scale);
619 set_ia32_am_sc(node, addr->symconst_ent);
620 set_ia32_am_offs_int(node, addr->offset);
621 set_ia32_am_tls_segment(node, addr->tls_segment);
622 if (addr->symconst_sign)
623 set_ia32_am_sc_sign(node);
625 set_ia32_use_frame(node);
626 set_ia32_frame_ent(node, addr->frame_entity);
/**
630 * Apply attributes of a given address mode to a node.
 * NOTE(review): a guard (presumably `if (am->commutative)`) appears to be
 * missing before set_ia32_commutative in this extraction.
 */
632 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
634 set_address(node, &am->addr);
636 set_ia32_op_type(node, am->op_type);
637 set_ia32_ls_mode(node, am->ls_mode);
638 if (am->pinned == op_pin_state_pinned) {
639 /* beware: some nodes are already pinned and did not allow to change the state */
640 if (get_irn_pinned(node) != op_pin_state_pinned)
641 set_irn_pinned(node, op_pin_state_pinned);
644 set_ia32_commutative(node);
/**
648 * Check, if a given node is a Down-Conv, ie. a integer Conv
649 * from a mode with a mode with more bits to a mode with lesser bits.
650 * Moreover, we return only true if the node has not more than 1 user.
652 * @param node the node
653 * @return non-zero if node is a Down-Conv
 * NOTE(review): the initial `is_Conv(node)` guard and the `return` keyword
 * of the final expression are missing from this extraction.
 */
655 static int is_downconv(const ir_node *node)
663 src_mode = get_irn_mode(get_Conv_op(node));
664 dest_mode = get_irn_mode(node);
666 ia32_mode_needs_gp_reg(src_mode) &&
667 ia32_mode_needs_gp_reg(dest_mode) &&
668 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
671 /** Skip all Down-Conv's on a given node and return the resulting node. */
/* NOTE(review): the loop's `break;` after the edge-count check and the final
 * `return node;` are missing from this extraction. */
672 ir_node *ia32_skip_downconv(ir_node *node)
674 while (is_downconv(node)) {
675 /* we only want to skip the conv when we're the only user
676 * (because this test is used in the context of address-mode selection
677 * and we don't want to use address mode for multiple users) */
678 if (get_irn_n_edges(node) > 1)
681 node = get_Conv_op(node);
/* Returns true for a Conv from a float mode to a float mode of the same or
 * fewer bits.
 * NOTE(review): the initial `is_Conv(node)` guard appears to be missing
 * from this extraction. */
687 static bool is_float_downconv(const ir_node *node)
691 ir_node *pred = get_Conv_op(node);
692 ir_mode *pred_mode = get_irn_mode(pred);
693 ir_mode *mode = get_irn_mode(node);
694 return mode_is_float(pred_mode)
695 && get_mode_size_bits(mode) <= get_mode_size_bits(pred_mode);
/* Skips all float Down-Convs on @p node.
 * NOTE(review): the final `return node;` is missing from this extraction. */
698 static ir_node *ia32_skip_float_downconv(ir_node *node)
700 while (is_float_downconv(node)) {
701 node = get_Conv_op(node);
/* Returns true for a single-user integer Conv between gp-register modes of
 * identical bit size (i.e. a pure signedness change).
 * NOTE(review): the initial `is_Conv(node)` guard, the `return false;` after
 * the edge check and the final `return` keyword are missing from this view. */
706 static bool is_sameconv(ir_node *node)
714 /* we only want to skip the conv when we're the only user
715 * (because this test is used in the context of address-mode selection
716 * and we don't want to use address mode for multiple users) */
717 if (get_irn_n_edges(node) > 1)
720 src_mode = get_irn_mode(get_Conv_op(node));
721 dest_mode = get_irn_mode(node);
723 ia32_mode_needs_gp_reg(src_mode) &&
724 ia32_mode_needs_gp_reg(dest_mode) &&
725 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
728 /** Skip all signedness convs */
/* NOTE(review): the final `return node;` is missing from this extraction. */
729 static ir_node *ia32_skip_sameconv(ir_node *node)
731 while (is_sameconv(node)) {
732 node = get_Conv_op(node);
738 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
740 ir_mode *mode = get_irn_mode(node);
741 ir_node *block = get_nodes_block(node);
742 dbg_info *dbgi = get_irn_dbg_info(node);
743 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
746 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
748 ir_mode *mode = get_irn_mode(node);
749 ir_node *block = get_nodes_block(node);
750 dbg_info *dbgi = get_irn_dbg_info(node);
751 /* normalize to an unsigned mode */
752 switch (get_mode_size_bits(mode)) {
753 case 8: mode = mode_Bu; break;
754 case 16: mode = mode_Hu; break;
756 panic("ia32: invalid mode in zest: %+F", node);
758 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
761 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
763 ir_mode *mode = get_irn_mode(node);
764 if (mode_is_signed(mode)) {
765 return transform_sext(node, orig_node);
767 return transform_zext(node, orig_node);
/**
772 * matches operands of a node into ia32 addressing/operand modes. This covers
773 * usage of source address mode, immediates, operations with non 32-bit modes,
775 * The resulting data is filled into the @p am struct. block is the block
776 * of the node whose arguments are matched. op1, op2 are the first and second
777 * input that are matched (op1 may be NULL). other_op is another unrelated
778 * input that is not matched! but which is needed sometimes to check if AM
779 * for op1/op2 is legal.
780 * @p flags describes the supported modes of the operation in detail.
 * NOTE(review): this extraction is gappy — the flags parameter line, several
 * local declarations (use_am, use_immediate, noreg), else branches and
 * closing braces are missing from this view.
 */
782 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
783 ir_node *op1, ir_node *op2, ir_node *other_op,
786 ia32_address_t *addr = &am->addr;
787 ir_mode *mode = get_irn_mode(op2);
788 int mode_bits = get_mode_size_bits(mode);
789 ir_node *new_op1, *new_op2;
791 unsigned commutative;
792 int use_am_and_immediates;
795 memset(am, 0, sizeof(am[0]));
797 commutative = (flags & match_commutative) != 0;
798 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
799 use_am = (flags & match_am) != 0;
800 use_immediate = (flags & match_immediate) != 0;
801 assert(!use_am_and_immediates || use_immediate);
804 assert(!commutative || op1 != NULL);
805 assert(use_am || !(flags & match_8bit_am));
806 assert(use_am || !(flags & match_16bit_am));
/* sub-32-bit AM only when the matching flag explicitly allows it */
808 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
809 (mode_bits == 16 && !(flags & match_16bit_am))) {
813 /* we can simply skip downconvs for mode neutral nodes: the upper bits
814 * can be random for these operations */
815 if (flags & match_mode_neutral) {
816 op2 = ia32_skip_downconv(op2);
818 op1 = ia32_skip_downconv(op1);
821 op2 = ia32_skip_sameconv(op2);
823 op1 = ia32_skip_sameconv(op1);
827 /* match immediates. firm nodes are normalized: constants are always on the
830 if (!(flags & match_try_am) && use_immediate) {
831 new_op2 = ia32_try_create_Immediate(op2, 'i');
/* try source address mode on op2 first, then commutatively on op1 */
834 if (new_op2 == NULL &&
835 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
836 build_address(am, op2, ia32_create_am_normal);
837 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
838 if (mode_is_float(mode)) {
839 new_op2 = ia32_new_NoReg_fp(current_ir_graph);
843 am->op_type = ia32_AddrModeS;
844 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
846 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
848 build_address(am, op1, ia32_create_am_normal);
850 if (mode_is_float(mode)) {
851 noreg = ia32_new_NoReg_fp(current_ir_graph);
856 if (new_op2 != NULL) {
859 new_op1 = be_transform_node(op2);
861 am->ins_permuted = true;
863 am->op_type = ia32_AddrModeS;
865 am->op_type = ia32_Normal;
867 if (flags & match_try_am) {
/* no AM matched: extend sub-32-bit operands as requested by the flags */
873 mode = get_irn_mode(op2);
874 if (get_mode_size_bits(mode) != 32
875 && (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
876 if (flags & match_upconv) {
877 new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
879 new_op2 = transform_upconv(op2, op2);
880 } else if (flags & match_zero_ext) {
881 new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
883 new_op2 = transform_zext(op2, op2);
885 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
887 new_op2 = be_transform_node(op2);
888 assert(flags & match_mode_neutral);
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
/* default all unset address parts to the neutral operands */
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
/**
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 * NOTE(review): local declarations (load, mode) and the plain-return path
 * when node is already mode_T are missing from this extraction.
 */
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
 * NOTE(review): the dbgi declaration and the final return are missing from
 * this extraction.
 */
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
973 new_node = fix_mem_proj(new_node, &am);
/**
979 * Generic names for the inputs of an ia32 binary op.
 * NOTE(review): the enum opener line is missing from this extraction.
 * The COMPILETIME_ASSERTs pin these generic indices to the generated
 * Adc/Sbb input positions.
 */
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
986 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
 * NOTE(review): the dbgi declaration and the final return are missing from
 * this extraction.
 */
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
/* eflags passes as other_op: it must not depend on a folded load's memory */
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
1032 static ir_node *get_fpcw(void)
1034 if (initial_fpcw != NULL)
1035 return initial_fpcw;
1037 initial_fpcw = be_transform_node(old_initial_fpcw);
1038 return initial_fpcw;
/* Skips float up-Convs (to the same or more bits) on single-user chains.
 * NOTE(review): the loop's `break;`, the `node = pred;` advance and the
 * final `return node;` are missing from this extraction. */
1041 static ir_node *skip_float_upconv(ir_node *node)
1043 ir_mode *mode = get_irn_mode(node);
1044 assert(mode_is_float(mode));
1046 while (is_Conv(node)) {
1047 ir_node *pred = get_Conv_op(node);
1048 ir_mode *pred_mode = get_irn_mode(pred);
1051 * suboptimal, but without this check the address mode matcher
1052 * can incorrectly think that something has only 1 user
1054 if (get_irn_n_edges(node) > 1)
1057 if (!mode_is_float(pred_mode)
1058 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1066 static void check_x87_floatmode(ir_mode *mode)
1068 if (mode != ia32_mode_E) {
1069 panic("ia32: x87 only supports x86 extended float mode");
/**
1074 * Construct a standard binary operation, set AM and immediate if required.
1076 * @param op1 The first operand
1077 * @param op2 The second operand
1078 * @param func The node constructor function
1079 * @return The constructed ia32 node.
 * NOTE(review): local declarations (block, new_block, new_node, dbgi, the
 * mode declaration opener) and the final return are missing from this view.
 */
1081 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1082 construct_binop_float_func *func)
1088 ia32_address_mode_t am;
1089 ia32_address_t *addr = &am.addr;
1090 ia32_x87_attr_t *attr;
1091 /* All operations are considered commutative, because there are reverse
1093 match_flags_t flags = match_commutative | match_am;
/* for a Div the result mode is attached to the node, not its irn mode */
1095 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1096 check_x87_floatmode(mode);
1098 op1 = skip_float_upconv(op1);
1099 op2 = skip_float_upconv(op2);
1101 block = get_nodes_block(node);
1102 match_arguments(&am, block, op1, op2, NULL, flags);
1104 dbgi = get_irn_dbg_info(node);
1105 new_block = be_transform_node(block);
1106 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1107 am.new_op1, am.new_op2, get_fpcw());
1108 set_am_attributes(new_node, &am);
1110 attr = get_ia32_x87_attr(new_node);
1111 attr->attr.data.ins_permuted = am.ins_permuted;
1113 SET_IA32_ORIG_NODE(new_node, node);
1115 new_node = fix_mem_proj(new_node, &am);
/**
1121 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1123 * @param op1 The first operand
1124 * @param op2 The second operand
1125 * @param func The node constructor function
1126 * @return The constructed ia32 node.
 * NOTE(review): gappy extraction — declarations of new_op1/new_op2, several
 * else branches / closing braces, the Conv-skip advance (`op2 = op;`) and the
 * final return are missing from this view.
 */
1128 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1129 construct_shift_func *func,
1130 match_flags_t flags)
1132 ir_mode *mode = get_irn_mode(node);
1134 assert(! mode_is_float(mode));
1135 assert(flags & match_immediate);
1136 assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);
1138 if (get_mode_modulo_shift(mode) != 32) {
1139 /* TODO: implement special cases for non-modulo shifts */
1140 panic("modulo shift!=32 not supported by ia32 backend");
/* prepare the value to be shifted according to the matching flags */
1145 if (flags & match_mode_neutral) {
1146 op1 = ia32_skip_downconv(op1);
1147 new_op1 = be_transform_node(op1);
1149 op1 = ia32_skip_sameconv(op1);
1150 if (get_mode_size_bits(mode) != 32) {
1151 if (flags & match_upconv) {
1152 new_op1 = transform_upconv(op1, node);
1153 } else if (flags & match_zero_ext) {
1154 new_op1 = transform_zext(op1, node);
1156 /* match_mode_neutral not handled here because it makes no
1157 * sense for shift operations */
1158 panic("ia32 code selection failed for %+F", node);
1161 new_op1 = be_transform_node(op1);
1165 /* the shift amount can be any mode that is bigger than 5 bits, since all
1166 * other bits are ignored anyway */
1167 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1168 ir_node *const op = get_Conv_op(op2);
1169 if (mode_is_float(get_irn_mode(op)))
1172 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1174 new_op2 = create_immediate_or_transform(op2);
1176 dbg_info *dbgi = get_irn_dbg_info(node);
1177 ir_node *block = get_nodes_block(node);
1178 ir_node *new_block = be_transform_node(block);
1179 ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
1180 SET_IA32_ORIG_NODE(new_node, node);
1182 /* lowered shift instruction may have a dependency operand, handle it here */
1183 if (get_irn_arity(node) == 3) {
1184 /* we have a dependency */
1185 ir_node* dep = get_irn_n(node, 2);
1186 if (get_irn_n_edges(dep) > 1) {
1187 /* ... which has at least one user other than 'node' */
1188 ir_node *new_dep = be_transform_node(dep);
1189 add_irn_dep(new_node, new_dep);
/**
1198 * Construct a standard unary operation, set AM and immediate if required.
1200 * @param op The operand
1201 * @param func The node constructor function
1202 * @return The constructed ia32 node.
 * NOTE(review): the dbgi declaration, closing braces and final return are
 * missing from this extraction.
 */
1204 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1205 match_flags_t flags)
1208 ir_node *block, *new_block, *new_op, *new_node;
1210 assert(flags == 0 || flags == match_mode_neutral);
1211 if (flags & match_mode_neutral) {
1212 op = ia32_skip_downconv(op);
1215 new_op = be_transform_node(op);
1216 dbgi = get_irn_dbg_info(node);
1217 block = get_nodes_block(node);
1218 new_block = be_transform_node(block);
1219 new_node = func(dbgi, new_block, new_op);
1221 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Build an ia32 Lea node from a precomputed address mode (base/index/offset).
 * NOTE(review): lines are elided in this excerpt; base/idx extraction and the
 * noreg fallbacks are not visible here.
 */
1226 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1227 ia32_address_t *addr)
1237 base = be_transform_node(base);
1244 idx = be_transform_node(idx);
1247 /* segment overrides are ineffective for Leas :-( so we have to patch
1249 if (addr->tls_segment) {
/* materialize the TLS segment base and fold it in as an extra Lea level */
1250 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1251 assert(addr->symconst_ent != NULL);
1252 if (base == noreg_GP)
1255 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
/* segment has been folded into the base, do not apply it again */
1256 addr->tls_segment = false;
1259 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1260 set_address(res, addr);
1266 * Returns non-zero if a given address mode has a symbolic or
1267 * numerical offset != 0.
/* NOTE(review): frame entities / frame use also count as "immediates" here. */
1269 static int am_has_immediates(const ia32_address_t *addr)
1271 return addr->offset != 0 || addr->symconst_ent != NULL
1272 || addr->frame_entity || addr->use_frame;
/* constructor signature shared by the ShlD/ShrD node builders */
1275 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1276 ir_node *high, ir_node *low,
1280 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1281 * op1 - target to be shifted
1282 * op2 - contains bits to be shifted into target
1284 * Only op3 can be an immediate.
/* NOTE(review): interior lines (declarations, return) are elided here. */
1286 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1287 ir_node *high, ir_node *low, ir_node *count,
1288 new_shiftd_func func)
1290 ir_node *new_block = be_transform_node(block);
1291 ir_node *new_high = be_transform_node(high);
1292 ir_node *new_low = be_transform_node(low);
1296 /* the shift amount can be any mode that is bigger than 5 bits, since all
1297 * other bits are ignored anyway */
/* skip single-user integer Convs around the count — only the low 5 bits matter */
1298 while (is_Conv(count) &&
1299 get_irn_n_edges(count) == 1 &&
1300 mode_is_int(get_irn_mode(count))) {
1301 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1302 count = get_Conv_op(count);
/* the count may become an Immediate node if it is constant */
1304 new_count = create_immediate_or_transform(count);
1306 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1311 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
/* NOTE(review): the non-Const / non-long fallthrough (returning false) is
 * elided from this excerpt. */
1314 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1316 if (is_Const(value1) && is_Const(value2)) {
1317 ir_tarval *tv1 = get_Const_tarval(value1);
1318 ir_tarval *tv2 = get_Const_tarval(value2);
1319 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1320 long v1 = get_tarval_long(tv1);
1321 long v2 = get_tarval_long(tv2);
/* accepts the pair (x, 32-x); the v1 <= v2 check orders the operands */
1322 return v1 <= v2 && v2 == 32-v1;
/**
 * Try to recognize an Or/Add of complementary Shl/Shr as a double-width
 * shift (ShlD/ShrD). Returns the created ia32 node, or (presumably) NULL
 * when no pattern matches — the fallthrough is elided from this excerpt.
 */
1328 static ir_node *match_64bit_shift(ir_node *node)
1330 ir_node *op1 = get_binop_left(node);
1331 ir_node *op2 = get_binop_right(node);
1332 assert(is_Or(node) || is_Add(node));
1340 /* match ShlD operation */
1341 if (is_Shl(op1) && is_Shr(op2)) {
1342 ir_node *shl_right = get_Shl_right(op1);
1343 ir_node *shl_left = get_Shl_left(op1);
1344 ir_node *shr_right = get_Shr_right(op2);
1345 ir_node *shr_left = get_Shr_left(op2);
1346 /* constant ShlD operation */
1347 if (is_complementary_shifts(shl_right, shr_right)) {
1348 dbg_info *dbgi = get_irn_dbg_info(node);
1349 ir_node *block = get_nodes_block(node);
1350 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1353 /* constant ShrD operation */
1354 if (is_complementary_shifts(shr_right, shl_right)) {
1355 dbg_info *dbgi = get_irn_dbg_info(node);
1356 ir_node *block = get_nodes_block(node);
1357 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1360 /* lower_dw produces the following for ShlD:
1361 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1362 if (is_Shr(shr_left) && is_Not(shr_right)
1363 && is_Const_1(get_Shr_right(shr_left))
1364 && get_Not_op(shr_right) == shl_right) {
1365 dbg_info *dbgi = get_irn_dbg_info(node);
1366 ir_node *block = get_nodes_block(node);
/* high word sits one extra Shr(.,1) deeper in this lowered form */
1367 ir_node *val_h = get_Shr_left(shr_left);
1368 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1371 /* lower_dw produces the following for ShrD:
1372 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1373 if (is_Shl(shl_left) && is_Not(shl_right)
1374 && is_Const_1(get_Shl_right(shl_left))
1375 && get_Not_op(shl_right) == shr_right) {
1376 dbg_info *dbgi = get_irn_dbg_info(node);
1377 ir_node *block = get_nodes_block(node);
1378 ir_node *val_h = get_Shl_left(shl_left);
1379 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1388 * Creates an ia32 Add.
1390 * @return the created ia32 Add node
/* NOTE(review): several lines (braces, early returns, dbgi declaration) are
 * elided in this excerpt. */
1392 static ir_node *gen_Add(ir_node *node)
1394 ir_mode *mode = get_irn_mode(node);
1395 ir_node *op1 = get_Add_left(node);
1396 ir_node *op2 = get_Add_right(node);
1398 ir_node *block, *new_block, *new_node, *add_immediate_op;
1399 ia32_address_t addr;
1400 ia32_address_mode_t am;
/* an Add may actually be a lowered 64bit shift pattern — try that first */
1402 new_node = match_64bit_shift(node);
1403 if (new_node != NULL)
1406 if (mode_is_float(mode)) {
1407 if (ia32_cg_config.use_sse2)
1408 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1409 match_commutative | match_am);
1411 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fadd);
1414 ia32_mark_non_am(node);
/* integer Add selection strategy:
1418 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1419 * 1. Add with immediate -> Lea
1420 * 2. Add with possible source address mode -> Add
1421 * 3. Otherwise -> Lea
1423 memset(&addr, 0, sizeof(addr));
1424 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1425 add_immediate_op = NULL;
1427 dbgi = get_irn_dbg_info(node);
1428 block = get_nodes_block(node);
1429 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a Const */
1432 if (addr.base == NULL && addr.index == NULL) {
1433 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1434 addr.symconst_sign, 0, addr.offset);
1435 SET_IA32_ORIG_NODE(new_node, node);
1438 /* add with immediate? */
1439 if (addr.index == NULL) {
1440 add_immediate_op = addr.base;
1441 } else if (addr.base == NULL && addr.scale == 0) {
1442 add_immediate_op = addr.index;
1445 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself — warn, it should have been optimized */
1446 if (!am_has_immediates(&addr)) {
1447 #ifdef DEBUG_libfirm
1448 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1451 return be_transform_node(add_immediate_op);
1454 new_node = create_lea_from_address(dbgi, new_block, &addr);
1455 SET_IA32_ORIG_NODE(new_node, node);
1459 /* test if we can use source address mode */
1460 match_arguments(&am, block, op1, op2, NULL, match_commutative
1461 | match_mode_neutral | match_am | match_immediate | match_try_am);
1463 /* construct an Add with source address mode */
1464 if (am.op_type == ia32_AddrModeS) {
1465 ia32_address_t *am_addr = &am.addr;
1466 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1467 am_addr->index, am_addr->mem, am.new_op1,
1469 set_am_attributes(new_node, &am);
1470 SET_IA32_ORIG_NODE(new_node, node);
1472 new_node = fix_mem_proj(new_node, &am);
1477 /* otherwise construct a lea */
1478 new_node = create_lea_from_address(dbgi, new_block, &addr);
1479 SET_IA32_ORIG_NODE(new_node, node);
1484 * Creates an ia32 Mul.
1486 * @return the created ia32 Mul node
1488 static ir_node *gen_Mul(ir_node *node)
1490 ir_node *op1 = get_Mul_left(node);
1491 ir_node *op2 = get_Mul_right(node);
1492 ir_mode *mode = get_irn_mode(node);
1494 if (mode_is_float(mode)) {
1495 if (ia32_cg_config.use_sse2)
1496 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1497 match_commutative | match_am);
1498 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fmul);
/* integer multiply: IMul allows AM, immediates and is mode-neutral since
 * the low 32 result bits do not depend on the upper input bits */
1501 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1502 match_commutative | match_am | match_mode_neutral |
1503 match_immediate | match_am_and_immediates);
1507 * Creates an ia32 Mulh.
1508 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1509 * this result while Mul returns the lower 32 bit.
1511 * @return the created ia32 Mulh node
1513 static ir_node *gen_Mulh(ir_node *node)
1515 dbg_info *dbgi = get_irn_dbg_info(node);
1516 ir_node *op1 = get_Mulh_left(node);
1517 ir_node *op2 = get_Mulh_right(node);
1518 ir_mode *mode = get_irn_mode(node);
1520 ir_node *proj_res_high;
1522 if (get_mode_size_bits(mode) != 32) {
1523 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; both produce a high-result Proj */
1526 if (mode_is_signed(mode)) {
1527 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1528 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1530 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1531 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1533 return proj_res_high;
1537 * Creates an ia32 And.
1539 * @return The created ia32 And node
/* NOTE(review): part of the zero-extension special case (0xFF branch,
 * src_mode selection) is elided from this excerpt. */
1541 static ir_node *gen_And(ir_node *node)
1543 ir_node *op1 = get_And_left(node);
1544 ir_node *op2 = get_And_right(node);
1545 assert(! mode_is_float(get_irn_mode(node)));
1547 /* is it a zero extension? */
1548 if (is_Const(op2)) {
1549 ir_tarval *tv = get_Const_tarval(op2);
1550 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is better expressed as an 8/16 bit zero-extend Conv */
1552 if (v == 0xFF || v == 0xFFFF) {
1553 dbg_info *dbgi = get_irn_dbg_info(node);
1554 ir_node *block = get_nodes_block(node);
1561 assert(v == 0xFFFF);
1564 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1569 return gen_binop(node, op1, op2, new_bd_ia32_And,
1570 match_commutative | match_mode_neutral | match_am | match_immediate);
1574 * Creates an ia32 Or.
1576 * @return The created ia32 Or node
1578 static ir_node *gen_Or(ir_node *node)
1580 ir_node *op1 = get_Or_left(node);
1581 ir_node *op2 = get_Or_right(node);
/* an Or may be a lowered 64bit shift combination — try ShlD/ShrD first */
1584 res = match_64bit_shift(node);
1588 assert (! mode_is_float(get_irn_mode(node)));
1589 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1590 | match_mode_neutral | match_am | match_immediate);
1596 * Creates an ia32 Eor.
1598 * @return The created ia32 Eor node
1600 static ir_node *gen_Eor(ir_node *node)
1602 ir_node *op1 = get_Eor_left(node);
1603 ir_node *op2 = get_Eor_right(node);
1605 assert(! mode_is_float(get_irn_mode(node)));
/* firm Eor == ia32 Xor */
1606 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1607 | match_mode_neutral | match_am | match_immediate);
1612 * Creates an ia32 Sub.
1614 * @return The created ia32 Sub node
1616 static ir_node *gen_Sub(ir_node *node)
1618 ir_node *op1 = get_Sub_left(node);
1619 ir_node *op2 = get_Sub_right(node);
1620 ir_mode *mode = get_irn_mode(node);
1622 if (mode_is_float(mode)) {
1623 if (ia32_cg_config.use_sse2)
1624 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1626 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fsub);
/* Sub with constant should have been normalized to Add with negated
 * constant by the middle end — warn if we still see one */
1629 if (is_Const(op2)) {
1630 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* Sub is not commutative, so no match_commutative here */
1634 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1635 | match_am | match_immediate);
/**
 * Compute the memory input for a node that consumed a load via address mode:
 * combine the node's original memory predecessor with the AM memory while
 * avoiding memory cycles through the consumed load.
 * NOTE(review): several lines (declarations, some returns) are elided here.
 */
1638 static ir_node *transform_AM_mem(ir_node *const block,
1639 ir_node *const src_val,
1640 ir_node *const src_mem,
1641 ir_node *const am_mem)
1643 if (is_NoMem(am_mem)) {
1644 return be_transform_node(src_mem);
1645 } else if (is_Proj(src_val) &&
1647 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1648 /* avoid memory loop */
1650 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* filter the consumed load's mem-Proj out of the Sync operands */
1651 ir_node *const ptr_pred = get_Proj_pred(src_val);
1652 int const arity = get_Sync_n_preds(src_mem);
1657 NEW_ARR_A(ir_node*, ins, arity + 1);
1659 /* NOTE: This sometimes produces dead-code because the old sync in
1660 * src_mem might not be used anymore, we should detect this case
1661 * and kill the sync... */
1662 for (i = arity - 1; i >= 0; --i) {
1663 ir_node *const pred = get_Sync_pred(src_mem, i);
1665 /* avoid memory loop */
1666 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1669 ins[n++] = be_transform_node(pred);
1672 if (n==1 && ins[0] == am_mem) {
1674 /* creating a new Sync and relying on CSE may fail,
1675 * if am_mem is a ProjM, which does not yet verify. */
1679 return new_r_Sync(block, n, ins);
/* fallback: Sync the transformed source memory with the AM memory */
1683 ins[0] = be_transform_node(src_mem);
1685 return new_r_Sync(block, 2, ins);
1690 * Create a 32bit to 64bit signed extension.
1692 * @param dbgi debug info
1693 * @param block the block where node nodes should be placed
1694 * @param val the value to extend
1695 * @param orig the original node
1697 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1698 ir_node *val, const ir_node *orig)
/* either cltd (short encoding, clobbers eax/edx pairing) or sar val,31 */
1703 if (ia32_cg_config.use_short_sex_eax) {
1704 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1705 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1707 ir_graph *const irg = get_Block_irg(block);
/* arithmetic shift right by 31 replicates the sign bit */
1708 ir_node *const imm31 = ia32_create_Immediate(irg, NULL, 0, 31);
1709 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1711 SET_IA32_ORIG_NODE(res, orig);
1716 * Generates an ia32 Div with additional infrastructure for the
1717 * register allocator if needed.
/* Shared transform for firm Div and Mod nodes (both map to x86 div/idiv,
 * which produces quotient and remainder at once).
 * NOTE(review): some lines (declarations, case labels, return) are elided. */
1719 static ir_node *create_Div(ir_node *node)
1721 dbg_info *dbgi = get_irn_dbg_info(node);
1722 ir_node *block = get_nodes_block(node);
1723 ir_node *new_block = be_transform_node(block);
1724 int throws_exception = ir_throws_exception(node);
1731 ir_node *sign_extension;
1732 ia32_address_mode_t am;
1733 ia32_address_t *addr = &am.addr;
1735 /* the upper bits have random contents for smaller modes */
1736 switch (get_irn_opcode(node)) {
1738 op1 = get_Div_left(node);
1739 op2 = get_Div_right(node);
1740 mem = get_Div_mem(node);
1741 mode = get_Div_resmode(node);
1744 op1 = get_Mod_left(node);
1745 op2 = get_Mod_right(node);
1746 mem = get_Mod_mem(node);
1747 mode = get_Mod_resmode(node);
1750 panic("invalid divmod node %+F", node);
1753 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1755 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1756 is the memory of the consumed address. We can have only the second op as address
1757 in Div nodes, so check only op2. */
1758 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs the dividend sign-extended into edx (cdq/sar),
 * unsigned division needs edx zeroed */
1760 if (mode_is_signed(mode)) {
1761 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1762 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1763 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1765 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1767 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1768 addr->index, new_mem, am.new_op2,
1769 am.new_op1, sign_extension);
1771 ir_set_throws_exception(new_node, throws_exception);
1773 set_irn_pinned(new_node, get_irn_pinned(node));
1775 set_am_attributes(new_node, &am);
1776 SET_IA32_ORIG_NODE(new_node, node);
1778 new_node = fix_mem_proj(new_node, &am);
1784 * Generates an ia32 Mod.
/* Mod shares the div/idiv machinery with Div; create_Div dispatches on opcode. */
1786 static ir_node *gen_Mod(ir_node *node)
1788 return create_Div(node);
1792 * Generates an ia32 Div.
1794 static ir_node *gen_Div(ir_node *node)
1796 ir_mode *mode = get_Div_resmode(node);
/* float division goes to SSE divss/divsd or x87 fdiv */
1797 if (mode_is_float(mode)) {
1798 ir_node *op1 = get_Div_left(node);
1799 ir_node *op2 = get_Div_right(node);
1801 if (ia32_cg_config.use_sse2) {
1802 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1804 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fdiv);
/* integer division shares machinery with Mod */
1808 return create_Div(node);
1812 * Creates an ia32 Shl.
1814 * @return The created ia32 Shl node
1816 static ir_node *gen_Shl(ir_node *node)
1818 ir_node *left = get_Shl_left(node);
1819 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper input bits never reach the result */
1821 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1822 match_mode_neutral | match_immediate);
1826 * Creates an ia32 Shr.
1828 * @return The created ia32 Shr node
1830 static ir_node *gen_Shr(ir_node *node)
1832 ir_node *left = get_Shr_left(node);
1833 ir_node *right = get_Shr_right(node);
/* logical right shift needs the upper bits zeroed, hence match_zero_ext */
1835 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1836 match_immediate | match_zero_ext);
1840 * Creates an ia32 Sar.
1842 * @return The created ia32 Shrs node
/* NOTE(review): parts of both special cases (the val==31 check and the
 * src_mode selection for 16/24 bit shifts) are elided from this excerpt. */
1844 static ir_node *gen_Shrs(ir_node *node)
1846 ir_node *left = get_Shrs_left(node);
1847 ir_node *right = get_Shrs_right(node);
1849 if (is_Const(right)) {
1850 ir_tarval *tv = get_Const_tarval(right);
1851 long val = get_tarval_long(tv);
1853 /* this is a sign extension */
1854 dbg_info *dbgi = get_irn_dbg_info(node);
1855 ir_node *block = be_transform_node(get_nodes_block(node));
1856 ir_node *new_op = be_transform_node(left);
1858 return create_sex_32_64(dbgi, block, new_op, node);
1862 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16, 24} is a sign-extending Conv */
1863 if (is_Const(right) && is_Shl(left)) {
1864 ir_node *shl_left = get_Shl_left(left);
1865 ir_node *shl_right = get_Shl_right(left);
1866 if (is_Const(shl_right)) {
1867 ir_tarval *tv1 = get_Const_tarval(right);
1868 ir_tarval *tv2 = get_Const_tarval(shl_right);
1869 if (tv1 == tv2 && tarval_is_long(tv1)) {
1870 long val = get_tarval_long(tv1);
1871 if (val == 16 || val == 24) {
1872 dbg_info *dbgi = get_irn_dbg_info(node);
1873 ir_node *block = get_nodes_block(node);
1883 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right; upconv keeps sign bits correct */
1892 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1893 match_immediate | match_upconv);
1899 * Creates an ia32 Rol.
1901 * @param op1 The first operator
1902 * @param op2 The second operator
1903 * @return The created ia32 RotL node
1905 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1907 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1913 * Creates an ia32 Ror.
1914 * NOTE: There is no RotR with immediate because this would always be a RotL
1915 * "imm-mode_size_bits" which can be pre-calculated.
1917 * @param op1 The first operator
1918 * @param op2 The second operator
1919 * @return The created ia32 RotR node
1921 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1923 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1929 * Creates an ia32 RotR or RotL (depending on the found pattern).
1931 * @return The created ia32 RotL or RotR node
1933 static ir_node *gen_Rotl(ir_node *node)
1935 ir_node *op1 = get_Rotl_left(node);
1936 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n) — strip the Minus */
1938 if (is_Minus(op2)) {
1939 return gen_Ror(node, op1, get_Minus_op(op2));
1942 return gen_Rol(node, op1, op2);
1948 * Transforms a Minus node.
1950 * @return The created ia32 Minus node
/* NOTE(review): declarations of size/ent/new_node and the return are elided. */
1952 static ir_node *gen_Minus(ir_node *node)
1954 ir_node *op = get_Minus_op(node);
1955 ir_node *block = be_transform_node(get_nodes_block(node));
1956 dbg_info *dbgi = get_irn_dbg_info(node);
1957 ir_mode *mode = get_irn_mode(node);
1962 if (mode_is_float(mode)) {
1963 ir_node *new_op = be_transform_node(op);
1964 if (ia32_cg_config.use_sse2) {
1965 /* TODO: non-optimal... if we have many xXors, then we should
1966 * rather create a load for the const and use that instead of
1967 * several AM nodes... */
/* SSE negate: xor with the sign-bit mask constant loaded via AM */
1968 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1970 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1971 noreg_GP, nomem, new_op, noreg_xmm);
1973 size = get_mode_size_bits(mode);
1974 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1976 set_ia32_am_sc(new_node, ent);
1977 set_ia32_op_type(new_node, ia32_AddrModeS);
1978 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction */
1980 new_node = new_bd_ia32_fchs(dbgi, block, new_op);
/* integer negate */
1983 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1986 SET_IA32_ORIG_NODE(new_node, node);
1992 * Transforms a Not node.
1994 * @return The created ia32 Not node
1996 static ir_node *gen_Not(ir_node *node)
1998 ir_node *op = get_Not_op(node);
2000 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
2001 assert(!mode_is_float(get_irn_mode(node)));
2003 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/**
 * Create a float Abs (optionally negated afterwards, i.e. -|x|).
 * SSE: And with a sign-bit-clearing mask constant; x87: fabs (+ fchs when
 * negate is requested). NOTE(review): declarations and the return are elided.
 */
2006 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
2007 bool negate, ir_node *node)
2009 ir_node *new_block = be_transform_node(block);
2010 ir_mode *mode = get_irn_mode(op);
2011 ir_node *new_op = be_transform_node(op);
2016 assert(mode_is_float(mode));
2018 if (ia32_cg_config.use_sse2) {
2019 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2020 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2021 noreg_GP, nomem, new_op, noreg_fp);
2023 size = get_mode_size_bits(mode);
2024 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2026 set_ia32_am_sc(new_node, ent);
2028 SET_IA32_ORIG_NODE(new_node, node);
2030 set_ia32_op_type(new_node, ia32_AddrModeS);
2031 set_ia32_ls_mode(new_node, mode);
2033 /* TODO, implement -Abs case */
2036 check_x87_floatmode(mode);
2037 new_node = new_bd_ia32_fabs(dbgi, new_block, new_op);
2038 SET_IA32_ORIG_NODE(new_node, node);
/* negate the absolute value when -|x| was requested (x87 path) */
2040 new_node = new_bd_ia32_fchs(dbgi, new_block, new_node);
2041 SET_IA32_ORIG_NODE(new_node, node);
2049 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* bt copies bit n of x into the carry flag; callers test CF. */
2051 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2053 dbg_info *dbgi = get_irn_dbg_info(cmp);
2054 ir_node *block = get_nodes_block(cmp);
2055 ir_node *new_block = be_transform_node(block);
2056 ir_node *op1 = be_transform_node(x);
2057 ir_node *op2 = be_transform_node(n);
2059 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/**
 * Map a firm ir_relation plus the compared mode to an ia32 condition code.
 * Three regimes: float (parity-aware codes), signed int, unsigned int.
 * @param overflow_possible when false, signed </>= can use the cheaper
 *        sign/not-sign codes instead of less/greater_equal.
 * NOTE(review): the switch headers and some panic-adjacent lines are elided.
 */
2062 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2064 bool overflow_possible)
2066 if (mode_is_float(mode)) {
2068 case ir_relation_equal: return ia32_cc_float_equal;
2069 case ir_relation_less: return ia32_cc_float_below;
2070 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2071 case ir_relation_greater: return ia32_cc_float_above;
2072 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2073 case ir_relation_less_greater: return ia32_cc_not_equal;
2074 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2075 case ir_relation_unordered: return ia32_cc_parity;
2076 case ir_relation_unordered_equal: return ia32_cc_equal;
2077 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2078 case ir_relation_unordered_less_equal:
2079 return ia32_cc_float_unordered_below_equal;
2080 case ir_relation_unordered_greater:
2081 return ia32_cc_float_unordered_above;
2082 case ir_relation_unordered_greater_equal:
2083 return ia32_cc_float_unordered_above_equal;
2084 case ir_relation_unordered_less_greater:
2085 return ia32_cc_float_not_equal;
2086 case ir_relation_false:
2087 case ir_relation_true:
2088 /* should we introduce a jump always/jump never? */
2091 panic("Unexpected float pnc");
2092 } else if (mode_is_signed(mode)) {
2094 case ir_relation_unordered_equal:
2095 case ir_relation_equal: return ia32_cc_equal;
2096 case ir_relation_unordered_less:
2097 case ir_relation_less:
/* without possible overflow, x < 0 reduces to the sign flag */
2098 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2099 case ir_relation_unordered_less_equal:
2100 case ir_relation_less_equal: return ia32_cc_less_equal;
2101 case ir_relation_unordered_greater:
2102 case ir_relation_greater: return ia32_cc_greater;
2103 case ir_relation_unordered_greater_equal:
2104 case ir_relation_greater_equal:
2105 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2106 case ir_relation_unordered_less_greater:
2107 case ir_relation_less_greater: return ia32_cc_not_equal;
2108 case ir_relation_less_equal_greater:
2109 case ir_relation_unordered:
2110 case ir_relation_false:
2111 case ir_relation_true:
2112 /* introduce jump always/jump never? */
2115 panic("Unexpected pnc");
/* unsigned integer comparisons use the below/above family */
2118 case ir_relation_unordered_equal:
2119 case ir_relation_equal: return ia32_cc_equal;
2120 case ir_relation_unordered_less:
2121 case ir_relation_less: return ia32_cc_below;
2122 case ir_relation_unordered_less_equal:
2123 case ir_relation_less_equal: return ia32_cc_below_equal;
2124 case ir_relation_unordered_greater:
2125 case ir_relation_greater: return ia32_cc_above;
2126 case ir_relation_unordered_greater_equal:
2127 case ir_relation_greater_equal: return ia32_cc_above_equal;
2128 case ir_relation_unordered_less_greater:
2129 case ir_relation_less_greater: return ia32_cc_not_equal;
2130 case ir_relation_less_equal_greater:
2131 case ir_relation_unordered:
2132 case ir_relation_false:
2133 case ir_relation_true:
2134 /* introduce jump always/jump never? */
2137 panic("Unexpected pnc");
/**
 * Transform a Cmp into a flags-producing node and report the condition code
 * to test. Recognizes bit-test (bt) patterns before falling back to a plain
 * Cmp transform. NOTE(review): some lines (And/Shl guards, returns) are
 * elided from this excerpt.
 */
2141 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2143 /* must have a Cmp as input */
2144 ir_relation relation = get_Cmp_relation(cmp);
2145 ir_node *l = get_Cmp_left(cmp);
2146 ir_node *r = get_Cmp_right(cmp);
2147 ir_mode *mode = get_irn_mode(l);
2148 bool overflow_possible;
2151 /* check for bit-test */
2152 if (ia32_cg_config.use_bt
2153 && (relation == ir_relation_equal
2154 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2155 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2157 ir_node *la = get_And_left(l);
2158 ir_node *ra = get_And_right(l);
2165 ir_node *c = get_Shl_left(la);
2166 if (is_Const_1(c) && is_Const_0(r)) {
2167 /* (1 << n) & ra) */
2168 ir_node *n = get_Shl_right(la);
2169 flags = gen_bt(cmp, ra, n);
2170 /* the bit is copied into the CF flag */
2171 if (relation & ir_relation_equal)
2172 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2174 *cc_out = ia32_cc_below; /* test for CF=1 */
2180 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2181 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2182 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2183 * a predecessor node). So add the < bit.
2184 * (Note that we do not want to produce <=> (which can happen for
2185 * unoptimized code), because no x86 flag can represent that */
2186 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2187 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparing against literal 0 cannot overflow, enabling sign-flag codes */
2189 overflow_possible = true;
2190 if (is_Const(r) && is_Const_null(r))
2191 overflow_possible = false;
2193 /* just do a normal transformation of the Cmp */
2194 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2195 flags = be_transform_node(cmp);
2200 * Transforms a Load.
2202 * @return the created ia32 Load node
/* NOTE(review): base/idx extraction with noreg fallbacks and the float-load
 * ls_mode arguments are partially elided from this excerpt. */
2204 static ir_node *gen_Load(ir_node *node)
2206 ir_node *old_block = get_nodes_block(node);
2207 ir_node *block = be_transform_node(old_block);
2208 ir_node *ptr = get_Load_ptr(node);
2209 ir_node *mem = get_Load_mem(node);
2210 ir_node *new_mem = be_transform_node(mem);
2211 dbg_info *dbgi = get_irn_dbg_info(node);
2212 ir_mode *mode = get_Load_mode(node);
2213 int throws_exception = ir_throws_exception(node);
2217 ia32_address_t addr;
2219 /* construct load address */
2220 memset(&addr, 0, sizeof(addr));
2221 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2228 base = be_transform_node(base);
2234 idx = be_transform_node(idx);
2237 if (mode_is_float(mode)) {
2238 if (ia32_cg_config.use_sse2) {
2239 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2242 new_node = new_bd_ia32_fld(dbgi, block, base, idx, new_mem,
2246 assert(mode != mode_b);
2248 /* create a conv node with address mode for smaller modes */
2249 if (get_mode_size_bits(mode) < 32) {
2250 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2251 new_mem, noreg_GP, mode);
2253 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2256 ir_set_throws_exception(new_node, throws_exception);
2258 set_irn_pinned(new_node, get_irn_pinned(node));
2259 set_ia32_op_type(new_node, ia32_AddrModeS);
2260 set_ia32_ls_mode(new_node, mode);
2261 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the register allocator;
 * the result Proj numbers of all three load variants must coincide for that */
2263 if (get_irn_pinned(node) == op_pin_state_floats) {
2264 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
2265 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
2266 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2267 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2270 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Decide whether a (Proj of a) Load can be folded as destination address
 * mode into a Store at 'ptr'. NOTE(review): the is_Proj/is_Load guards and
 * the return statements are elided from this excerpt.
 */
2275 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2276 ir_node *ptr, ir_node *other)
2283 /* we only use address mode if we're the only user of the load */
2284 if (get_irn_n_edges(node) > 1)
2287 load = get_Proj_pred(node);
/* load must be in the same block as the store for safe folding */
2290 if (get_nodes_block(load) != block)
2293 /* store should have the same pointer as the load */
2294 if (get_Load_ptr(load) != ptr)
2297 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2298 if (other != NULL &&
2299 get_nodes_block(other) == block &&
2300 heights_reachable_in_block(ia32_heights, other, load)) {
2304 if (ia32_prevents_AM(block, load, mem))
2306 /* Store should be attached to the load via mem */
2307 assert(heights_reachable_in_block(ia32_heights, mem, load));
/**
 * Build a destination-address-mode binop (e.g. AddMem): the operation reads,
 * modifies and writes memory directly instead of going through a register.
 * Returns the created node; the no-match path (returning NULL, presumably)
 * is elided from this excerpt.
 */
2312 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2313 ir_node *mem, ir_node *ptr, ir_mode *mode,
2314 construct_binop_dest_func *func,
2315 construct_binop_dest_func *func8bit,
2316 match_flags_t flags)
2318 ir_node *src_block = get_nodes_block(node);
2326 ia32_address_mode_t am;
2327 ia32_address_t *addr = &am.addr;
2328 memset(&am, 0, sizeof(am));
2330 assert(flags & match_immediate); /* there is no destam node without... */
2331 commutative = (flags & match_commutative) != 0;
/* try folding either operand's load; commutativity allows swapping */
2333 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2334 build_address(&am, op1, ia32_create_am_double_use);
2335 new_op = create_immediate_or_transform(op2);
2336 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2337 build_address(&am, op2, ia32_create_am_double_use);
2338 new_op = create_immediate_or_transform(op1);
2343 if (addr->base == NULL)
2344 addr->base = noreg_GP;
2345 if (addr->index == NULL)
2346 addr->index = noreg_GP;
2347 if (addr->mem == NULL)
2350 dbgi = get_irn_dbg_info(node);
2351 block = be_transform_node(src_block);
2352 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need their own constructor (different register class) */
2354 if (get_mode_size_bits(mode) == 8) {
2355 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2357 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2359 set_address(new_node, addr);
2360 set_ia32_op_type(new_node, ia32_AddrModeD);
2361 set_ia32_ls_mode(new_node, mode);
2362 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new combined node */
2364 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2365 mem_proj = be_transform_node(am.mem_proj);
2366 be_set_transformed_node(am.mem_proj, new_node);
2367 be_set_transformed_node(mem_proj, new_node);
/**
 * Build a destination-address-mode unop (e.g. IncMem/DecMem) that modifies
 * memory in place. NOTE(review): declarations and the failure return are
 * elided from this excerpt.
 */
2372 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2373 ir_node *ptr, ir_mode *mode,
2374 construct_unop_dest_func *func)
2376 ir_node *src_block = get_nodes_block(node);
2382 ia32_address_mode_t am;
2383 ia32_address_t *addr = &am.addr;
2385 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2388 memset(&am, 0, sizeof(am));
2389 build_address(&am, op, ia32_create_am_double_use);
2391 dbgi = get_irn_dbg_info(node);
2392 block = be_transform_node(src_block);
2393 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2394 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2395 set_address(new_node, addr);
2396 set_ia32_op_type(new_node, ia32_AddrModeD);
2397 set_ia32_ls_mode(new_node, mode);
2398 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new combined node */
2400 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2401 mem_proj = be_transform_node(am.mem_proj);
2402 be_set_transformed_node(am.mem_proj, new_node);
2403 be_set_transformed_node(mem_proj, new_node);
/**
 * Try to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem that writes the
 * condition byte straight to memory. NOTE(review): declarations, the
 * negate flag handling and some returns are elided from this excerpt.
 */
2408 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2410 ir_mode *mode = get_irn_mode(node);
2411 ir_node *mux_true = get_Mux_true(node);
2412 ir_node *mux_false = get_Mux_false(node);
2420 ia32_condition_code_t cc;
2421 ia32_address_t addr;
/* setcc only produces a byte */
2423 if (get_mode_size_bits(mode) != 8)
2426 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2428 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2434 cond = get_Mux_sel(node);
2435 flags = get_flags_node(cond, &cc);
2436 /* we can't handle the float special cases with SetM */
2437 if (cc & ia32_cc_additional_float_cases)
/* Mux(sel, 0, 1) stores the negated condition */
2440 cc = ia32_negate_condition_code(cc);
2442 build_address_ptr(&addr, ptr, mem);
2444 dbgi = get_irn_dbg_info(node);
2445 block = get_nodes_block(node);
2446 new_block = be_transform_node(block);
2447 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2448 addr.index, addr.mem, flags, cc);
2449 set_address(new_node, &addr);
2450 set_ia32_op_type(new_node, ia32_AddrModeD);
2451 set_ia32_ls_mode(new_node, mode);
2452 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to create a destination address mode node for a Store: when the stored
 * value is an arithmetic/logic op whose only user is this Store, the whole
 * Load-op-Store pattern can be collapsed into one memory-operand instruction
 * (AddMem, SubMem, AndMem, ...). Returns the new node or NULL.
 * NOTE(review): case labels, returns and some declarations are elided in this
 * extract; comments only, code untouched.
 */
2457 static ir_node *try_create_dest_am(ir_node *node)
2459 	ir_node *val = get_Store_value(node);
2460 	ir_node *mem = get_Store_mem(node);
2461 	ir_node *ptr = get_Store_ptr(node);
2462 	ir_mode *mode = get_irn_mode(val);
2463 	unsigned bits = get_mode_size_bits(mode);
2468 	/* handle only GP modes for now... */
2469 	if (!ia32_mode_needs_gp_reg(mode))
2473 	/* store must be the only user of the val node */
2474 	if (get_irn_n_edges(val) > 1)
2476 	/* skip pointless convs */
2478 	ir_node *conv_op = get_Conv_op(val);
2479 	ir_mode *pred_mode = get_irn_mode(conv_op);
2480 	if (!ia32_mode_needs_gp_reg(pred_mode))
2482 	if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2490 	/* value must be in the same block */
2491 	if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the opcode of the stored value (case labels elided) */
2494 	switch (get_irn_opcode(val)) {
2496 	op1 = get_Add_left(val);
2497 	op2 = get_Add_right(val);
/* Add +1/-1 becomes inc/dec on memory when the target prefers it */
2498 	if (ia32_cg_config.use_incdec) {
2499 	if (is_Const_1(op2)) {
2500 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2502 	} else if (is_Const_Minus_1(op2)) {
2503 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2507 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2508 	new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2509 	match_commutative | match_immediate);
2512 	op1 = get_Sub_left(val);
2513 	op2 = get_Sub_right(val);
/* Sub by constant should have been normalized to Add of the negated const */
2514 	if (is_Const(op2)) {
2515 	ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2517 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2518 	new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2522 	op1 = get_And_left(val);
2523 	op2 = get_And_right(val);
2524 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2525 	new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2526 	match_commutative | match_immediate);
2529 	op1 = get_Or_left(val);
2530 	op2 = get_Or_right(val);
2531 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2532 	new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2533 	match_commutative | match_immediate);
2536 	op1 = get_Eor_left(val);
2537 	op2 = get_Eor_right(val);
2538 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2539 	new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2540 	match_commutative | match_immediate);
/* shifts/rotate: same constructor for both widths, shift count not commutable */
2543 	op1 = get_Shl_left(val);
2544 	op2 = get_Shl_right(val);
2545 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2546 	new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2550 	op1 = get_Shr_left(val);
2551 	op2 = get_Shr_right(val);
2552 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2553 	new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2557 	op1 = get_Shrs_left(val);
2558 	op2 = get_Shrs_right(val);
2559 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2560 	new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2564 	op1 = get_Rotl_left(val);
2565 	op2 = get_Rotl_right(val);
2566 	new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2567 	new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2570 	/* TODO: match ROR patterns... */
2572 	new_node = try_create_SetMem(val, ptr, mem);
2576 	op1 = get_Minus_op(val);
2577 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2580 	/* should be lowered already */
2581 	assert(mode != mode_b);
2582 	op1 = get_Not_op(val);
2583 	new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate the original Store's pinned state to the combined node */
2589 	if (new_node != NULL) {
2590 	if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2591 	get_irn_pinned(node) == op_pin_state_pinned) {
2592 	set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether an integer mode can be used directly by the x87
 * fild/fist instructions: they only accept signed 16- and 32-bit memory
 * operands (the 64-bit case is handled separately by the callers).
 * The actual return statements are elided in this extract.
 */
2599 static bool possible_int_mode_for_fp(ir_mode *mode)
2603 	if (!mode_is_signed(mode))
2605 	size = get_mode_size_bits(mode);
2606 	if (size != 16 && size != 32)
/**
 * Test whether @p node is a Conv from a float mode to an integer mode that
 * fild/fist can handle (signed 16/32 bit). Used to fold float->int
 * conversions directly into a fist store. Return statements are elided
 * in this extract.
 */
2611 static int is_float_to_int_conv(const ir_node *node)
2613 	ir_mode *mode = get_irn_mode(node);
2617 	if (!possible_int_mode_for_fp(mode))
2622 	conv_op = get_Conv_op(node);
2623 	conv_mode = get_irn_mode(conv_op);
2625 	if (!mode_is_float(conv_mode))
2632  * Transform a Store(floatConst) into a sequence of
2635  * @return the created ia32 Store node
/* Stores the constant's bit pattern with one or more integer Stores
 * (4-byte and 2-byte chunks); multiple stores are joined with a Sync.
 * NOTE(review): the loop header, chunk-size/offset bookkeeping and parts of
 * the epilogue are elided in this extract; comments only, code untouched. */
2637 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2639 	ir_mode *mode = get_irn_mode(cns);
2640 	unsigned size = get_mode_size_bytes(mode);
2641 	ir_tarval *tv = get_Const_tarval(cns);
2642 	ir_node *block = get_nodes_block(node);
2643 	ir_node *new_block = be_transform_node(block);
2644 	ir_node *ptr = get_Store_ptr(node);
2645 	ir_node *mem = get_Store_mem(node);
2646 	dbg_info *dbgi = get_irn_dbg_info(node);
2649 	int throws_exception = ir_throws_exception(node);
2651 	ia32_address_t addr;
2653 	build_address_ptr(&addr, ptr, mem);
/* assemble a 32-bit chunk of the constant, little-endian byte order */
2660 	val= get_tarval_sub_bits(tv, ofs) |
2661 	(get_tarval_sub_bits(tv, ofs + 1) << 8) |
2662 	(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2663 	(get_tarval_sub_bits(tv, ofs + 3) << 24);
2666 	} else if (size >= 2) {
/* remaining 16-bit chunk (e.g. the top of an 80-bit x87 long double) */
2667 	val= get_tarval_sub_bits(tv, ofs) |
2668 	(get_tarval_sub_bits(tv, ofs + 1) << 8);
2672 	panic("invalid size of Store float to mem (%+F)", node);
2674 	ir_graph *const irg = get_Block_irg(new_block);
2675 	ir_node *const imm = ia32_create_Immediate(irg, NULL, 0, val);
2677 	ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2678 	addr.index, addr.mem, imm);
2679 	ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2681 	ir_set_throws_exception(new_node, throws_exception);
2682 	set_irn_pinned(new_node, get_irn_pinned(node));
2683 	set_ia32_op_type(new_node, ia32_AddrModeD);
2684 	set_ia32_ls_mode(new_node, mode);
2685 	set_address(new_node, &addr);
2686 	SET_IA32_ORIG_NODE(new_node, node);
/* advance to the next chunk until the whole constant is written */
2693 	addr.offset += delta;
2694 	} while (size != 0);
/* several partial stores: combine their memory outputs with a Sync;
 * a single store: return its predecessor directly */
2697 	return new_rd_Sync(dbgi, new_block, i, ins);
2699 	return get_Proj_pred(ins[0]);
2704  * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates implicitly; plain fist needs the FPU control word
 * temporarily switched to truncation mode. */
2706 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2707 ir_node *index, ir_node *mem, ir_node *val)
2709 	if (ia32_cg_config.use_fisttp) {
2710 	/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2711 	if other users exists */
2712 	ir_node *vfisttp = new_bd_ia32_fisttp(dbgi, block, base, index, mem, val);
2713 	ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_fisttp_res);
/* Keep forces the popped copy to stay alive for potential other users */
2714 	be_new_Keep(block, 1, &value);
2718 	ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2721 	ir_node *vfist = new_bd_ia32_fist(dbgi, block, base, index, mem, val, trunc_mode);
2727  * Transforms a general (no special case) Store.
2729  * @return the created ia32 Store node
/* Tries destination address mode first, then selects between SSE store,
 * x87 store, fist (folded float->int conv) and plain integer Store. */
2731 static ir_node *gen_general_Store(ir_node *node)
2733 	ir_node *val = get_Store_value(node);
2734 	ir_mode *mode = get_irn_mode(val);
2735 	ir_node *block = get_nodes_block(node);
2736 	ir_node *new_block = be_transform_node(block);
2737 	ir_node *ptr = get_Store_ptr(node);
2738 	ir_node *mem = get_Store_mem(node);
2739 	dbg_info *dbgi = get_irn_dbg_info(node);
2740 	int throws_exception = ir_throws_exception(node);
2743 	ia32_address_t addr;
2745 	/* check for destination address mode */
2746 	new_node = try_create_dest_am(node);
2747 	if (new_node != NULL)
2750 	/* construct store address */
2751 	memset(&addr, 0, sizeof(addr));
2752 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2754 	if (addr.base == NULL) {
2755 	addr.base = noreg_GP;
2757 	addr.base = be_transform_node(addr.base);
2760 	if (addr.index == NULL) {
2761 	addr.index = noreg_GP;
2763 	addr.index = be_transform_node(addr.index);
2765 	addr.mem = be_transform_node(mem);
2767 	if (mode_is_float(mode)) {
2768 	if (ia32_cg_config.use_sse2) {
2769 	new_val = be_transform_node(val);
2770 	new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2771 	addr.index, addr.mem, new_val);
/* x87: a store can narrow the value itself, so skip explicit downconvs */
2773 	val = ia32_skip_float_downconv(val);
2774 	new_val = be_transform_node(val);
2775 	new_node = new_bd_ia32_fst(dbgi, new_block, addr.base,
2776 	addr.index, addr.mem, new_val, mode);
/* Store(Conv(float)) without SSE2: fold the conversion into a fist */
2778 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2779 	val = get_Conv_op(val);
2780 	new_val = be_transform_node(val);
2781 	new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2783 	unsigned dest_bits = get_mode_size_bits(mode);
/* integer store: drop Convs that only narrow, the store width decides */
2784 	while (is_downconv(val)
2785 	&& get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2786 	val = get_Conv_op(val);
2788 	new_val = create_immediate_or_transform(val);
2789 	assert(mode != mode_b);
2791 	if (dest_bits == 8) {
2792 	new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2793 	addr.index, addr.mem, new_val);
2795 	new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2796 	addr.index, addr.mem, new_val);
2799 	ir_set_throws_exception(new_node, throws_exception);
2801 	set_irn_pinned(new_node, get_irn_pinned(node));
2802 	set_ia32_op_type(new_node, ia32_AddrModeD);
2803 	set_ia32_ls_mode(new_node, mode);
2805 	set_address(new_node, &addr);
2806 	SET_IA32_ORIG_NODE(new_node, node);
2812  * Transforms a Store.
2814  * @return the created ia32 Store node
/* Dispatcher: float constants get the integer-store sequence, everything
 * else goes through gen_general_Store(). */
2816 static ir_node *gen_Store(ir_node *node)
2818 	ir_node *val = get_Store_value(node);
2819 	ir_mode *mode = get_irn_mode(val);
2821 	if (mode_is_float(mode) && is_Const(val)) {
2822 	/* We can transform every floating const store
2823 	into a sequence of integer stores.
2824 	If the constant is already in a register,
2825 	it would be better to use it, but we don't
2826 	have this information here. */
2827 	return gen_float_const_Store(node, val);
2829 	return gen_general_Store(node);
2833  * Transforms a Switch.
2835  * @return the created ia32 SwitchJmp node
/* Builds an indirect jump through a private, constant jump table entity;
 * the selector is widened to 32 bit if necessary. */
2837 static ir_node *gen_Switch(ir_node *node)
2839 	dbg_info *dbgi = get_irn_dbg_info(node);
2840 	ir_graph *irg = get_irn_irg(node);
2841 	ir_node *block = be_transform_node(get_nodes_block(node));
2842 	ir_node *sel = get_Switch_selector(node);
2843 	ir_node *new_sel = be_transform_node(sel);
2844 	ir_mode *sel_mode = get_irn_mode(sel);
2845 	const ir_switch_table *table = get_Switch_table(node);
2846 	unsigned n_outs = get_Switch_n_outs(node);
2850 	assert(get_mode_size_bits(sel_mode) <= 32);
2851 	assert(!mode_is_float(sel_mode));
2852 	sel = ia32_skip_sameconv(sel);
/* table entries are 32 bit wide, so zero/sign-extend a narrower selector */
2853 	if (get_mode_size_bits(sel_mode) < 32)
2854 	new_sel = transform_upconv(sel, node);
/* the jump table lives in a fresh private entity in read-only data */
2856 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2857 	set_entity_visibility(entity, ir_visibility_private);
2858 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2860 	table = ir_switch_table_duplicate(irg, table);
2862 	new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2863 	set_ia32_am_scale(new_node, 2);
2864 	set_ia32_am_sc(new_node, entity);
2865 	set_ia32_op_type(new_node, ia32_AddrModeS);
2866 	set_ia32_ls_mode(new_node, mode_Iu);
2867 	SET_IA32_ORIG_NODE(new_node, node);
2868 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2869 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2875  * Transform a Cond node.
/* Produces a Jcc fed by the flags node of the selector (a Cmp). */
2877 static ir_node *gen_Cond(ir_node *node)
2879 	ir_node *block = get_nodes_block(node);
2880 	ir_node *new_block = be_transform_node(block);
2881 	dbg_info *dbgi = get_irn_dbg_info(node);
2882 	ir_node *sel = get_Cond_selector(node);
2883 	ir_node *flags = NULL;
2885 	ia32_condition_code_t cc;
2887 	/* we get flags from a Cmp */
2888 	flags = get_flags_node(sel, &cc);
2890 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2891 	SET_IA32_ORIG_NODE(new_node, node);
2897  * Transform a be_Copy.
/* Duplicates the node; GP copies are normalized to mode_Iu so all
 * general-purpose register copies share one mode. */
2899 static ir_node *gen_be_Copy(ir_node *node)
2901 	ir_node *new_node = be_duplicate_node(node);
2902 	ir_mode *mode = get_irn_mode(new_node);
2904 	if (ia32_mode_needs_gp_reg(mode)) {
2905 	set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare. Uses fucomi when available (writes eflags
 * directly); otherwise falls back to fucom/ftst + fnstsw + sahf to move the
 * FPU status word into the CPU flags.
 */
2911 static ir_node *create_Fucom(ir_node *node)
2913 	dbg_info *dbgi = get_irn_dbg_info(node);
2914 	ir_node *block = get_nodes_block(node);
2915 	ir_node *new_block = be_transform_node(block);
2916 	ir_node *left = get_Cmp_left(node);
2917 	ir_node *new_left = be_transform_node(left);
2918 	ir_node *right = get_Cmp_right(node);
2919 	ir_mode *cmp_mode = get_irn_mode(left);
2922 	check_x87_floatmode(cmp_mode);
2924 	if (ia32_cg_config.use_fucomi) {
2925 	new_right = be_transform_node(right);
2926 	new_node = new_bd_ia32_Fucomi(dbgi, new_block, new_left,
2928 	set_ia32_commutative(new_node);
2929 	SET_IA32_ORIG_NODE(new_node, node);
/* comparison against 0.0 can use the cheaper ftst */
2931 	if (is_Const_0(right)) {
2932 	new_node = new_bd_ia32_FtstFnstsw(dbgi, new_block, new_left, 0);
2934 	new_right = be_transform_node(right);
2935 	new_node = new_bd_ia32_FucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2936 	set_ia32_commutative(new_node);
2939 	SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (the fnstsw result) into the eflags register */
2941 	new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2942 	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE ucomiss/ucomisd compare setting eflags directly.
 * Operands may be folded as a source address mode (match_am).
 */
2948 static ir_node *create_Ucomi(ir_node *node)
2950 	dbg_info *dbgi = get_irn_dbg_info(node);
2951 	ir_node *src_block = get_nodes_block(node);
2952 	ir_node *new_block = be_transform_node(src_block);
2953 	ir_node *left = get_Cmp_left(node);
2954 	ir_node *right = get_Cmp_right(node);
2956 	ia32_address_mode_t am;
2957 	ia32_address_t *addr = &am.addr;
2959 	match_arguments(&am, src_block, left, right, NULL,
2960 	match_commutative | match_am);
2962 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2963 	addr->mem, am.new_op1, am.new_op2,
2965 	set_am_attributes(new_node, &am);
2967 	SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory users if a Load was folded into the compare */
2969 	new_node = fix_mem_proj(new_node, &am);
/**
 * Check whether a Mux result has clean upper bits w.r.t. @p mode.
 * Mux nodes lowered to setcc produce an 8-bit result whose upper bits are
 * not defined, so those are reported as not clean; otherwise both Mux
 * operands must have clean upper bits.
 */
2974 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2976 	ir_node *mux_true = get_Mux_true(node);
2977 	ir_node *mux_false = get_Mux_false(node);
2978 	ir_mode *mux_mode = get_irn_mode(node);
2979 	/* mux nodes which get transformed to the set instruction are not clean */
2980 	if (is_Const(mux_true) && is_Const(mux_false)
2981 	&& get_mode_size_bits(mux_mode) == 8) {
2984 	return be_upper_bits_clean(mux_true, mode)
2985 	&& be_upper_bits_clean(mux_false, mode);
2989  * Generate code for a Cmp.
/* Float compares are delegated to Ucomi/Fucom. Integer compares prefer the
 * Test instruction for (x & y) ==/!= 0 patterns, otherwise emit Cmp; both
 * try address-mode folding and a widening to 32 bit for smaller encodings. */
2991 static ir_node *gen_Cmp(ir_node *node)
2993 	dbg_info *dbgi = get_irn_dbg_info(node);
2994 	ir_node *block = get_nodes_block(node);
2995 	ir_node *new_block = be_transform_node(block);
2996 	ir_node *left = get_Cmp_left(node);
2997 	ir_node *right = get_Cmp_right(node);
2998 	ir_mode *cmp_mode = get_irn_mode(left);
3000 	ia32_address_mode_t am;
3001 	ia32_address_t *addr = &am.addr;
3003 	if (mode_is_float(cmp_mode)) {
3004 	if (ia32_cg_config.use_sse2) {
3005 	return create_Ucomi(node);
3007 	return create_Fucom(node);
3011 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3013 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3014 	if (is_Const_0(right) &&
3016 	get_irn_n_edges(left) == 1) {
3017 	/* Test(and_left, and_right) */
3018 	ir_node *and_left = get_And_left(left);
3019 	ir_node *and_right = get_And_right(left);
3021 	/* matze: code here used mode instead of cmd_mode, I think it is always
3022 	 * the same as cmp_mode, but I leave this here to see if this is really
3025 	assert(get_irn_mode(and_left) == cmp_mode);
3027 	match_arguments(&am, block, and_left, and_right, NULL,
3029 	match_am | match_8bit_am | match_16bit_am |
3030 	match_am_and_immediates | match_immediate);
3032 	/* use 32bit compare mode if possible since the opcode is smaller */
3033 	if (am.op_type == ia32_Normal &&
3034 	be_upper_bits_clean(and_left, cmp_mode) &&
3035 	be_upper_bits_clean(and_right, cmp_mode)) {
3036 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3039 	if (get_mode_size_bits(cmp_mode) == 8) {
3040 	new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3041 	addr->index, addr->mem,
3042 	am.new_op1, am.new_op2,
3045 	new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3046 	addr->index, addr->mem, am.new_op1,
3047 	am.new_op2, am.ins_permuted);
3050 	/* Cmp(left, right) */
3051 	match_arguments(&am, block, left, right, NULL,
3053 	match_am | match_8bit_am | match_16bit_am |
3054 	match_am_and_immediates | match_immediate);
3055 	/* use 32bit compare mode if possible since the opcode is smaller */
3056 	if (am.op_type == ia32_Normal &&
3057 	be_upper_bits_clean(left, cmp_mode) &&
3058 	be_upper_bits_clean(right, cmp_mode)) {
3059 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3062 	if (get_mode_size_bits(cmp_mode) == 8) {
3063 	new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3064 	addr->index, addr->mem, am.new_op1,
3065 	am.new_op2, am.ins_permuted);
3067 	new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3068 	addr->mem, am.new_op1, am.new_op2,
3072 	set_am_attributes(new_node, &am);
3073 	set_ia32_ls_mode(new_node, cmp_mode);
3075 	SET_IA32_ORIG_NODE(new_node, node);
3077 	new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMovcc node selecting between Mux true/false values based on
 * @p flags / @p cc. If match_arguments swapped the operands, the condition
 * code is negated to compensate.
 */
3082 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3083 ia32_condition_code_t cc)
3085 	dbg_info *dbgi = get_irn_dbg_info(node);
3086 	ir_node *block = get_nodes_block(node);
3087 	ir_node *new_block = be_transform_node(block);
3088 	ir_node *val_true = get_Mux_true(node);
3089 	ir_node *val_false = get_Mux_false(node);
3091 	ia32_address_mode_t am;
3092 	ia32_address_t *addr;
3094 	assert(ia32_cg_config.use_cmov);
3095 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3099 	match_arguments(&am, block, val_false, val_true, flags,
3100 	match_commutative | match_am | match_16bit_am | match_mode_neutral);
3102 	if (am.ins_permuted)
3103 	cc = ia32_negate_condition_code(cc);
3105 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3106 	addr->mem, am.new_op1, am.new_op2, new_flags,
3108 	set_am_attributes(new_node, &am);
3110 	SET_IA32_ORIG_NODE(new_node, node);
3112 	new_node = fix_mem_proj(new_node, &am);
3118  * Creates a ia32 Setcc instruction.
/* setcc only writes an 8-bit register, so the result is zero-extended
 * via Conv_I2I8Bit when the original mode is wider. */
3120 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3121 ir_node *flags, ia32_condition_code_t cc,
3124 	ir_mode *mode = get_irn_mode(orig_node);
3127 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3128 	SET_IA32_ORIG_NODE(new_node, orig_node);
3130 	/* we might need to conv the result up */
3131 	if (get_mode_size_bits(mode) > 8) {
3132 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3133 	nomem, new_node, mode_Bu);
3134 	SET_IA32_ORIG_NODE(new_node, orig_node);
3141  * Create instruction for an unsigned Difference or Zero.
/* Branch-free doz: res = (a - b) & ~sbb(borrow). The Sub's carry flag
 * feeds Sbb0 which yields an all-ones/all-zero mask; Not + And select
 * either the difference or zero. */
3143 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3145 	ir_mode *mode = get_irn_mode(psi);
3155 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3156 	match_mode_neutral | match_am | match_immediate | match_two_users);
3158 	block = get_nodes_block(new_node);
/* gen_binop may or may not return the Sub wrapped in a Proj */
3160 	if (is_Proj(new_node)) {
3161 	sub = get_Proj_pred(new_node);
3164 	set_irn_mode(sub, mode_T);
3165 	new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3167 	assert(is_ia32_Sub(sub));
3168 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3170 	dbgi = get_irn_dbg_info(psi);
3171 	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3172 	set_ia32_ls_mode(sbb, mode_Iu);
3173 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3175 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3176 	set_ia32_ls_mode(new_node, mode_Iu);
3177 	set_ia32_commutative(new_node);
3182  * Create an const array of two float consts.
3184  * @param c0        the first constant
3185  * @param c1        the second constant
3186  * @param new_mode  IN/OUT for the mode of the constants, if NULL
3187  *                  smallest possible mode will be used
/* Emits a private, constant global entity holding both values; used by
 * gen_Mux to load one of two float constants via indexed addressing. */
3189 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3192 	ir_mode *mode = *new_mode;
3194 	ir_initializer_t *initializer;
3195 	ir_tarval *tv0 = get_Const_tarval(c0);
3196 	ir_tarval *tv1 = get_Const_tarval(c1);
3199 	/* detect the best mode for the constants */
3200 	mode = get_tarval_mode(tv0);
/* shrink to float/double when both values convert without precision loss */
3202 	if (mode != mode_F) {
3203 	if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3204 	tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3206 	tv0 = tarval_convert_to(tv0, mode);
3207 	tv1 = tarval_convert_to(tv1, mode);
3208 	} else if (mode != mode_D) {
3209 	if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3210 	tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3212 	tv0 = tarval_convert_to(tv0, mode);
3213 	tv1 = tarval_convert_to(tv1, mode);
3220 	tp = ia32_get_prim_type(mode);
3221 	tp = ia32_create_float_array(tp);
3223 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3225 	set_entity_ld_ident(ent, get_entity_ident(ent));
3226 	set_entity_visibility(ent, ir_visibility_private);
3227 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3229 	initializer = create_initializer_compound(2);
3231 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3232 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3234 	set_entity_initializer(ent, initializer);
3241  * Possible transformations for creating a Setcc.
/* (enum values SETCC_TR_* are elided in this extract) */
3243 enum setcc_transform_insn {
/* Describes a recipe computed by find_const_transform(): the (possibly
 * adjusted) condition code plus a short sequence of post-processing steps
 * (add/lea/shl/neg/not/and) applied to the raw setcc result. */
3255 typedef struct setcc_transform {
3257 	ia32_condition_code_t cc;       /* condition code for the Setcc itself */
3259 	enum setcc_transform_insn transform;  /* per-step opcode selector */
3263 } setcc_transform_t;
3266  * Setcc can only handle 0 and 1 result.
3267  * Find a transformation that creates 0 and 1 from
/* Given target tarvals t (for true) and f (for false), normalize so t > f,
 * reduce to the t' = t - f, f' = 0 case, then pick the cheapest instruction
 * sequence (set / lea / shl / neg / not / and / add) to scale 0/1 into the
 * requested constants. Many case labels/returns are elided in this extract. */
3270 static void find_const_transform(ia32_condition_code_t cc,
3271 ir_tarval *t, ir_tarval *f,
3272 setcc_transform_t *res)
/* ensure t is the non-null / larger value, negating cc to compensate */
3278 	if (tarval_is_null(t)) {
3282 	cc = ia32_negate_condition_code(cc);
3283 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3284 	// now, t is the bigger one
3288 	cc = ia32_negate_condition_code(cc);
3292 	if (! tarval_is_null(f)) {
/* shift the problem to f == 0: compute with t-f and add f back at the end */
3293 	ir_tarval *t_sub = tarval_sub(t, f, NULL);
3296 	res->steps[step].transform = SETCC_TR_ADD;
3298 	if (t == tarval_bad)
3299 	panic("constant subtract failed");
3300 	if (! tarval_is_long(f))
3301 	panic("tarval is not long");
3303 	res->steps[step].val = get_tarval_long(f);
3305 	f = tarval_sub(f, f, NULL);
3306 	assert(tarval_is_null(f));
/* t == 1: a bare setcc suffices */
3309 	if (tarval_is_one(t)) {
3310 	res->steps[step].transform = SETCC_TR_SET;
3311 	res->num_steps = ++step;
3315 	if (tarval_is_minus_one(t)) {
3316 	res->steps[step].transform = SETCC_TR_NEG;
3318 	res->steps[step].transform = SETCC_TR_SET;
3319 	res->num_steps = ++step;
3322 	if (tarval_is_long(t)) {
3323 	long v = get_tarval_long(t);
3325 	res->steps[step].val = 0;
/* small multipliers map to lea scale tricks: 9/8/5/4/3/2 (labels elided) */
3328 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3330 	res->steps[step].transform = SETCC_TR_LEAxx;
3331 	res->steps[step].scale = 3; /* (a << 3) + a */
3334 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3336 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3337 	res->steps[step].scale = 3; /* (a << 3) */
3340 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3342 	res->steps[step].transform = SETCC_TR_LEAxx;
3343 	res->steps[step].scale = 2; /* (a << 2) + a */
3346 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3348 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3349 	res->steps[step].scale = 2; /* (a << 2) */
3352 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3354 	res->steps[step].transform = SETCC_TR_LEAxx;
3355 	res->steps[step].scale = 1; /* (a << 1) + a */
3358 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3360 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3361 	res->steps[step].scale = 1; /* (a << 1) */
3364 	res->num_steps = step;
/* general case: mask with AND if multiple bits set, else shift by the
 * bit position; negative values go through NEG first */
3367 	if (! tarval_is_single_bit(t)) {
3368 	res->steps[step].transform = SETCC_TR_AND;
3369 	res->steps[step].val = v;
3371 	res->steps[step].transform = SETCC_TR_NEG;
3373 	int val = get_tarval_lowest_bit(t);
3376 	res->steps[step].transform = SETCC_TR_SHL;
3377 	res->steps[step].scale = val;
3381 	res->steps[step].transform = SETCC_TR_SET;
3382 	res->num_steps = ++step;
3385 	panic("tarval is not long");
3389  * Transforms a Mux node into some code sequence.
3391  * @return The transformed node.
/* Handles in order: float abs patterns, SSE min/max patterns, float
 * constant selection via a two-element constant array, integer
 * difference-or-zero, constant 0/1-style setcc recipes, and finally a
 * generic CMov. Some lines are elided in this extract; comments only. */
3393 static ir_node *gen_Mux(ir_node *node)
3395 	dbg_info *dbgi = get_irn_dbg_info(node);
3396 	ir_node *block = get_nodes_block(node);
3397 	ir_node *new_block = be_transform_node(block);
3398 	ir_node *mux_true = get_Mux_true(node);
3399 	ir_node *mux_false = get_Mux_false(node);
3400 	ir_node *sel = get_Mux_sel(node);
3401 	ir_mode *mode = get_irn_mode(node);
3405 	ia32_condition_code_t cc;
3407 	assert(get_irn_mode(sel) == mode_b);
3409 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3411 	if (ia32_mode_needs_gp_reg(mode)) {
3412 	ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3415 	ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3416 	return create_float_abs(dbgi, block, op, is_abs < 0, node);
3420 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3421 	if (mode_is_float(mode)) {
3422 	ir_node *cmp_left = get_Cmp_left(sel);
3423 	ir_node *cmp_right = get_Cmp_right(sel);
3424 	ir_relation relation = get_Cmp_relation(sel);
3426 	if (ia32_cg_config.use_sse2) {
3427 	if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3428 	if (cmp_left == mux_true && cmp_right == mux_false) {
3429 	/* Mux(a <= b, a, b) => MIN */
3430 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3431 	match_commutative | match_am | match_two_users);
3432 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3433 	/* Mux(a <= b, b, a) => MAX */
3434 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3435 	match_commutative | match_am | match_two_users);
3437 	} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3438 	if (cmp_left == mux_true && cmp_right == mux_false) {
3439 	/* Mux(a >= b, a, b) => MAX */
3440 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3441 	match_commutative | match_am | match_two_users);
3442 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3443 	/* Mux(a >= b, b, a) => MIN */
3444 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3445 	match_commutative | match_am | match_two_users);
/* float Mux of two constants: setcc selects the index into a 2-element
 * constant array, from which the result is loaded */
3450 	if (is_Const(mux_true) && is_Const(mux_false)) {
3451 	ia32_address_mode_t am;
3456 	flags = get_flags_node(sel, &cc);
3457 	new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3459 	if (ia32_cg_config.use_sse2) {
3460 	/* cannot load from different mode on SSE */
3463 	/* x87 can load any mode */
3467 	am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (4, 8 or 16 bytes) */
3469 	if (new_mode == mode_F) {
3471 	} else if (new_mode == mode_D) {
3473 	} else if (new_mode == ia32_mode_E) {
3474 	/* arg, shift 16 NOT supported */
3476 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3478 	panic("Unsupported constant size");
3481 	am.ls_mode = new_mode;
3482 	am.addr.base = get_symconst_base();
3483 	am.addr.index = new_node;
3484 	am.addr.mem = nomem;
3486 	am.addr.scale = scale;
3487 	am.addr.use_frame = 0;
3488 	am.addr.tls_segment = false;
3489 	am.addr.frame_entity = NULL;
3490 	am.addr.symconst_sign = 0;
3491 	am.mem_proj = am.addr.mem;
3492 	am.op_type = ia32_AddrModeS;
3495 	am.pinned = op_pin_state_floats;
3497 	am.ins_permuted = false;
3499 	if (ia32_cg_config.use_sse2)
3500 	load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3502 	load = new_bd_ia32_fld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3503 	set_am_attributes(load, &am);
3505 	return new_rd_Proj(NULL, load, mode_fp, pn_ia32_res);
3507 	panic("cannot transform floating point Mux");
3510 	assert(ia32_mode_needs_gp_reg(mode));
3513 	ir_node *cmp_left = get_Cmp_left(sel);
3514 	ir_node *cmp_right = get_Cmp_right(sel);
3515 	ir_relation relation = get_Cmp_relation(sel);
3516 	ir_node *val_true = mux_true;
3517 	ir_node *val_false = mux_false;
/* normalize so that the zero constant is the false value */
3519 	if (is_Const(val_true) && is_Const_null(val_true)) {
3520 	ir_node *tmp = val_false;
3521 	val_false = val_true;
3523 	relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) => unsigned difference-or-zero */
3525 	if (is_Const_0(val_false) && is_Sub(val_true)) {
3526 	if ((relation & ir_relation_greater)
3527 	&& get_Sub_left(val_true) == cmp_left
3528 	&& get_Sub_right(val_true) == cmp_right) {
3529 	return create_doz(node, cmp_left, cmp_right);
3531 	if ((relation & ir_relation_less)
3532 	&& get_Sub_left(val_true) == cmp_right
3533 	&& get_Sub_right(val_true) == cmp_left) {
3534 	return create_doz(node, cmp_right, cmp_left);
3539 	flags = get_flags_node(sel, &cc);
3541 	if (is_Const(mux_true) && is_Const(mux_false)) {
3542 	/* both are const, good */
3543 	ir_tarval *tv_true = get_Const_tarval(mux_true);
3544 	ir_tarval *tv_false = get_Const_tarval(mux_false);
3545 	setcc_transform_t res;
3548 	find_const_transform(cc, tv_true, tv_false, &res);
/* apply the recipe steps in reverse order on top of the setcc result */
3550 	for (step = (int)res.num_steps - 1; step >= 0; --step) {
3553 	switch (res.steps[step].transform) {
3555 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3556 	add_ia32_am_offs_int(new_node, res.steps[step].val);
3558 	case SETCC_TR_ADDxx:
3559 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3562 	new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3563 	set_ia32_am_scale(new_node, res.steps[step].scale);
3564 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3566 	case SETCC_TR_LEAxx:
3567 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3568 	set_ia32_am_scale(new_node, res.steps[step].scale);
3569 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3572 	imm = ia32_immediate_from_long(res.steps[step].scale);
3573 	new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3576 	new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3579 	new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3582 	imm = ia32_immediate_from_long(res.steps[step].val);
3583 	new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3586 	new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3589 	panic("unknown setcc transform");
/* non-constant operands: fall back to a conditional move */
3593 	new_node = create_CMov(node, sel, flags, cc);
3600  * Create a conversion from x87 state register to general purpose.
/* Spill through the stack frame: fist stores the rounded integer, a Load
 * reads it back into a GP register. Unsigned 32-bit values are stored as
 * 64-bit signed and only the low word is reloaded. */
3602 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3604 	ir_node *block = be_transform_node(get_nodes_block(node));
3605 	ir_node *op = get_Conv_op(node);
3606 	ir_node *new_op = be_transform_node(op);
3607 	ir_graph *irg = current_ir_graph;
3608 	dbg_info *dbgi = get_irn_dbg_info(node);
3609 	ir_mode *mode = get_irn_mode(node);
3610 	ir_node *frame = get_irg_frame(irg);
3611 	ir_node *fist, *load, *mem;
3613 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3614 	set_irn_pinned(fist, op_pin_state_floats);
3615 	set_ia32_use_frame(fist);
3616 	set_ia32_op_type(fist, ia32_AddrModeD);
3618 	assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
3619 	mem = new_r_Proj(fist, mode_M, pn_ia32_fist_M);
3621 	assert(get_mode_size_bits(mode) <= 32);
3622 	/* exception we can only store signed 32 bit integers, so for unsigned
3623 	we store a 64bit (signed) integer and load the lower bits */
3624 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3625 	set_ia32_ls_mode(fist, mode_Ls);
3627 	set_ia32_ls_mode(fist, mode_Is);
3629 	SET_IA32_ORIG_NODE(fist, node);
3632 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3634 	set_irn_pinned(load, op_pin_state_floats);
3635 	set_ia32_use_frame(load);
3636 	set_ia32_op_type(load, ia32_AddrModeS);
3637 	set_ia32_ls_mode(load, mode_Is);
/* tell the spill-slot allocator how big the temporary must be */
3638 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3639 	ia32_attr_t *attr = get_ia32_attr(load);
3640 	attr->data.need_64bit_stackent = 1;
3642 	ia32_attr_t *attr = get_ia32_attr(load);
3643 	attr->data.need_32bit_stackent = 1;
3645 	SET_IA32_ORIG_NODE(load, node);
3647 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3651  * Creates a x87 Conv by placing a Store and a Load
/* x87 registers are always 80-bit internally; rounding to @p tgt_mode is
 * forced by storing with that mode and loading the value back. */
3653 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3655 	ir_node *block = get_nodes_block(node);
3656 	ir_graph *irg = get_Block_irg(block);
3657 	dbg_info *dbgi = get_irn_dbg_info(node);
3658 	ir_node *frame = get_irg_frame(irg);
3660 	ir_node *store, *load;
3663 	store = new_bd_ia32_fst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3664 	set_ia32_use_frame(store);
3665 	set_ia32_op_type(store, ia32_AddrModeD);
3666 	SET_IA32_ORIG_NODE(store, node);
3668 	store_mem = new_r_Proj(store, mode_M, pn_ia32_fst_M);
3670 	load = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3671 	set_ia32_use_frame(load);
3672 	set_ia32_op_type(load, ia32_AddrModeS);
3673 	SET_IA32_ORIG_NODE(load, node);
3675 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_fld_res);
/* Helper: builds an integer-to-integer conversion node, selecting the 8-bit
 * variant when the (source) mode is 8 bits wide, since 8-bit operands need
 * the restricted eax/ebx/ecx/edx register class on ia32. */
3679 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3680 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3682 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3684 func = get_mode_size_bits(mode) == 8 ?
3685 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3686 return func(dbgi, block, base, index, mem, val, mode);
3690 * Create a conversion from general purpose to x87 register
/* NOTE(review): int → x87 float via fild. First tries to let fild read its
 * operand directly through source address mode; otherwise spills the value
 * to the frame and filds it from there. fild only knows signed operands, so
 * a 32-bit *unsigned* value is widened to a 64-bit slot whose upper half is
 * zeroed (see the !mode_is_signed branch). Elided lines hide some
 * declarations (fild, store, mode, in[]) and else-branches. */
3692 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3694 ir_node *src_block = get_nodes_block(node);
3695 ir_node *block = be_transform_node(src_block);
3696 ir_graph *irg = get_Block_irg(block);
3697 dbg_info *dbgi = get_irn_dbg_info(node);
3698 ir_node *op = get_Conv_op(node);
3699 ir_node *new_op = NULL;
3701 ir_mode *store_mode;
3707 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3708 if (possible_int_mode_for_fp(src_mode)) {
3709 ia32_address_mode_t am;
3711 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3712 if (am.op_type == ia32_AddrModeS) {
3713 ia32_address_t *addr = &am.addr;
3715 fild = new_bd_ia32_fild(dbgi, block, addr->base, addr->index, addr->mem);
3716 new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3718 set_am_attributes(fild, &am);
3719 SET_IA32_ORIG_NODE(fild, node);
3721 fix_mem_proj(fild, &am);
/* fall-through path: source AM did not match, transform the operand normally */
3726 if (new_op == NULL) {
3727 new_op = be_transform_node(op);
3730 mode = get_irn_mode(op);
3732 /* first convert to 32 bit signed if necessary */
3733 if (get_mode_size_bits(src_mode) < 32) {
3734 if (!be_upper_bits_clean(op, src_mode)) {
3735 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3736 SET_IA32_ORIG_NODE(new_op, node);
3741 assert(get_mode_size_bits(mode) == 32);
/* spill the 32-bit GP value to the frame so fild can load it */
3744 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3746 set_ia32_use_frame(store);
3747 set_ia32_op_type(store, ia32_AddrModeD);
3748 set_ia32_ls_mode(store, mode_Iu);
3750 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3752 /* exception for 32bit unsigned, do a 64bit spill+load */
3753 if (!mode_is_signed(mode)) {
/* write a zero into the upper 4 bytes so the 64-bit slot reads as the
 * unsigned value interpreted as a non-negative signed 64-bit integer */
3756 ir_node *zero_const = ia32_create_Immediate(irg, NULL, 0, 0);
3758 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3759 noreg_GP, nomem, zero_const);
3760 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3762 set_ia32_use_frame(zero_store);
3763 set_ia32_op_type(zero_store, ia32_AddrModeD);
3764 add_ia32_am_offs_int(zero_store, 4);
3765 set_ia32_ls_mode(zero_store, mode_Iu);
3767 in[0] = zero_store_mem;
/* fild must see both stores; merge their memory effects with a Sync */
3770 store_mem = new_rd_Sync(dbgi, block, 2, in);
3771 store_mode = mode_Ls;
3773 store_mode = mode_Is;
3777 fild = new_bd_ia32_fild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3779 set_ia32_use_frame(fild);
3780 set_ia32_op_type(fild, ia32_AddrModeS);
3781 set_ia32_ls_mode(fild, store_mode);
3783 new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3789 * Create a conversion from one integer mode into another one
/* NOTE(review): widening int→int conversion (src strictly narrower than tgt,
 * see the assert). If the upper bits of the operand are already known clean
 * the conversion is a no-op and the transformed operand is returned as-is;
 * otherwise a Conv_I2I with possible source address mode is built. */
3791 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3792 dbg_info *dbgi, ir_node *block, ir_node *op,
3795 ir_node *new_block = be_transform_node(block);
3797 ia32_address_mode_t am;
3798 ia32_address_t *addr = &am.addr;
3801 assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3803 #ifdef DEBUG_libfirm
/* debug-only diagnostic: a Conv directly after a constant should have been
 * folded earlier */
3805 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3810 if (be_upper_bits_clean(op, src_mode)) {
3811 return be_transform_node(op);
3814 match_arguments(&am, block, NULL, op, NULL,
3815 match_am | match_8bit_am | match_16bit_am);
3817 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3818 addr->mem, am.new_op2, src_mode);
3819 set_am_attributes(new_node, &am);
3820 /* match_arguments assume that out-mode = in-mode, this isn't true here
3822 set_ia32_ls_mode(new_node, src_mode);
3823 SET_IA32_ORIG_NODE(new_node, node);
3824 new_node = fix_mem_proj(new_node, &am);
3829 * Transforms a Conv node.
3831 * @return The created ia32 Conv node
/* NOTE(review): central Conv dispatcher. Cases, in order: same-mode Conv is
 * dropped; float→float uses SSE Conv_FP2FP or an x87 store/load rounding
 * (widening x87 convs are free and dropped); float→int uses SSE Conv_FP2I or
 * gen_x87_fp_to_gp; int→float uses SSE Conv_I2FP or gen_x87_gp_to_fp plus a
 * rounding conv when the int mantissa exceeds the float mantissa; int→int
 * keeps only widening conversions. Several else/return lines are elided. */
3833 static ir_node *gen_Conv(ir_node *node)
3835 ir_node *block = get_nodes_block(node);
3836 ir_node *new_block = be_transform_node(block);
3837 ir_node *op = get_Conv_op(node);
3838 ir_node *new_op = NULL;
3839 dbg_info *dbgi = get_irn_dbg_info(node);
3840 ir_mode *src_mode = get_irn_mode(op);
3841 ir_mode *tgt_mode = get_irn_mode(node);
3842 int src_bits = get_mode_size_bits(src_mode);
3843 int tgt_bits = get_mode_size_bits(tgt_mode);
3844 ir_node *res = NULL;
3846 assert(!mode_is_int(src_mode) || src_bits <= 32);
3847 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3849 /* modeB -> X should already be lowered by the lower_mode_b pass */
3850 if (src_mode == mode_b) {
3851 panic("ConvB not lowered %+F", node);
3854 if (src_mode == tgt_mode) {
3855 /* this should be optimized already, but who knows... */
3856 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3857 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3858 return be_transform_node(op);
3861 if (mode_is_float(src_mode)) {
3862 new_op = be_transform_node(op);
3863 /* we convert from float ... */
3864 if (mode_is_float(tgt_mode)) {
3866 if (ia32_cg_config.use_sse2) {
3867 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3868 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3870 set_ia32_ls_mode(res, tgt_mode);
3872 if (src_bits < tgt_bits) {
3873 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* narrowing x87 conv: force rounding through memory */
3876 res = gen_x87_conv(tgt_mode, new_op);
3877 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3883 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3884 if (ia32_cg_config.use_sse2) {
3885 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3887 set_ia32_ls_mode(res, src_mode);
3889 return gen_x87_fp_to_gp(node);
3893 /* we convert from int ... */
3894 if (mode_is_float(tgt_mode)) {
3896 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3897 if (ia32_cg_config.use_sse2) {
3898 new_op = be_transform_node(op);
3899 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3901 set_ia32_ls_mode(res, tgt_mode);
3903 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3904 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3905 res = gen_x87_gp_to_fp(node, src_mode);
3907 /* we need a float-conv, if the int mode has more bits than the
3909 if (float_mantissa < int_mantissa) {
3910 res = gen_x87_conv(tgt_mode, res);
3911 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3915 } else if (tgt_mode == mode_b) {
3916 /* mode_b lowering already took care that we only have 0/1 values */
3917 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3918 src_mode, tgt_mode));
3919 return be_transform_node(op);
3922 if (src_bits >= tgt_bits) {
/* narrowing int conv is a no-op on ia32; upper bits stay as garbage */
3923 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3924 src_mode, tgt_mode));
3925 return be_transform_node(op);
3928 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Helper: turn the node into an ia32 Immediate if it fits the 'i' constraint,
 * otherwise transform it into a regular register operand. */
3936 static ir_node *create_immediate_or_transform(ir_node *const node)
3938 ir_node *new_node = ia32_try_create_Immediate(node, 'i');
3939 if (new_node == NULL) {
3940 new_node = be_transform_node(node);
3946 * Transforms a FrameAddr into an ia32 Add.
/* NOTE(review): despite the old comment, the code actually builds a Lea
 * (frame pointer + entity offset, resolved later when the frame is laid
 * out). */
3948 static ir_node *gen_be_FrameAddr(ir_node *node)
3950 ir_node *block = be_transform_node(get_nodes_block(node));
3951 ir_node *op = be_get_FrameAddr_frame(node);
3952 ir_node *new_op = be_transform_node(op);
3953 dbg_info *dbgi = get_irn_dbg_info(node);
3956 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3957 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3958 set_ia32_use_frame(new_node);
3960 SET_IA32_ORIG_NODE(new_node, node);
3966 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* NOTE(review): the ABI returns floats on the x87 stack; with SSE2 the value
 * lives in an xmm register, so it is spilled to the frame (xStoreSimple) and
 * reloaded into the x87 register file (fld) before building the Return.
 * Non-float / non-SSE2 returns are simply duplicated. Declarations of
 * res_type/mode/frame/sse_store/... are on elided lines. */
3968 static ir_node *gen_be_Return(ir_node *node)
3970 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3971 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3972 ir_node *new_ret_val = be_transform_node(ret_val);
3973 ir_node *new_ret_mem = be_transform_node(ret_mem);
3974 dbg_info *dbgi = get_irn_dbg_info(node);
3975 ir_node *block = be_transform_node(get_nodes_block(node));
3976 ir_graph *irg = get_Block_irg(block);
3977 ir_entity *ent = get_irg_entity(irg);
3978 ir_type *tp = get_entity_type(ent);
3992 assert(ret_val != NULL);
3993 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3994 return be_duplicate_node(node);
3997 res_type = get_method_res_type(tp, 0);
3999 if (! is_Primitive_type(res_type)) {
4000 return be_duplicate_node(node);
4003 mode = get_type_mode(res_type);
4004 if (! mode_is_float(mode)) {
4005 return be_duplicate_node(node);
4008 assert(get_method_n_ress(tp) == 1);
4010 frame = get_irg_frame(irg);
4012 /* store xmm0 onto stack */
4013 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4014 new_ret_mem, new_ret_val);
4015 set_ia32_ls_mode(sse_store, mode);
4016 set_ia32_op_type(sse_store, ia32_AddrModeD);
4017 set_ia32_use_frame(sse_store);
4018 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4020 /* load into x87 register */
4021 fld = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, mode);
4022 set_ia32_op_type(fld, ia32_AddrModeS);
4023 set_ia32_use_frame(fld);
4025 mproj = new_r_Proj(fld, mode_M, pn_ia32_fld_M);
4026 fld = new_r_Proj(fld, mode_fp, pn_ia32_fld_res);
4028 /* create a new return */
4029 arity = get_irn_arity(node);
4030 in = ALLOCAN(ir_node*, arity);
4031 pop = be_Return_get_pop(node);
/* rebuild the Return's operand list, substituting the fld result for the
 * original value and (on an elided line) the fld memory proj for the memory */
4032 for (i = 0; i < arity; ++i) {
4033 ir_node *op = get_irn_n(node, i);
4034 if (op == ret_val) {
4036 } else if (op == ret_mem) {
4039 in[i] = be_transform_node(op);
4042 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4043 copy_node_attr(irg, node, new_node);
4049 * Transform a be_AddSP into an ia32_SubSP.
/* NOTE(review): the direction flips because the ia32 stack grows downwards —
 * growing the stack (be_AddSP) means subtracting from esp. The result is
 * pinned to the esp register. */
4051 static ir_node *gen_be_AddSP(ir_node *node)
4053 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4054 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4056 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4057 match_am | match_immediate);
4058 assert(is_ia32_SubSP(new_node));
4059 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4060 &ia32_registers[REG_ESP]);
4065 * Transform a be_SubSP into an ia32_AddSP
/* NOTE(review): mirror image of gen_be_AddSP — shrinking the downward-growing
 * stack means adding to esp; the result is pinned to esp. */
4067 static ir_node *gen_be_SubSP(ir_node *node)
4069 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4070 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4072 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4073 match_am | match_immediate);
4074 assert(is_ia32_AddSP(new_node));
4075 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4076 &ia32_registers[REG_ESP]);
/* Transform a Phi: select the register class requirement from the Phi's mode
 * (gp for integers/pointers, xmm or x87 fp for floats depending on SSE2,
 * no requirement otherwise, e.g. for memory Phis) and delegate to
 * be_transform_phi(). */
4080 static ir_node *gen_Phi(ir_node *node)
4082 ir_mode *mode = get_irn_mode(node);
4083 const arch_register_req_t *req;
4084 if (ia32_mode_needs_gp_reg(mode)) {
4085 /* we shouldn't have any 64bit stuff around anymore */
4086 assert(get_mode_size_bits(mode) <= 32);
4087 /* all integer operations are on 32bit registers now */
4089 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4090 } else if (mode_is_float(mode)) {
4091 if (ia32_cg_config.use_sse2) {
4093 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4096 req = ia32_reg_classes[CLASS_ia32_fp].class_req;
4099 req = arch_no_register_req;
4102 return be_transform_phi(node, req);
/* Transform an unconditional Jmp into an ia32 Jmp in the transformed block. */
4105 static ir_node *gen_Jmp(ir_node *node)
4107 ir_node *block = get_nodes_block(node);
4108 ir_node *new_block = be_transform_node(block);
4109 dbg_info *dbgi = get_irn_dbg_info(node);
4112 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4113 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp): the target address may be folded into an
 * address mode or immediate via match_arguments. */
4121 static ir_node *gen_IJmp(ir_node *node)
4123 ir_node *block = get_nodes_block(node);
4124 ir_node *new_block = be_transform_node(block);
4125 dbg_info *dbgi = get_irn_dbg_info(node);
4126 ir_node *op = get_IJmp_target(node);
4128 ia32_address_mode_t am;
4129 ia32_address_t *addr = &am.addr;
4131 assert(get_irn_mode(op) == mode_P);
4133 match_arguments(&am, block, NULL, op, NULL,
4134 match_am | match_immediate | match_upconv);
4136 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4137 addr->mem, am.new_op2);
4138 set_am_attributes(new_node, &am);
4139 SET_IA32_ORIG_NODE(new_node, node);
4141 new_node = fix_mem_proj(new_node, &am);
/* Transform an l_Add (lower word of a lowered 64-bit add) into an ia32 Add.
 * The node is forced to mode_T so the carry flag output remains accessible
 * to the matching l_Adc. */
4146 static ir_node *gen_ia32_l_Add(ir_node *node)
4148 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4149 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4150 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4151 match_commutative | match_am | match_immediate |
4152 match_mode_neutral);
4154 if (is_Proj(lowered)) {
4155 lowered = get_Proj_pred(lowered);
4157 assert(is_ia32_Add(lowered));
4158 set_irn_mode(lowered, mode_T);
/* Transform an l_Adc (upper word of a lowered 64-bit add, consuming the
 * carry produced by l_Add) into an ia32 Adc. */
4164 static ir_node *gen_ia32_l_Adc(ir_node *node)
4166 return gen_binop_flags(node, new_bd_ia32_Adc,
4167 match_commutative | match_am | match_immediate |
4168 match_mode_neutral);
4172 * Transforms a l_MulS into a "real" MulS node.
4174 * @return the created ia32 Mul node
/* (unsigned widening multiply used by the 64-bit lowering) */
4176 static ir_node *gen_ia32_l_Mul(ir_node *node)
4178 ir_node *left = get_binop_left(node);
4179 ir_node *right = get_binop_right(node);
4181 return gen_binop(node, left, right, new_bd_ia32_Mul,
4182 match_commutative | match_am | match_mode_neutral);
4186 * Transforms a l_IMulS into a "real" IMul1OPS node.
4188 * @return the created ia32 IMul1OP node
/* (signed one-operand widening multiply used by the 64-bit lowering) */
4190 static ir_node *gen_ia32_l_IMul(ir_node *node)
4192 ir_node *left = get_binop_left(node);
4193 ir_node *right = get_binop_right(node);
4195 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4196 match_commutative | match_am | match_mode_neutral);
/* Transform an l_Sub (lower word of a lowered 64-bit subtract) into an ia32
 * Sub; forced to mode_T so the borrow flag stays available for l_Sbb.
 * Note: no match_commutative here — subtraction is not commutative. */
4199 static ir_node *gen_ia32_l_Sub(ir_node *node)
4201 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4202 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4203 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4204 match_am | match_immediate | match_mode_neutral);
4206 if (is_Proj(lowered)) {
4207 lowered = get_Proj_pred(lowered);
4209 assert(is_ia32_Sub(lowered));
4210 set_irn_mode(lowered, mode_T);
/* Transform an l_Sbb (upper word of a lowered 64-bit subtract, consuming the
 * borrow produced by l_Sub) into an ia32 Sbb. */
4216 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4218 return gen_binop_flags(node, new_bd_ia32_Sbb,
4219 match_am | match_immediate | match_mode_neutral);
/* Transform l_LLtoFloat: convert a lowered 64-bit integer (given as low/high
 * 32-bit halves) to an x87 float. Both halves are stored into one 64-bit
 * frame slot, merged with a Sync, and loaded with a 64-bit fild. fild treats
 * the slot as *signed*; for an unsigned high word the bias constant
 * ia32_ULLBIAS (2^64 as float) is added whenever the sign bit was set —
 * the Shr by 31 turns the sign bit into a 0/4-scaled table index into a
 * two-entry constant (0.0 / 2^64). Several declarations and in[] setup lines
 * are elided from this listing. */
4222 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4224 ir_node *src_block = get_nodes_block(node);
4225 ir_node *block = be_transform_node(src_block);
4226 ir_graph *irg = get_Block_irg(block);
4227 dbg_info *dbgi = get_irn_dbg_info(node);
4228 ir_node *frame = get_irg_frame(irg);
4229 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4230 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4231 ir_node *new_val_low = be_transform_node(val_low);
4232 ir_node *new_val_high = be_transform_node(val_high);
4234 ir_node *sync, *fild, *res;
4236 ir_node *store_high;
4240 if (ia32_cg_config.use_sse2) {
4241 panic("not implemented for SSE2");
/* write the two 32-bit halves into one 64-bit frame slot */
4245 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4247 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4249 SET_IA32_ORIG_NODE(store_low, node);
4250 SET_IA32_ORIG_NODE(store_high, node);
4252 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4253 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4255 set_ia32_use_frame(store_low);
4256 set_ia32_use_frame(store_high);
4257 set_ia32_op_type(store_low, ia32_AddrModeD);
4258 set_ia32_op_type(store_high, ia32_AddrModeD);
4259 set_ia32_ls_mode(store_low, mode_Iu);
4260 set_ia32_ls_mode(store_high, mode_Is);
4261 add_ia32_am_offs_int(store_high, 4);
4265 sync = new_rd_Sync(dbgi, block, 2, in);
4268 fild = new_bd_ia32_fild(dbgi, block, frame, noreg_GP, sync);
4270 set_ia32_use_frame(fild);
4271 set_ia32_op_type(fild, ia32_AddrModeS);
4272 set_ia32_ls_mode(fild, mode_Ls);
4274 SET_IA32_ORIG_NODE(fild, node);
4276 res = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
4278 if (! mode_is_signed(get_irn_mode(val_high))) {
4279 ia32_address_mode_t am;
4281 ir_node *count = ia32_create_Immediate(irg, NULL, 0, 31);
4284 am.addr.base = get_symconst_base();
4285 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4286 am.addr.mem = nomem;
4289 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4290 am.addr.tls_segment = false;
4291 am.addr.use_frame = 0;
4292 am.addr.frame_entity = NULL;
4293 am.addr.symconst_sign = 0;
4294 am.ls_mode = mode_F;
4295 am.mem_proj = nomem;
4296 am.op_type = ia32_AddrModeS;
4298 am.new_op2 = ia32_new_NoReg_fp(irg);
4299 am.pinned = op_pin_state_floats;
4301 am.ins_permuted = false;
4303 fadd = new_bd_ia32_fadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4304 am.new_op1, am.new_op2, get_fpcw());
4305 set_am_attributes(fadd, &am);
4307 set_irn_mode(fadd, mode_T);
4308 res = new_rd_Proj(NULL, fadd, mode_fp, pn_ia32_res);
/* Transform l_FloattoLL: store an x87 float as a 64-bit integer into a frame
 * slot via fist/fisttp (gen_vfist chooses). Only the memory result is
 * returned here; the two 32-bit halves are loaded by gen_Proj_l_FloattoLL. */
4313 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4315 ir_node *src_block = get_nodes_block(node);
4316 ir_node *block = be_transform_node(src_block);
4317 ir_graph *irg = get_Block_irg(block);
4318 dbg_info *dbgi = get_irn_dbg_info(node);
4319 ir_node *frame = get_irg_frame(irg);
4320 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4321 ir_node *new_val = be_transform_node(val);
4324 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4325 SET_IA32_ORIG_NODE(fist, node);
4326 set_ia32_use_frame(fist);
4327 set_ia32_op_type(fist, ia32_AddrModeD);
4328 set_ia32_ls_mode(fist, mode_Ls);
/* fist and fisttp share the memory proj number, so one Proj works for both */
4330 assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
4331 return new_r_Proj(fist, mode_M, pn_ia32_fist_M);
/* Transform a Proj of l_FloattoLL: load one 32-bit half of the 64-bit value
 * the fist wrote into the frame slot (offset 4 for the high half). */
4334 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4336 ir_node *block = be_transform_node(get_nodes_block(node));
4337 ir_graph *irg = get_Block_irg(block);
4338 ir_node *pred = get_Proj_pred(node);
4339 ir_node *new_pred = be_transform_node(pred);
4340 ir_node *frame = get_irg_frame(irg);
4341 dbg_info *dbgi = get_irn_dbg_info(node);
4342 long pn = get_Proj_proj(node);
4347 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4348 SET_IA32_ORIG_NODE(load, node);
4349 set_ia32_use_frame(load);
4350 set_ia32_op_type(load, ia32_AddrModeS);
4351 set_ia32_ls_mode(load, mode_Iu);
4352 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4353 * 32 bit from it with this particular load */
4354 attr = get_ia32_attr(load);
4355 attr->data.need_64bit_stackent = 1;
4357 if (pn == pn_ia32_l_FloattoLL_res_high) {
4358 add_ia32_am_offs_int(load, 4);
4360 assert(pn == pn_ia32_l_FloattoLL_res_low);
4363 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4369 * Transform the Projs of an AddSP.
/* (be_AddSP was transformed to ia32_SubSP, so proj numbers are remapped to
 * the SubSP outputs; the stack proj is pinned to esp) */
4371 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4373 ir_node *pred = get_Proj_pred(node);
4374 ir_node *new_pred = be_transform_node(pred);
4375 dbg_info *dbgi = get_irn_dbg_info(node);
4376 long proj = get_Proj_proj(node);
4378 if (proj == pn_be_AddSP_sp) {
4379 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4380 pn_ia32_SubSP_stack);
4381 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4383 } else if (proj == pn_be_AddSP_res) {
4384 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4385 pn_ia32_SubSP_addr);
4386 } else if (proj == pn_be_AddSP_M) {
4387 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4390 panic("No idea how to transform proj->AddSP");
4394 * Transform the Projs of a SubSP.
/* (be_SubSP was transformed to ia32_AddSP; remap projs accordingly) */
4396 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4398 ir_node *pred = get_Proj_pred(node);
4399 ir_node *new_pred = be_transform_node(pred);
4400 dbg_info *dbgi = get_irn_dbg_info(node);
4401 long proj = get_Proj_proj(node);
4403 if (proj == pn_be_SubSP_sp) {
4404 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4405 pn_ia32_AddSP_stack);
4406 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4408 } else if (proj == pn_be_SubSP_M) {
4409 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4412 panic("No idea how to transform proj->SubSP");
4416 * Transform and renumber the Projs from a Load.
/* NOTE(review): the Load may have been folded into another node as source
 * address mode, so the transformed predecessor can be an ia32 Load, a
 * Conv_I2I(8Bit), an xLoad (SSE), an fld (x87) — or, when the load was fully
 * folded away, something else entirely, in which case only the memory proj
 * remains valid. Multi-user loads keep their ProjM untransformed at first so
 * address-mode matching is not blocked. */
4418 static ir_node *gen_Proj_Load(ir_node *node)
4421 ir_node *pred = get_Proj_pred(node);
4422 dbg_info *dbgi = get_irn_dbg_info(node);
4423 long proj = get_Proj_proj(node);
4425 /* loads might be part of source address mode matches, so we don't
4426 * transform the ProjMs yet (with the exception of loads whose result is
4429 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4432 /* this is needed, because sometimes we have loops that are only
4433 reachable through the ProjM */
4434 be_enqueue_preds(node);
4435 /* do it in 2 steps, to silence firm verifier */
4436 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4437 set_Proj_proj(res, pn_ia32_mem);
4441 /* renumber the proj */
4442 new_pred = be_transform_node(pred);
4443 if (is_ia32_Load(new_pred)) {
4444 switch ((pn_Load)proj) {
4446 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4448 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4449 case pn_Load_X_except:
4450 /* This Load might raise an exception. Mark it. */
4451 set_ia32_exc_label(new_pred, 1);
4452 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4453 case pn_Load_X_regular:
4454 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4456 } else if (is_ia32_Conv_I2I(new_pred) ||
4457 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a conversion; give it a mode_T so we can
 * attach projs */
4458 set_irn_mode(new_pred, mode_T);
4459 switch ((pn_Load)proj) {
4461 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4463 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4464 case pn_Load_X_except:
4465 /* This Load might raise an exception. Mark it. */
4466 set_ia32_exc_label(new_pred, 1);
4467 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4468 case pn_Load_X_regular:
4469 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4471 } else if (is_ia32_xLoad(new_pred)) {
4472 switch ((pn_Load)proj) {
4474 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4476 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4477 case pn_Load_X_except:
4478 /* This Load might raise an exception. Mark it. */
4479 set_ia32_exc_label(new_pred, 1);
4480 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4481 case pn_Load_X_regular:
4482 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4484 } else if (is_ia32_fld(new_pred)) {
4485 switch ((pn_Load)proj) {
4487 return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fld_res);
4489 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fld_M);
4490 case pn_Load_X_except:
4491 /* This Load might raise an exception. Mark it. */
4492 set_ia32_exc_label(new_pred, 1);
4493 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_except);
4494 case pn_Load_X_regular:
4495 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_regular);
4498 /* can happen for ProJMs when source address mode happened for the
4501 /* however it should not be the result proj, as that would mean the
4502 load had multiple users and should not have been used for
4504 if (proj != pn_Load_M) {
4505 panic("internal error: transformed node not a Load");
/* NOTE(review): '1' is a bare magic proj number here — presumably the
 * memory output of whatever node the load was folded into; verify against
 * the ia32 node specs */
4507 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4510 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs from a Store. The transformed predecessor
 * may be any of the ia32 store flavours (Store/Store8Bit, fist, fisttp, fst,
 * xStore), a Sync produced by gen_float_const_Store, or a node that absorbed
 * the store via destination address mode (only the M proj survives then). */
4513 static ir_node *gen_Proj_Store(ir_node *node)
4515 ir_node *pred = get_Proj_pred(node);
4516 ir_node *new_pred = be_transform_node(pred);
4517 dbg_info *dbgi = get_irn_dbg_info(node);
4518 long pn = get_Proj_proj(node);
4520 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4521 switch ((pn_Store)pn) {
4523 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4524 case pn_Store_X_except:
4525 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4526 case pn_Store_X_regular:
4527 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4529 } else if (is_ia32_fist(new_pred)) {
4530 switch ((pn_Store)pn) {
4532 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fist_M);
4533 case pn_Store_X_except:
4534 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_except);
4535 case pn_Store_X_regular:
4536 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_regular);
4538 } else if (is_ia32_fisttp(new_pred)) {
4539 switch ((pn_Store)pn) {
4541 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fisttp_M);
4542 case pn_Store_X_except:
4543 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_except);
4544 case pn_Store_X_regular:
4545 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_regular);
4547 } else if (is_ia32_fst(new_pred)) {
4548 switch ((pn_Store)pn) {
4550 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fst_M);
4551 case pn_Store_X_except:
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_except);
4553 case pn_Store_X_regular:
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_regular);
4556 } else if (is_ia32_xStore(new_pred)) {
4557 switch ((pn_Store)pn) {
4559 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4560 case pn_Store_X_except:
4561 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4562 case pn_Store_X_regular:
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4565 } else if (is_Sync(new_pred)) {
4566 /* hack for the case that gen_float_const_Store produced a Sync */
4567 if (pn == pn_Store_M) {
4570 panic("exception control flow not implemented yet");
4571 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4572 /* destination address mode */
4573 if (pn == pn_Store_M) {
4576 panic("exception control flow for destination AM not implemented yet");
4579 panic("No idea how to transform Proj(Store) %+F", node);
4583 * Transform and renumber the Projs from a Div or Mod instruction.
/* (handles integer Div/IDiv, SSE xDiv and x87 fdiv results; the asserts
 * guarantee Div and IDiv share proj numbers so one mapping suffices) */
4585 static ir_node *gen_Proj_Div(ir_node *node)
4587 ir_node *pred = get_Proj_pred(node);
4588 ir_node *new_pred = be_transform_node(pred);
4589 dbg_info *dbgi = get_irn_dbg_info(node);
4590 long proj = get_Proj_proj(node);
4592 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4593 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4595 switch ((pn_Div)proj) {
4597 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4598 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4599 } else if (is_ia32_xDiv(new_pred)) {
4600 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4601 } else if (is_ia32_fdiv(new_pred)) {
4602 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fdiv_M);
4604 panic("Div transformed to unexpected thing %+F", new_pred);
4607 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4608 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4609 } else if (is_ia32_xDiv(new_pred)) {
4610 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4611 } else if (is_ia32_fdiv(new_pred)) {
4612 return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fdiv_res);
4614 panic("Div transformed to unexpected thing %+F", new_pred);
4616 case pn_Div_X_except:
4617 set_ia32_exc_label(new_pred, 1);
4618 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4619 case pn_Div_X_regular:
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4623 panic("No idea how to transform proj->Div");
4627 * Transform and renumber the Projs from a Div or Mod instruction.
/* (Mod variant: predecessor must be an integer Div/IDiv — the remainder is
 * the mod_res output of the same instruction) */
4629 static ir_node *gen_Proj_Mod(ir_node *node)
4631 ir_node *pred = get_Proj_pred(node);
4632 ir_node *new_pred = be_transform_node(pred);
4633 dbg_info *dbgi = get_irn_dbg_info(node);
4634 long proj = get_Proj_proj(node);
4636 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4637 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4638 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4640 switch ((pn_Mod)proj) {
4642 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4644 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4645 case pn_Mod_X_except:
4646 set_ia32_exc_label(new_pred, 1);
4647 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4648 case pn_Mod_X_regular:
4649 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4651 panic("No idea how to transform proj->Mod");
4655 * Transform and renumber the Projs from a CopyB.
/* (CopyB_i is the fixed-size immediate-count variant, CopyB the generic one;
 * both have matching M / X_regular / X_except outputs) */
4657 static ir_node *gen_Proj_CopyB(ir_node *node)
4659 ir_node *pred = get_Proj_pred(node);
4660 ir_node *new_pred = be_transform_node(pred);
4661 dbg_info *dbgi = get_irn_dbg_info(node);
4662 long proj = get_Proj_proj(node);
4664 switch ((pn_CopyB)proj) {
4666 if (is_ia32_CopyB_i(new_pred)) {
4667 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4668 } else if (is_ia32_CopyB(new_pred)) {
4669 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4672 case pn_CopyB_X_regular:
4673 if (is_ia32_CopyB_i(new_pred)) {
4674 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4675 } else if (is_ia32_CopyB(new_pred)) {
4676 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4679 case pn_CopyB_X_except:
4680 if (is_ia32_CopyB_i(new_pred)) {
4681 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4682 } else if (is_ia32_CopyB(new_pred)) {
4683 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4688 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call. The call target may be matched into
 * an address mode or immediate; up to three GP register parameters are routed
 * to their fixed registers (eax/ecx/edx); a float return value forces the x87
 * simulator later; SSE2 calls are collected for post-processing. Declarations
 * of i/fpcw/mem/call and parts of the epilogue are on elided lines. */
4691 static ir_node *gen_be_Call(ir_node *node)
4693 dbg_info *const dbgi = get_irn_dbg_info(node);
4694 ir_node *const src_block = get_nodes_block(node);
4695 ir_node *const block = be_transform_node(src_block);
4696 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4697 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4698 ir_node *const sp = be_transform_node(src_sp);
4699 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4700 ia32_address_mode_t am;
4701 ia32_address_t *const addr = &am.addr;
4706 ir_node * eax = noreg_GP;
4707 ir_node * ecx = noreg_GP;
4708 ir_node * edx = noreg_GP;
4709 unsigned const pop = be_Call_get_pop(node);
4710 ir_type *const call_tp = be_Call_get_type(node);
4711 int old_no_pic_adjust;
4712 int throws_exception = ir_throws_exception(node);
4714 /* Run the x87 simulator if the call returns a float value */
4715 if (get_method_n_ress(call_tp) > 0) {
4716 ir_type *const res_type = get_method_res_type(call_tp, 0);
4717 ir_mode *const res_mode = get_type_mode(res_type);
4719 if (res_mode != NULL && mode_is_float(res_mode)) {
4720 ir_graph *irg = get_Block_irg(block);
4721 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4722 irg_data->do_x87_sim = 1;
4726 /* We do not want be_Call direct calls */
4727 assert(be_Call_get_entity(node) == NULL);
4729 /* special case for PIC trampoline calls */
4730 old_no_pic_adjust = ia32_no_pic_adjust;
4731 ia32_no_pic_adjust = be_options.pic;
4733 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4734 match_am | match_immediate | match_upconv);
4736 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; the remaining trailing inputs are the GP
 * register parameters, each constrained to exactly one of eax/ecx/edx */
4738 i = get_irn_arity(node) - 1;
4739 fpcw = be_transform_node(get_irn_n(node, i--));
4740 for (; i >= n_be_Call_first_arg; --i) {
4741 arch_register_req_t const *const req
4742 = arch_get_irn_register_req_in(node, i);
4743 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4745 assert(req->type == arch_register_req_type_limited);
4746 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4748 switch (*req->limited) {
4749 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4750 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4751 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4752 default: panic("Invalid GP register for register parameter");
4756 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4757 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4758 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4759 ir_set_throws_exception(call, throws_exception);
4760 set_am_attributes(call, &am);
4761 call = fix_mem_proj(call, &am);
4763 if (get_irn_pinned(node) == op_pin_state_pinned)
4764 set_irn_pinned(call, op_pin_state_pinned);
4766 SET_IA32_ORIG_NODE(call, node);
4768 if (ia32_cg_config.use_sse2) {
4769 /* remember this call for post-processing */
4770 ARR_APP1(ir_node *, call_list, call);
4771 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4778 * Transform Builtin trap
4780 static ir_node *gen_trap(ir_node *node)
4782 dbg_info *dbgi = get_irn_dbg_info(node);
4783 ir_node *block = be_transform_node(get_nodes_block(node));
4784 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4786 return new_bd_ia32_UD2(dbgi, block, mem);
4790 * Transform Builtin debugbreak
4792 static ir_node *gen_debugbreak(ir_node *node)
4794 dbg_info *dbgi = get_irn_dbg_info(node);
4795 ir_node *block = be_transform_node(get_nodes_block(node));
4796 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4798 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4802 * Transform Builtin return_address
4804 static ir_node *gen_return_address(ir_node *node)
4806 ir_node *param = get_Builtin_param(node, 0);
4807 ir_node *frame = get_Builtin_param(node, 1);
4808 dbg_info *dbgi = get_irn_dbg_info(node);
4809 ir_tarval *tv = get_Const_tarval(param);
4810 ir_graph *irg = get_irn_irg(node);
4811 unsigned long value = get_tarval_long(tv);
4813 ir_node *block = be_transform_node(get_nodes_block(node));
4814 ir_node *ptr = be_transform_node(frame);
4818 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4819 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4820 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4823 /* load the return address from this frame */
4824 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4826 set_irn_pinned(load, get_irn_pinned(node));
4827 set_ia32_op_type(load, ia32_AddrModeS);
4828 set_ia32_ls_mode(load, mode_Iu);
4830 set_ia32_am_offs_int(load, 0);
4831 set_ia32_use_frame(load);
4832 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4834 if (get_irn_pinned(node) == op_pin_state_floats) {
4835 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4836 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4837 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4838 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4841 SET_IA32_ORIG_NODE(load, node);
4842 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4846 * Transform Builtin frame_address
4848 static ir_node *gen_frame_address(ir_node *node)
4850 ir_node *param = get_Builtin_param(node, 0);
4851 ir_node *frame = get_Builtin_param(node, 1);
4852 dbg_info *dbgi = get_irn_dbg_info(node);
4853 ir_tarval *tv = get_Const_tarval(param);
4854 ir_graph *irg = get_irn_irg(node);
4855 unsigned long value = get_tarval_long(tv);
4857 ir_node *block = be_transform_node(get_nodes_block(node));
4858 ir_node *ptr = be_transform_node(frame);
4863 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4864 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4865 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4868 /* load the frame address from this frame */
4869 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4871 set_irn_pinned(load, get_irn_pinned(node));
4872 set_ia32_op_type(load, ia32_AddrModeS);
4873 set_ia32_ls_mode(load, mode_Iu);
4875 ent = ia32_get_frame_address_entity(irg);
4877 set_ia32_am_offs_int(load, 0);
4878 set_ia32_use_frame(load);
4879 set_ia32_frame_ent(load, ent);
4881 /* will fail anyway, but gcc does this: */
4882 set_ia32_am_offs_int(load, 0);
4885 if (get_irn_pinned(node) == op_pin_state_floats) {
4886 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4887 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4888 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4889 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4892 SET_IA32_ORIG_NODE(load, node);
4893 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
 * Transform Builtin prefetch.
4899 static ir_node *gen_prefetch(ir_node *node)
4902 ir_node *ptr, *block, *mem, *base, *idx;
4903 ir_node *param, *new_node;
4906 ia32_address_t addr;
4908 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4909 /* no prefetch at all, route memory */
4910 return be_transform_node(get_Builtin_mem(node));
4913 param = get_Builtin_param(node, 1);
4914 tv = get_Const_tarval(param);
4915 rw = get_tarval_long(tv);
4917 /* construct load address */
4918 memset(&addr, 0, sizeof(addr));
4919 ptr = get_Builtin_param(node, 0);
4920 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4927 base = be_transform_node(base);
4933 idx = be_transform_node(idx);
4936 dbgi = get_irn_dbg_info(node);
4937 block = be_transform_node(get_nodes_block(node));
4938 mem = be_transform_node(get_Builtin_mem(node));
4940 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4941 /* we have 3DNow!, this was already checked above */
4942 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4943 } else if (ia32_cg_config.use_sse_prefetch) {
4944 /* note: rw == 1 is IGNORED in that case */
4945 param = get_Builtin_param(node, 2);
4946 tv = get_Const_tarval(param);
4947 locality = get_tarval_long(tv);
4949 /* SSE style prefetch */
4952 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4955 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4958 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4961 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4965 assert(ia32_cg_config.use_3dnow_prefetch);
4966 /* 3DNow! style prefetch */
4967 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
4970 set_irn_pinned(new_node, get_irn_pinned(node));
4971 set_ia32_op_type(new_node, ia32_AddrModeS);
4972 set_ia32_ls_mode(new_node, mode_Bu);
4973 set_address(new_node, &addr);
4975 SET_IA32_ORIG_NODE(new_node, node);
4977 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4981 * Transform bsf like node
4983 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4985 ir_node *param = get_Builtin_param(node, 0);
4986 dbg_info *dbgi = get_irn_dbg_info(node);
4988 ir_node *block = get_nodes_block(node);
4989 ir_node *new_block = be_transform_node(block);
4991 ia32_address_mode_t am;
4992 ia32_address_t *addr = &am.addr;
4995 match_arguments(&am, block, NULL, param, NULL, match_am);
4997 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4998 set_am_attributes(cnt, &am);
4999 set_ia32_ls_mode(cnt, get_irn_mode(param));
5001 SET_IA32_ORIG_NODE(cnt, node);
5002 return fix_mem_proj(cnt, &am);
5006 * Transform builtin ffs.
5008 static ir_node *gen_ffs(ir_node *node)
5010 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5011 ir_node *real = skip_Proj(bsf);
5012 dbg_info *dbgi = get_irn_dbg_info(real);
5013 ir_node *block = get_nodes_block(real);
5014 ir_node *flag, *set, *conv, *neg, *orn, *add;
5017 if (get_irn_mode(real) != mode_T) {
5018 set_irn_mode(real, mode_T);
5019 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5022 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
5025 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5026 SET_IA32_ORIG_NODE(set, node);
5029 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5030 SET_IA32_ORIG_NODE(conv, node);
5033 neg = new_bd_ia32_Neg(dbgi, block, conv);
5036 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5037 set_ia32_ls_mode(orn, mode_Iu);
5038 set_ia32_commutative(orn);
5041 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5042 add_ia32_am_offs_int(add, 1);
5047 * Transform builtin clz.
5049 static ir_node *gen_clz(ir_node *node)
5051 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5052 ir_node *real = skip_Proj(bsr);
5053 dbg_info *dbgi = get_irn_dbg_info(real);
5054 ir_node *block = get_nodes_block(real);
5055 ir_graph *irg = get_Block_irg(block);
5056 ir_node *imm = ia32_create_Immediate(irg, NULL, 0, 31);
5058 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5062 * Transform builtin ctz.
5064 static ir_node *gen_ctz(ir_node *node)
5066 return gen_unop_AM(node, new_bd_ia32_Bsf);
5070 * Transform builtin parity.
5072 static ir_node *gen_parity(ir_node *node)
5074 dbg_info *dbgi = get_irn_dbg_info(node);
5075 ir_node *block = get_nodes_block(node);
5076 ir_node *new_block = be_transform_node(block);
5077 ir_node *param = get_Builtin_param(node, 0);
5078 ir_node *new_param = be_transform_node(param);
5081 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5082 * so we have to do complicated xoring first.
5083 * (we should also better lower this before the backend so we still have a
5084 * chance for CSE, constant folding and other goodies for some of these
5087 ir_graph *const irg = get_Block_irg(new_block);
5088 ir_node *const count = ia32_create_Immediate(irg, NULL, 0, 16);
5089 ir_node *const shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5090 ir_node *const xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem, shr, new_param);
5091 ir_node *const xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5094 set_ia32_ls_mode(xorn, mode_Iu);
5095 set_ia32_commutative(xorn);
5097 set_irn_mode(xor2, mode_T);
5098 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5101 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5102 SET_IA32_ORIG_NODE(new_node, node);
5105 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5106 nomem, new_node, mode_Bu);
5107 SET_IA32_ORIG_NODE(new_node, node);
5112 * Transform builtin popcount
5114 static ir_node *gen_popcount(ir_node *node)
5116 ir_node *param = get_Builtin_param(node, 0);
5117 dbg_info *dbgi = get_irn_dbg_info(node);
5119 ir_node *block = get_nodes_block(node);
5120 ir_node *new_block = be_transform_node(block);
5123 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5125 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5126 if (ia32_cg_config.use_popcnt) {
5127 ia32_address_mode_t am;
5128 ia32_address_t *addr = &am.addr;
5131 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);
5133 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5134 set_am_attributes(cnt, &am);
5135 set_ia32_ls_mode(cnt, get_irn_mode(param));
5137 SET_IA32_ORIG_NODE(cnt, node);
5138 return fix_mem_proj(cnt, &am);
5141 new_param = be_transform_node(param);
5143 /* do the standard popcount algo */
5144 /* TODO: This is stupid, we should transform this before the backend,
5145 * to get CSE, localopts, etc. for the operations
5146 * TODO: This is also not the optimal algorithm (it is just the starting
5147 * example in hackers delight, they optimize it more on the following page)
5148 * But I'm too lazy to fix this now, as the code should get lowered before
5149 * the backend anyway.
5151 ir_graph *const irg = get_Block_irg(new_block);
5153 /* m1 = x & 0x55555555 */
5154 imm = ia32_create_Immediate(irg, NULL, 0, 0x55555555);
5155 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5158 simm = ia32_create_Immediate(irg, NULL, 0, 1);
5159 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5161 /* m2 = s1 & 0x55555555 */
5162 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5165 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5167 /* m4 = m3 & 0x33333333 */
5168 imm = ia32_create_Immediate(irg, NULL, 0, 0x33333333);
5169 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5172 simm = ia32_create_Immediate(irg, NULL, 0, 2);
5173 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5175 /* m5 = s2 & 0x33333333 */
5176 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5179 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5181 /* m7 = m6 & 0x0F0F0F0F */
5182 imm = ia32_create_Immediate(irg, NULL, 0, 0x0F0F0F0F);
5183 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5186 simm = ia32_create_Immediate(irg, NULL, 0, 4);
5187 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5189 /* m8 = s3 & 0x0F0F0F0F */
5190 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5193 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5195 /* m10 = m9 & 0x00FF00FF */
5196 imm = ia32_create_Immediate(irg, NULL, 0, 0x00FF00FF);
5197 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5200 simm = ia32_create_Immediate(irg, NULL, 0, 8);
5201 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5203 /* m11 = s4 & 0x00FF00FF */
5204 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5206 /* m12 = m10 + m11 */
5207 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5209 /* m13 = m12 & 0x0000FFFF */
5210 imm = ia32_create_Immediate(irg, NULL, 0, 0x0000FFFF);
5211 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5213 /* s5 = m12 >> 16 */
5214 simm = ia32_create_Immediate(irg, NULL, 0, 16);
5215 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5217 /* res = m13 + s5 */
5218 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5222 * Transform builtin byte swap.
5224 static ir_node *gen_bswap(ir_node *node)
5226 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5227 dbg_info *dbgi = get_irn_dbg_info(node);
5229 ir_node *block = get_nodes_block(node);
5230 ir_node *new_block = be_transform_node(block);
5231 ir_mode *mode = get_irn_mode(param);
5232 unsigned size = get_mode_size_bits(mode);
5236 if (ia32_cg_config.use_bswap) {
5237 /* swap available */
5238 return new_bd_ia32_Bswap(dbgi, new_block, param);
5240 ir_graph *const irg = get_Block_irg(new_block);
5241 ir_node *const i8 = ia32_create_Immediate(irg, NULL, 0, 8);
5242 ir_node *const rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5243 ir_node *const i16 = ia32_create_Immediate(irg, NULL, 0, 16);
5244 ir_node *const rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5245 ir_node *const rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5246 set_ia32_ls_mode(rol1, mode_Hu);
5247 set_ia32_ls_mode(rol2, mode_Iu);
5248 set_ia32_ls_mode(rol3, mode_Hu);
5253 /* swap16 always available */
5254 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5257 panic("Invalid bswap size (%d)", size);
5262 * Transform builtin outport.
5264 static ir_node *gen_outport(ir_node *node)
5266 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5267 ir_node *oldv = get_Builtin_param(node, 1);
5268 ir_mode *mode = get_irn_mode(oldv);
5269 ir_node *value = be_transform_node(oldv);
5270 ir_node *block = be_transform_node(get_nodes_block(node));
5271 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5272 dbg_info *dbgi = get_irn_dbg_info(node);
5274 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5275 set_ia32_ls_mode(res, mode);
5280 * Transform builtin inport.
5282 static ir_node *gen_inport(ir_node *node)
5284 ir_type *tp = get_Builtin_type(node);
5285 ir_type *rstp = get_method_res_type(tp, 0);
5286 ir_mode *mode = get_type_mode(rstp);
5287 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5288 ir_node *block = be_transform_node(get_nodes_block(node));
5289 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5290 dbg_info *dbgi = get_irn_dbg_info(node);
5292 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5293 set_ia32_ls_mode(res, mode);
5295 /* check for missing Result Proj */
5300 * Transform a builtin inner trampoline
5302 static ir_node *gen_inner_trampoline(ir_node *node)
5304 ir_node *ptr = get_Builtin_param(node, 0);
5305 ir_node *callee = get_Builtin_param(node, 1);
5306 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5307 ir_node *mem = get_Builtin_mem(node);
5308 ir_node *block = get_nodes_block(node);
5309 ir_node *new_block = be_transform_node(block);
5313 ir_node *trampoline;
5315 dbg_info *dbgi = get_irn_dbg_info(node);
5316 ia32_address_t addr;
5318 /* construct store address */
5319 memset(&addr, 0, sizeof(addr));
5320 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5322 if (addr.base == NULL) {
5323 addr.base = noreg_GP;
5325 addr.base = be_transform_node(addr.base);
5328 if (addr.index == NULL) {
5329 addr.index = noreg_GP;
5331 addr.index = be_transform_node(addr.index);
5333 addr.mem = be_transform_node(mem);
5335 ir_graph *const irg = get_Block_irg(new_block);
5336 /* mov ecx, <env> */
5337 val = ia32_create_Immediate(irg, NULL, 0, 0xB9);
5338 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5339 addr.index, addr.mem, val);
5340 set_irn_pinned(store, get_irn_pinned(node));
5341 set_ia32_op_type(store, ia32_AddrModeD);
5342 set_ia32_ls_mode(store, mode_Bu);
5343 set_address(store, &addr);
5347 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5348 addr.index, addr.mem, env);
5349 set_irn_pinned(store, get_irn_pinned(node));
5350 set_ia32_op_type(store, ia32_AddrModeD);
5351 set_ia32_ls_mode(store, mode_Iu);
5352 set_address(store, &addr);
5356 /* jmp rel <callee> */
5357 val = ia32_create_Immediate(irg, NULL, 0, 0xE9);
5358 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5359 addr.index, addr.mem, val);
5360 set_irn_pinned(store, get_irn_pinned(node));
5361 set_ia32_op_type(store, ia32_AddrModeD);
5362 set_ia32_ls_mode(store, mode_Bu);
5363 set_address(store, &addr);
5367 trampoline = be_transform_node(ptr);
5369 /* the callee is typically an immediate */
5370 if (is_SymConst(callee)) {
5371 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5373 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5374 add_ia32_am_offs_int(rel, -10);
5376 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5378 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5379 addr.index, addr.mem, rel);
5380 set_irn_pinned(store, get_irn_pinned(node));
5381 set_ia32_op_type(store, ia32_AddrModeD);
5382 set_ia32_ls_mode(store, mode_Iu);
5383 set_address(store, &addr);
5388 return new_r_Tuple(new_block, 2, in);
5392 * Transform Builtin node.
5394 static ir_node *gen_Builtin(ir_node *node)
5396 ir_builtin_kind kind = get_Builtin_kind(node);
5400 return gen_trap(node);
5401 case ir_bk_debugbreak:
5402 return gen_debugbreak(node);
5403 case ir_bk_return_address:
5404 return gen_return_address(node);
5405 case ir_bk_frame_address:
5406 return gen_frame_address(node);
5407 case ir_bk_prefetch:
5408 return gen_prefetch(node);
5410 return gen_ffs(node);
5412 return gen_clz(node);
5414 return gen_ctz(node);
5416 return gen_parity(node);
5417 case ir_bk_popcount:
5418 return gen_popcount(node);
5420 return gen_bswap(node);
5422 return gen_outport(node);
5424 return gen_inport(node);
5425 case ir_bk_inner_trampoline:
5426 return gen_inner_trampoline(node);
5428 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5432 * Transform Proj(Builtin) node.
5434 static ir_node *gen_Proj_Builtin(ir_node *proj)
5436 ir_node *node = get_Proj_pred(proj);
5437 ir_node *new_node = be_transform_node(node);
5438 ir_builtin_kind kind = get_Builtin_kind(node);
5441 case ir_bk_return_address:
5442 case ir_bk_frame_address:
5447 case ir_bk_popcount:
5449 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5452 case ir_bk_debugbreak:
5453 case ir_bk_prefetch:
5455 assert(get_Proj_proj(proj) == pn_Builtin_M);
5458 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5459 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5461 assert(get_Proj_proj(proj) == pn_Builtin_M);
5462 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5464 case ir_bk_inner_trampoline:
5465 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5466 return get_Tuple_pred(new_node, 1);
5468 assert(get_Proj_proj(proj) == pn_Builtin_M);
5469 return get_Tuple_pred(new_node, 0);
5472 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5475 static ir_node *gen_be_IncSP(ir_node *node)
5477 ir_node *res = be_duplicate_node(node);
5478 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5484 * Transform the Projs from a be_Call.
5486 static ir_node *gen_Proj_be_Call(ir_node *node)
5488 ir_node *call = get_Proj_pred(node);
5489 ir_node *new_call = be_transform_node(call);
5490 dbg_info *dbgi = get_irn_dbg_info(node);
5491 long proj = get_Proj_proj(node);
5492 ir_mode *mode = get_irn_mode(node);
5495 if (proj == pn_be_Call_M) {
5496 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5498 /* transform call modes */
5499 if (mode_is_data(mode)) {
5500 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5504 /* Map from be_Call to ia32_Call proj number */
5505 if (proj == pn_be_Call_sp) {
5506 proj = pn_ia32_Call_stack;
5507 } else if (proj == pn_be_Call_M) {
5508 proj = pn_ia32_Call_M;
5509 } else if (proj == pn_be_Call_X_except) {
5510 proj = pn_ia32_Call_X_except;
5511 } else if (proj == pn_be_Call_X_regular) {
5512 proj = pn_ia32_Call_X_regular;
5514 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5515 int const n_outs = arch_get_irn_n_outs(new_call);
5518 assert(proj >= pn_be_Call_first_res);
5519 assert(arch_register_req_is(req, limited));
5521 for (i = 0; i < n_outs; ++i) {
5522 arch_register_req_t const *const new_req = arch_get_irn_register_req_out(new_call, i);
5523 if (!arch_register_req_is(new_req, limited) ||
5524 new_req->cls != req->cls ||
5525 *new_req->limited != *req->limited)
5534 res = new_rd_Proj(dbgi, new_call, mode, proj);
5536 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5538 case pn_ia32_Call_stack:
5539 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5542 case pn_ia32_Call_fpcw:
5543 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5550 static ir_node *gen_Proj_ASM(ir_node *node)
5552 ir_mode *mode = get_irn_mode(node);
5553 ir_node *pred = get_Proj_pred(node);
5554 ir_node *new_pred = be_transform_node(pred);
5555 long pos = get_Proj_proj(node);
5557 if (mode == mode_M) {
5558 pos = arch_get_irn_n_outs(new_pred)-1;
5559 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5561 } else if (mode_is_float(mode)) {
5564 panic("unexpected proj mode at ASM");
5567 return new_r_Proj(new_pred, mode, pos);
5571 * Transform and potentially renumber Proj nodes.
5573 static ir_node *gen_Proj(ir_node *node)
5575 ir_node *pred = get_Proj_pred(node);
5578 switch (get_irn_opcode(pred)) {
5580 return gen_Proj_Load(node);
5582 return gen_Proj_Store(node);
5584 return gen_Proj_ASM(node);
5586 return gen_Proj_Builtin(node);
5588 return gen_Proj_Div(node);
5590 return gen_Proj_Mod(node);
5592 return gen_Proj_CopyB(node);
5594 return gen_Proj_be_SubSP(node);
5596 return gen_Proj_be_AddSP(node);
5598 return gen_Proj_be_Call(node);
5600 proj = get_Proj_proj(node);
5602 case pn_Start_X_initial_exec: {
5603 ir_node *block = get_nodes_block(pred);
5604 ir_node *new_block = be_transform_node(block);
5605 dbg_info *dbgi = get_irn_dbg_info(node);
5606 /* we exchange the ProjX with a jump */
5607 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5615 if (is_ia32_l_FloattoLL(pred)) {
5616 return gen_Proj_l_FloattoLL(node);
5618 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5622 ir_mode *mode = get_irn_mode(node);
5623 if (ia32_mode_needs_gp_reg(mode)) {
5624 ir_node *new_pred = be_transform_node(pred);
5625 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5626 get_Proj_proj(node));
5627 new_proj->node_nr = node->node_nr;
5632 return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic pointer
 * of the opcodes, so be_transform_graph() can dispatch per node opcode.
 */
static void register_transformers(void)
{
	/* first clear the generic function pointer for all ops */
	be_start_transform_setup();

	/* generic firm ops and backend (be_*) ops */
	be_set_transform_function(op_Add, gen_Add);
	be_set_transform_function(op_And, gen_And);
	be_set_transform_function(op_ASM, ia32_gen_ASM);
	be_set_transform_function(op_be_AddSP, gen_be_AddSP);
	be_set_transform_function(op_be_Call, gen_be_Call);
	be_set_transform_function(op_be_Copy, gen_be_Copy);
	be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
	be_set_transform_function(op_be_IncSP, gen_be_IncSP);
	be_set_transform_function(op_be_Return, gen_be_Return);
	be_set_transform_function(op_be_SubSP, gen_be_SubSP);
	be_set_transform_function(op_Builtin, gen_Builtin);
	be_set_transform_function(op_Cmp, gen_Cmp);
	be_set_transform_function(op_Cond, gen_Cond);
	be_set_transform_function(op_Const, gen_Const);
	be_set_transform_function(op_Conv, gen_Conv);
	be_set_transform_function(op_CopyB, ia32_gen_CopyB);
	be_set_transform_function(op_Div, gen_Div);
	be_set_transform_function(op_Eor, gen_Eor);
	/* ia32 lowered (l_*) ops created by earlier lowering phases */
	be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
	be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
	be_set_transform_function(op_ia32_Leave, be_duplicate_node);
	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
	be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
	be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
	be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
	be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
	/* already-final ia32 nodes are simply duplicated */
	be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
	be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_FP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
	be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
	be_set_transform_function(op_ia32_Push, be_duplicate_node);
	be_set_transform_function(op_IJmp, gen_IJmp);
	be_set_transform_function(op_Jmp, gen_Jmp);
	be_set_transform_function(op_Load, gen_Load);
	be_set_transform_function(op_Minus, gen_Minus);
	be_set_transform_function(op_Mod, gen_Mod);
	be_set_transform_function(op_Mul, gen_Mul);
	be_set_transform_function(op_Mulh, gen_Mulh);
	be_set_transform_function(op_Mux, gen_Mux);
	be_set_transform_function(op_Not, gen_Not);
	be_set_transform_function(op_Or, gen_Or);
	be_set_transform_function(op_Phi, gen_Phi);
	be_set_transform_function(op_Proj, gen_Proj);
	be_set_transform_function(op_Rotl, gen_Rotl);
	be_set_transform_function(op_Shl, gen_Shl);
	be_set_transform_function(op_Shr, gen_Shr);
	be_set_transform_function(op_Shrs, gen_Shrs);
	be_set_transform_function(op_Store, gen_Store);
	be_set_transform_function(op_Sub, gen_Sub);
	be_set_transform_function(op_Switch, gen_Switch);
	be_set_transform_function(op_SymConst, gen_SymConst);
	be_set_transform_function(op_Unknown, ia32_gen_Unknown);

	be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
}
5703 * Pre-transform all unknown and noreg nodes.
5705 static void ia32_pretransform_node(void)
5707 ir_graph *irg = current_ir_graph;
5708 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
5710 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5711 irg_data->noreg_fp = be_pre_transform_node(irg_data->noreg_fp);
5712 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5713 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5714 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5716 nomem = get_irg_no_mem(irg);
5717 noreg_GP = ia32_new_NoReg_gp(irg);
5721 * Post-process all calls if we are in SSE mode.
5722 * The ABI requires that the results are in st0, copy them
5723 * to a xmm register.
5725 static void postprocess_fp_call_results(void)
5729 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5730 ir_node *call = call_list[i];
5731 ir_type *mtp = call_types[i];
5734 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5735 ir_type *res_tp = get_method_res_type(mtp, j);
5736 ir_node *res, *new_res;
5739 if (! is_atomic_type(res_tp)) {
5740 /* no floating point return */
5743 res_mode = get_type_mode(res_tp);
5744 if (! mode_is_float(res_mode)) {
5745 /* no floating point return */
5749 res = be_get_Proj_for_pn(call, pn_ia32_Call_st0 + j);
5752 /* now patch the users */
5753 foreach_out_edge_safe(res, edge) {
5754 ir_node *succ = get_edge_src_irn(edge);
5757 if (be_is_Keep(succ))
5760 if (is_ia32_xStore(succ)) {
5761 /* an xStore can be patched into an vfst */
5762 dbg_info *db = get_irn_dbg_info(succ);
5763 ir_node *block = get_nodes_block(succ);
5764 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5765 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5766 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5767 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5768 ir_mode *mode = get_ia32_ls_mode(succ);
5770 ir_node *st = new_bd_ia32_fst(db, block, base, idx, mem, value, mode);
5771 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_fst_M);
5772 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5773 if (is_ia32_use_frame(succ))
5774 set_ia32_use_frame(st);
5775 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5776 set_irn_pinned(st, get_irn_pinned(succ));
5777 set_ia32_op_type(st, ia32_AddrModeD);
5779 assert((long)pn_ia32_xStore_M == (long)pn_ia32_fst_M);
5780 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_fst_X_regular);
5781 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_fst_X_except);
5788 if (new_res == NULL) {
5789 dbg_info *db = get_irn_dbg_info(call);
5790 ir_node *block = get_nodes_block(call);
5791 ir_node *frame = get_irg_frame(current_ir_graph);
5792 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5793 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5794 ir_node *vfst, *xld, *new_mem;
5797 /* store st(0) on stack */
5798 vfst = new_bd_ia32_fst(db, block, frame, noreg_GP, call_mem,
5800 set_ia32_op_type(vfst, ia32_AddrModeD);
5801 set_ia32_use_frame(vfst);
5803 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_fst_M);
5805 /* load into SSE register */
5806 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5808 set_ia32_op_type(xld, ia32_AddrModeS);
5809 set_ia32_use_frame(xld);
5811 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5812 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5814 if (old_mem != NULL) {
5815 edges_reroute(old_mem, new_mem);
5819 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5825 /* do the transformation */
5826 void ia32_transform_graph(ir_graph *irg)
5830 register_transformers();
5831 initial_fpcw = NULL;
5832 ia32_no_pic_adjust = 0;
5834 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5836 be_timer_push(T_HEIGHTS);
5837 ia32_heights = heights_new(irg);
5838 be_timer_pop(T_HEIGHTS);
5839 ia32_calculate_non_address_mode_nodes(irg);
5841 /* the transform phase is not safe for CSE (yet) because several nodes get
5842 * attributes set after their creation */
5843 cse_last = get_opt_cse();
5846 call_list = NEW_ARR_F(ir_node *, 0);
5847 call_types = NEW_ARR_F(ir_type *, 0);
5848 be_transform_graph(irg, ia32_pretransform_node);
5850 if (ia32_cg_config.use_sse2)
5851 postprocess_fp_call_results();
5852 DEL_ARR_F(call_types);
5853 DEL_ARR_F(call_list);
5855 set_opt_cse(cse_last);
5857 ia32_free_non_address_mode_nodes();
5858 heights_free(ia32_heights);
5859 ia32_heights = NULL;
5862 void ia32_init_transform(void)
5864 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");