* Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
56 #include "betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* result modes of the x87 (vfp) and SSE (xmm) register classes */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* x87 floating point control word; transformed lazily (see get_fpcw()) */
static ir_node *old_initial_fpcw = NULL;
static ir_node *initial_fpcw = NULL;
int ia32_no_pic_adjust;
/** Constructor for a binary ia32 node with address-mode inputs (base, index, mem). */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

/** Constructor for a binary ia32 node that additionally consumes an eflags input. */
typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/** Constructor for a shift/rotate node from two register operands. */
typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
		ir_node *op1, ir_node *op2);

/** Constructor for a binary node with a memory destination operand. */
typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

/** Constructor for a unary node with a memory destination operand. */
typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem);

/** Constructor for a binary float node (takes the fpcw as extra input,
 * see gen_binop_x87_float()). */
typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/** Constructor for a unary node from a single operand. */
typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

static ir_node *create_immediate_or_transform(ir_node *node,
		char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
		dbg_info *dbgi, ir_node *block,
		ir_node *op, ir_node *orig_node);

/* it's enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
120 /** Return non-zero is a node represents the 0 constant. */
121 static bool is_Const_0(ir_node *node)
123 return is_Const(node) && is_Const_null(node);
126 /** Return non-zero is a node represents the 1 constant. */
127 static bool is_Const_1(ir_node *node)
129 return is_Const(node) && is_Const_one(node);
132 /** Return non-zero is a node represents the -1 constant. */
133 static bool is_Const_Minus_1(ir_node *node)
135 return is_Const(node) && is_Const_all_one(node);
139 * returns true if constant can be created with a simple float command
141 static bool is_simple_x87_Const(ir_node *node)
143 ir_tarval *tv = get_Const_tarval(node);
144 if (tarval_is_null(tv) || tarval_is_one(tv))
147 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be created with a simple float command
 * (without loading it from memory).
 */
static bool is_simple_sse_Const(ir_node *node)
	ir_tarval *tv = get_Const_tarval(node);
	ir_mode *mode = get_tarval_mode(tv);

	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bits of the double from its sub-bytes */
		unsigned val = get_tarval_sub_bits(tv, 0) |
			(get_tarval_sub_bits(tv, 1) << 8) |
			(get_tarval_sub_bits(tv, 2) << 16) |
			(get_tarval_sub_bits(tv, 3) << 24);

		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
184 * return NoREG or pic_base in case of PIC.
185 * This is necessary as base address for newly created symbols
187 static ir_node *get_symconst_base(void)
189 ir_graph *irg = current_ir_graph;
191 if (be_get_irg_options(irg)->pic) {
192 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
193 return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a Const.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	ir_tarval *tv = get_Const_tarval(node);

	assert(is_Const(node));

	if (mode_is_float(mode)) {
		ir_graph *irg = get_irn_irg(node);
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		ia32_isa_t *isa = (ia32_isa_t*) arch_env;

		if (ia32_cg_config.use_sse2) {
			if (tarval_is_null(tv)) {
				/* 0.0 is cheap: materialize via xZero instead of a load */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* construct 1.0 from an all-ones register by two shifts
				 * (shift counts differ for float/double) */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				/* low 32 bits of the double */
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
				ir_node *cnst, *psllq;

				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
					(get_tarval_sub_bits(tv, 5) << 8) |
					(get_tarval_sub_bits(tv, 6) << 16) |
					(get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				/* shift the value into the upper 32 bits of the xmm reg */
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general case: load the constant from a float entity */
			floatent = ia32_create_float_const_entity(isa, tv, NULL);

			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			/* constant loads can be re-created anywhere by the spiller */
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path: fldz/fld1 for 0.0/1.0, otherwise a load */
		if (tarval_is_null(tv)) {
			load = new_bd_ia32_vfldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (tarval_is_one(tv)) {
			load = new_bd_ia32_vfld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
			floatent = ia32_create_float_const_entity(isa, tv, NULL);
			/* create_float_const_ent is smart and sometimes creates
			 * an entity with a smaller mode */
			ls_mode = get_type_mode(get_entity_type(floatent));
			base = get_symconst_base();
			load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);
	} else { /* non-float mode */
		/* integer constants are emitted as 32bit ia32 Const nodes */
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);
/**
 * Transforms a SymConst.
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		/* float: produce a load of the referenced entity */
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
		/* non-float: only entity addresses are supported */
		if (get_SymConst_kind(node) != symconst_addr_ent) {
			panic("backend only support symconst_addr_ent (at %+F)", node);
		entity = get_SymConst_entity(node);
		if (get_entity_owner(entity) == get_tls_type()) {
			/* thread-local entity: compute address relative to the TLS base */
			ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
			ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
			set_ia32_am_sc(lea, entity);
		cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);
377 static ir_type *make_array_type(ir_type *tp)
379 unsigned alignment = get_type_alignment_bytes(tp);
380 unsigned size = get_type_size_bytes(tp);
381 ir_type *res = new_type_array(1, tp);
382 set_type_alignment_bytes(res, alignment);
383 set_array_bounds_int(res, 0, 0, 2);
384 if (alignment > size)
386 set_type_size_bytes(res, 2 * size);
387 set_type_state(res, layout_fixed);
/**
 * Create a float[2] array type for the given atomic type.
 *
 * @param tp  the atomic type
 */
static ir_type *ia32_create_float_array(ir_type *tp)
	ir_mode *mode = get_type_mode(tp);

	if (mode == mode_F) {
		/* the array types are created at most once and cached per mode */
		static ir_type *float_F;

		arr = float_F = make_array_type(tp);
	} else if (mode == mode_D) {
		static ir_type *float_D;

		arr = float_D = make_array_type(tp);
		static ir_type *float_E;

		arr = float_E = make_array_type(tp);
/* Generates an entity for a known FP const (used for FP Neg + Abs) */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	/* table of known constants: ld-name, value string, mode selector
	 * (0 = mode_Iu, 1 = mode_Lu, 2 = mode_F, see the switch below) */
	static const struct {
		const char *cnst_str;
	} names [ia32_known_const_max] = {
		{ "C_sfp_sign", "0x80000000", 0 },
		{ "C_dfp_sign", "0x8000000000000000", 1 },
		{ "C_sfp_abs", "0x7FFFFFFF", 0 },
		{ "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
		{ "C_ull_bias", "0x10000000000000000", 2 }
	static ir_entity *ent_cache[ia32_known_const_max];

	ir_entity *ent = ent_cache[kct];
	/* entity not cached yet: construct it */
	ir_graph *irg = current_ir_graph;
	const arch_env_t *arch_env = be_get_irg_arch_env(irg);
	ia32_isa_t *isa = (ia32_isa_t*) arch_env;
	const char *cnst_str = names[kct].cnst_str;
	ident *name = new_id_from_str(names[kct].name);

	switch (names[kct].mode) {
	case 0: mode = mode_Iu; break;
	case 1: mode = mode_Lu; break;
	case 2: mode = mode_F; break;
	default: panic("internal compiler error (ia32_gen_fp_known_const)");
	tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);

	if (kct == ia32_ULLBIAS) {
		/* ULLBIAS is emitted as a float[2] array { 0.0, bias } */
		ir_type *type = ia32_get_prim_type(mode_F);
		ir_type *atype = ia32_create_float_array(type);
		ir_initializer_t *initializer;

		ent = new_entity(get_glob_type(), name, atype);

		set_entity_ld_ident(ent, name);
		set_entity_visibility(ent, ir_visibility_private);
		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);

		initializer = create_initializer_compound(2);
		set_initializer_compound_value(initializer, 0,
			create_initializer_tarval(get_mode_null(mode)));
		set_initializer_compound_value(initializer, 1,
			create_initializer_tarval(tv));
		set_entity_initializer(ent, initializer);
		ent = ia32_create_float_const_entity(isa, tv, name);
	/* cache the entry */
	ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * Return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2, match_flags_t flags)
	/* float constants are always available */
	if (is_Const(node)) {
		ir_mode *mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			if (ia32_cg_config.use_sse2) {
				if (is_simple_sse_Const(node))
				if (is_simple_x87_Const(node))
			if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);
	pn = get_Proj_proj(node);
	if (!is_Load(load) || pn != pn_Load_res)
	if (get_nodes_block(load) != block)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && ia32_prevents_AM(block, load, other))
	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
typedef struct ia32_address_mode_t ia32_address_mode_t;

/** Result of operand matching: address parts plus matched operand info. */
struct ia32_address_mode_t {
	ia32_op_type_t op_type;       /**< ia32_Normal or ia32_AddrModeS (source AM) */
	unsigned commutative : 1;     /**< the operation is commutative */
	unsigned ins_permuted : 1;    /**< the two inputs were swapped during matching */
554 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
556 /* construct load address */
557 memset(addr, 0, sizeof(addr[0]));
558 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
560 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
561 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
562 addr->mem = be_transform_node(mem);
/**
 * Fill the address-mode struct @p am for operand @p node
 * (a float Const or a Proj(Load)).
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	/* floating point immediates */
	if (is_Const(node)) {
		/* materialize the float constant as an entity and address that */
		ir_graph *irg = get_irn_irg(node);
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		ia32_isa_t *isa = (ia32_isa_t*) arch_env;
		ir_tarval *tv = get_Const_tarval(node);
		ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
		addr->base = get_symconst_base();
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		addr->tls_segment = false;
		am->ls_mode = get_type_mode(get_entity_type(entity));
		am->pinned = op_pin_state_floats;

	/* Proj(Load) case: take address, mode and pin state from the Load */
	load = get_Proj_pred(node);
	ptr = get_Load_ptr(load);
	mem = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned = get_irn_pinned(load);
	am->ls_mode = get_Load_mode(load);
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copy the address attributes from @p addr onto the ia32 node @p node. */
static void set_address(ir_node *node, const ia32_address_t *addr)
	set_ia32_am_scale(node, addr->scale);
	set_ia32_am_sc(node, addr->symconst_ent);
	set_ia32_am_offs_int(node, addr->offset);
	set_ia32_am_tls_segment(node, addr->tls_segment);
	if (addr->symconst_sign)
		set_ia32_am_sc_sign(node);
	set_ia32_use_frame(node);
	set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
	set_address(node, &am->addr);

	set_ia32_op_type(node, am->op_type);
	set_ia32_ls_mode(node, am->ls_mode);
	if (am->pinned == op_pin_state_pinned) {
		/* beware: some nodes are already pinned and did not allow to change the state */
		if (get_irn_pinned(node) != op_pin_state_pinned)
			set_irn_pinned(node, op_pin_state_pinned);
		set_ia32_commutative(node);
/**
 * Check, if a given node is a Down-Conv, ie. a integer Conv
 * from a mode with more bits to a mode with lesser bits.
 * Moreover, we return only true if the node has not more than 1 user.
 *
 * @param node   the node
 * @return non-zero if node is a Down-Conv
 */
static int is_downconv(const ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (because this test is used in the context of address-mode selection
	 * and we don't want to use address mode for multiple users) */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
670 /** Skip all Down-Conv's on a given node and return the resulting node. */
671 ir_node *ia32_skip_downconv(ir_node *node)
673 while (is_downconv(node))
674 node = get_Conv_op(node);
/** Returns true for a single-user integer Conv between modes of the same
 * size (i.e. a pure signedness change). */
static bool is_sameconv(ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (because this test is used in the context of address-mode selection
	 * and we don't want to use address mode for multiple users) */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
701 /** Skip all signedness convs */
702 static ir_node *ia32_skip_sameconv(ir_node *node)
704 while (is_sameconv(node))
705 node = get_Conv_op(node);
/** Create an up-Conv of @p node; the target mode is selected according to
 * the signedness of the source mode. @p orig_node provides debug info. */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_signed(mode)) {

	block = get_nodes_block(node);
	dbgi = get_irn_dbg_info(node);

	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 *
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 *
 * @p flags describes the supported modes of the operation in detail.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr = &am->addr;
	ir_mode *mode = get_irn_mode(op2);
	int mode_bits = get_mode_size_bits(mode);
	ir_node *new_op1, *new_op2;
	unsigned commutative;
	int use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	/* decode the match flags */
	commutative = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am = (flags & match_am) != 0;
	use_immediate = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* 8/16 bit AM only when the operation explicitly allows it */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
		(mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
		op1 = ia32_skip_downconv(op1);
		op2 = ia32_skip_sameconv(op2);
		op1 = ia32_skip_sameconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	 * right side of a binary operation */
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = ia32_try_create_Immediate(op2, 0);

	if (new_op2 == NULL &&
		use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		/* use source address mode for the right operand */
		build_address(am, op2, ia32_create_am_normal);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		if (mode_is_float(mode)) {
			new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
		am->op_type = ia32_AddrModeS;
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
		ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		/* commutative operation: use source AM for the left operand */
		build_address(am, op1, ia32_create_am_normal);

		if (mode_is_float(mode)) {
			noreg = ia32_new_NoReg_vfp(current_ir_graph);

		if (new_op2 != NULL) {
			new_op1 = be_transform_node(op2);

		am->ins_permuted = true;

		am->op_type = ia32_AddrModeS;
		am->op_type = ia32_Normal;

		if (flags & match_try_am) {

		mode = get_irn_mode(op2);
		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
			/* extend both operands to 32 bit */
			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
			new_op2 = create_upconv(op2, NULL);
			am->ls_mode = mode_Iu;
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;

	/* fall back to noreg_GP for unused address parts */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1 = new_op1;
	am->new_op2 = new_op2;
	am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node  the node
 * @param am    its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 *         the node itself otherwise
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	if (am->mem_proj == NULL)

	/* we have to create a mode_T so the old MemProj can attach to us */
	mode = get_irn_mode(node);
	load = get_Proj_pred(am->mem_proj);

	be_set_transformed_node(load, node);

	if (mode != mode_T) {
		set_irn_mode(node, mode_T);
		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node  The original node for which the binop is created
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_func *func, match_flags_t flags)
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op.
 */
	n_ia32_l_binop_left, /**< ia32 left input */
	n_ia32_l_binop_right, /**< ia32 right input */
	n_ia32_l_binop_eflags /**< ia32 eflags input */

/* the generic input numbers must line up with the Adc/Sbb node inputs so
 * gen_binop_flags() can fetch operands generically */
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node  The node to transform
 * @param func  The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
	ir_node *src_block = get_nodes_block(node);
	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
	ir_node *block, *new_node, *new_eflags;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* eflags is passed as other_op: it must not depend on a load's memory */
	match_arguments(&am, src_block, op1, op2, eflags, flags);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_eflags = be_transform_node(eflags);
	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, new_eflags);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
977 static ir_node *get_fpcw(void)
979 if (initial_fpcw != NULL)
982 initial_fpcw = be_transform_node(old_initial_fpcw);
/**
 * Construct a standard x87 float binary operation, set AM and immediate if
 * required.
 *
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_float_func *func)
	ir_mode *mode = get_irn_mode(node);
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	ia32_x87_attr_t *attr;
	/* All operations are considered commutative, because there are reverse
	 * variants of the instructions */
	match_flags_t flags = match_commutative;

	/* happens for div nodes... */
	if (mode == mode_T) {
		mode = get_Div_resmode(node);
		panic("can't determine mode");

	/* cannot use address mode with long double on x87 */
	if (get_mode_size_bits(mode) <= 64)

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	/* x87 binops take the fpcw control word as an additional input */
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, get_fpcw());
	set_am_attributes(new_node, &am);

	attr = get_ia32_x87_attr(new_node);
	attr->attr.data.ins_permuted = am.ins_permuted;

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if required.
 *
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_shift_func *func,
		match_flags_t flags)
	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
	ir_mode *mode = get_irn_mode(node);

	assert(! mode_is_float(mode));
	assert(flags & match_immediate);
	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);

	if (get_mode_modulo_shift(mode) != 32)
		panic("modulo shift!=32 not supported by ia32 backend");

	if (flags & match_mode_neutral) {
		/* upper bits are irrelevant: drop down-convs on the shifted value */
		op1 = ia32_skip_downconv(op1);
		new_op1 = be_transform_node(op1);
	} else if (get_mode_size_bits(mode) != 32) {
		new_op1 = create_upconv(op1, node);
		new_op1 = be_transform_node(op1);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
		ir_node *const op = get_Conv_op(op2);
		if (mode_is_float(get_irn_mode(op)))
		assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
	new_op2 = create_immediate_or_transform(op2, 0);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op1, new_op2);
	SET_IA32_ORIG_NODE(new_node, node);

	/* lowered shift instruction may have a dependency operand, handle it here */
	if (get_irn_arity(node) == 3) {
		/* we have a dependency */
		ir_node* dep = get_irn_n(node, 2);
		if (get_irn_n_edges(dep) > 1) {
			/* ... which has at least one user other than 'node' */
			ir_node *new_dep = be_transform_node(dep);
			add_irn_dep(new_node, new_dep);
1103 * Construct a standard unary operation, set AM and immediate if required.
1105 * @param op The operand
1106 * @param func The node constructor function
1107 * @return The constructed ia32 node.
1109 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1110 match_flags_t flags)
1113 ir_node *block, *new_block, *new_op, *new_node;
1115 assert(flags == 0 || flags == match_mode_neutral);
1116 if (flags & match_mode_neutral) {
1117 op = ia32_skip_downconv(op);
1120 new_op = be_transform_node(op);
1121 dbgi = get_irn_dbg_info(node);
1122 block = get_nodes_block(node);
1123 new_block = be_transform_node(block);
1124 new_node = func(dbgi, new_block, new_op);
1126 SET_IA32_ORIG_NODE(new_node, node);
/** Build an ia32 Lea for @p addr; patches the TLS base into the address
 * computation when a TLS segment override is requested. */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
		ia32_address_t *addr)
	base = be_transform_node(base);
	idx = be_transform_node(idx);

	/* segment overrides are ineffective for Leas :-( so we have to patch
	 * the TLS base into the address computation instead */
	if (addr->tls_segment) {
		ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
		assert(addr->symconst_ent != NULL);
		if (base == noreg_GP)
		base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
		addr->tls_segment = false;

	res = new_bd_ia32_Lea(dbgi, block, base, idx);
	set_address(res, addr);
1171 * Returns non-zero if a given address mode has a symbolic or
1172 * numerical offset != 0.
1174 static int am_has_immediates(const ia32_address_t *addr)
1176 return addr->offset != 0 || addr->symconst_ent != NULL
1177 || addr->frame_entity || addr->use_frame;
/** Constructor type for double-word shift nodes (ShlD/ShrD). */
typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
		ir_node *high, ir_node *low,

/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * Only op3 can be an immediate.
 */
static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
		ir_node *high, ir_node *low, ir_node *count,
		new_shiftd_func func)
	ir_node *new_block = be_transform_node(block);
	ir_node *new_high = be_transform_node(high);
	ir_node *new_low = be_transform_node(low);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count) &&
		get_irn_n_edges(count) == 1 &&
		mode_is_int(get_irn_mode(count))) {
		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
		count = get_Conv_op(count);
	new_count = create_immediate_or_transform(count, 0);

	new_node = func(dbgi, new_block, new_high, new_low, new_count);
1216 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1219 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1221 if (is_Const(value1) && is_Const(value2)) {
1222 ir_tarval *tv1 = get_Const_tarval(value1);
1223 ir_tarval *tv2 = get_Const_tarval(value2);
1224 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1225 long v1 = get_tarval_long(tv1);
1226 long v2 = get_tarval_long(tv2);
/* counts x and 32-x complement each other for a 32bit double shift */
1227 return v1 <= v2 && v2 == 32-v1;
/* Tries to match an Or/Add node as a 64bit double shift (ShlD/ShrD).
 * Two shapes are recognised: constant complementary shift counts
 * (x and 32-x), and the Not(c)-based patterns that lower_dw emits for
 * variable counts.  Returns the created ia32 node on a match;
 * NOTE(review): the no-match return path is outside the visible lines —
 * presumably NULL is returned, confirm against the full file. */
1233 static ir_node *match_64bit_shift(ir_node *node)
1235 ir_node *op1 = get_binop_left(node);
1236 ir_node *op2 = get_binop_right(node);
1237 assert(is_Or(node) || is_Add(node));
1245 /* match ShlD operation */
1246 if (is_Shl(op1) && is_Shr(op2)) {
1247 ir_node *shl_right = get_Shl_right(op1);
1248 ir_node *shl_left = get_Shl_left(op1);
1249 ir_node *shr_right = get_Shr_right(op2);
1250 ir_node *shr_left = get_Shr_left(op2);
1251 /* constant ShlD operation */
1252 if (is_complementary_shifts(shl_right, shr_right)) {
1253 dbg_info *dbgi = get_irn_dbg_info(node);
1254 ir_node *block = get_nodes_block(node);
1255 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1258 /* constant ShrD operation */
1259 if (is_complementary_shifts(shr_right, shl_right)) {
1260 dbg_info *dbgi = get_irn_dbg_info(node);
1261 ir_node *block = get_nodes_block(node);
1262 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1265 /* lower_dw produces the following for ShlD:
1266 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1267 if (is_Shr(shr_left) && is_Not(shr_right)
1268 && is_Const_1(get_Shr_right(shr_left))
1269 && get_Not_op(shr_right) == shl_right) {
1270 dbg_info *dbgi = get_irn_dbg_info(node);
1271 ir_node *block = get_nodes_block(node);
1272 ir_node *val_h = get_Shr_left(shr_left);
1273 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1276 /* lower_dw produces the following for ShrD:
1277 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1278 if (is_Shl(shl_left) && is_Not(shl_right)
1279 && is_Const_1(get_Shl_right(shl_left))
1280 && get_Not_op(shl_right) == shr_right) {
1281 dbg_info *dbgi = get_irn_dbg_info(node);
1282 ir_node *block = get_nodes_block(node);
1283 ir_node *val_h = get_Shl_left(shl_left);
1284 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1293 * Creates an ia32 Add.
1295 * @return the created ia32 Add node
/* Transforms a firm Add.  Float adds become xAdd (SSE2) or vfadd (x87).
 * Integer adds are matched in priority order: whole-immediate trees fold
 * to a Const, immediate-only adds fold away or become a Lea, source
 * address mode yields an ia32 Add, and everything else becomes a Lea. */
1297 static ir_node *gen_Add(ir_node *node)
1299 ir_mode *mode = get_irn_mode(node);
1300 ir_node *op1 = get_Add_left(node);
1301 ir_node *op2 = get_Add_right(node);
1303 ir_node *block, *new_block, *new_node, *add_immediate_op;
1304 ia32_address_t addr;
1305 ia32_address_mode_t am;
/* an Add may really be a 64bit double shift pattern (see match_64bit_shift) */
1307 new_node = match_64bit_shift(node);
1308 if (new_node != NULL)
1311 if (mode_is_float(mode)) {
1312 if (ia32_cg_config.use_sse2)
1313 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1314 match_commutative | match_am);
1316 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1319 ia32_mark_non_am(node);
1321 op2 = ia32_skip_downconv(op2);
1322 op1 = ia32_skip_downconv(op1);
1326 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1327 * 1. Add with immediate -> Lea
1328 * 2. Add with possible source address mode -> Add
1329 * 3. Otherwise -> Lea
1331 memset(&addr, 0, sizeof(addr));
1332 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1333 add_immediate_op = NULL;
1335 dbgi = get_irn_dbg_info(node);
1336 block = get_nodes_block(node);
1337 new_block = be_transform_node(block);
/* address expression with only symconst/offset parts -> plain Const */
1340 if (addr.base == NULL && addr.index == NULL) {
1341 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1342 addr.symconst_sign, 0, addr.offset);
1343 SET_IA32_ORIG_NODE(new_node, node);
1346 /* add with immediate? */
1347 if (addr.index == NULL) {
1348 add_immediate_op = addr.base;
1349 } else if (addr.base == NULL && addr.scale == 0) {
1350 add_immediate_op = addr.index;
1353 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1354 if (!am_has_immediates(&addr)) {
1355 #ifdef DEBUG_libfirm
1356 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1359 return be_transform_node(add_immediate_op);
1362 new_node = create_lea_from_address(dbgi, new_block, &addr);
1363 SET_IA32_ORIG_NODE(new_node, node);
1367 /* test if we can use source address mode */
1368 match_arguments(&am, block, op1, op2, NULL, match_commutative
1369 | match_mode_neutral | match_am | match_immediate | match_try_am);
1371 /* construct an Add with source address mode */
1372 if (am.op_type == ia32_AddrModeS) {
1373 ia32_address_t *am_addr = &am.addr;
1374 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1375 am_addr->index, am_addr->mem, am.new_op1,
1377 set_am_attributes(new_node, &am);
1378 SET_IA32_ORIG_NODE(new_node, node);
1380 new_node = fix_mem_proj(new_node, &am);
1385 /* otherwise construct a lea */
1386 new_node = create_lea_from_address(dbgi, new_block, &addr);
1387 SET_IA32_ORIG_NODE(new_node, node);
1392 * Creates an ia32 Mul.
1394 * @return the created ia32 Mul node
/* Transforms a firm Mul.  Float multiplies become xMul (SSE2) or vfmul
 * (x87); integer multiplies become IMul with full address-mode and
 * immediate matching. */
1396 static ir_node *gen_Mul(ir_node *node)
1398 ir_node *op1 = get_Mul_left(node);
1399 ir_node *op2 = get_Mul_right(node);
1400 ir_mode *mode = get_irn_mode(node);
1402 if (mode_is_float(mode)) {
1403 if (ia32_cg_config.use_sse2)
1404 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1405 match_commutative | match_am);
1407 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* IMul yields a 32bit result; mode_neutral is fine because the low bits
 * of a multiply do not depend on the upper input bits */
1409 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1410 match_commutative | match_am | match_mode_neutral |
1411 match_immediate | match_am_and_immediates);
1415 * Creates an ia32 Mulh.
1416 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1417 * this result while Mul returns the lower 32 bit.
1419 * @return the created ia32 Mulh node
/* Transforms a firm Mulh (upper 32 bits of a 32x32->64 multiply).
 * Uses one-operand IMul for signed and Mul for unsigned modes and
 * projects out the res_high result.  Only 32bit modes are supported. */
1421 static ir_node *gen_Mulh(ir_node *node)
1423 dbg_info *dbgi = get_irn_dbg_info(node);
1424 ir_node *op1 = get_Mulh_left(node);
1425 ir_node *op2 = get_Mulh_right(node);
1426 ir_mode *mode = get_irn_mode(node);
1428 ir_node *proj_res_high;
1430 if (get_mode_size_bits(mode) != 32) {
1431 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1434 if (mode_is_signed(mode)) {
1435 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1436 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1438 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1439 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1441 return proj_res_high;
1445 * Creates an ia32 And.
1447 * @return The created ia32 And node
/* Transforms a firm And.  And with 0xFF/0xFFFF is recognised as a zero
 * extension and turned into an I2I Conv; everything else becomes an
 * ia32 And with address-mode/immediate matching. */
1449 static ir_node *gen_And(ir_node *node)
1451 ir_node *op1 = get_And_left(node);
1452 ir_node *op2 = get_And_right(node);
1453 assert(! mode_is_float(get_irn_mode(node)));
1455 /* is it a zero extension? */
1456 if (is_Const(op2)) {
1457 ir_tarval *tv = get_Const_tarval(op2);
1458 long v = get_tarval_long(tv);
1460 if (v == 0xFF || v == 0xFFFF) {
1461 dbg_info *dbgi = get_irn_dbg_info(node);
1462 ir_node *block = get_nodes_block(node);
/* NOTE(review): the 0xFF branch selecting an 8bit src_mode is not
 * visible here; only the 16bit case remains */
1469 assert(v == 0xFFFF);
1472 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1477 return gen_binop(node, op1, op2, new_bd_ia32_And,
1478 match_commutative | match_mode_neutral | match_am | match_immediate);
1482 * Creates an ia32 Or.
1484 * @return The created ia32 Or node
/* Transforms a firm Or.  First tries to match the Or as a 64bit double
 * shift (ShlD/ShrD); otherwise builds a plain ia32 Or. */
1486 static ir_node *gen_Or(ir_node *node)
1488 ir_node *op1 = get_Or_left(node);
1489 ir_node *op2 = get_Or_right(node);
1492 res = match_64bit_shift(node);
1496 assert (! mode_is_float(get_irn_mode(node)));
1497 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1498 | match_mode_neutral | match_am | match_immediate);
1504 * Creates an ia32 Eor.
1506 * @return The created ia32 Eor node
/* Transforms a firm Eor (xor) into an ia32 Xor with address-mode and
 * immediate matching. */
1508 static ir_node *gen_Eor(ir_node *node)
1510 ir_node *op1 = get_Eor_left(node);
1511 ir_node *op2 = get_Eor_right(node);
1513 assert(! mode_is_float(get_irn_mode(node)));
1514 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1515 | match_mode_neutral | match_am | match_immediate);
1520 * Creates an ia32 Sub.
1522 * @return The created ia32 Sub node
/* Transforms a firm Sub.  Float subs become xSub (SSE2) or vfsub (x87);
 * integer subs become ia32 Sub.  A Sub with a constant right operand is
 * flagged because normalization should have turned it into an Add. */
1524 static ir_node *gen_Sub(ir_node *node)
1526 ir_node *op1 = get_Sub_left(node);
1527 ir_node *op2 = get_Sub_right(node);
1528 ir_mode *mode = get_irn_mode(node);
1530 if (mode_is_float(mode)) {
1531 if (ia32_cg_config.use_sse2)
1532 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1534 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1537 if (is_Const(op2)) {
1538 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* Sub is not commutative, so no match_commutative here */
1542 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1543 | match_am | match_immediate);
/* Builds the memory input of a node that consumed a load via source
 * address mode: combines the original memory predecessor (src_mem) with
 * the consumed load's memory (am_mem), dropping predecessors that would
 * create a memory loop and merging Sync predecessors. */
1546 static ir_node *transform_AM_mem(ir_node *const block,
1547 ir_node *const src_val,
1548 ir_node *const src_mem,
1549 ir_node *const am_mem)
1551 if (is_NoMem(am_mem)) {
1552 return be_transform_node(src_mem);
1553 } else if (is_Proj(src_val) &&
1555 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1556 /* avoid memory loop */
1558 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1559 ir_node *const ptr_pred = get_Proj_pred(src_val);
1560 int const arity = get_Sync_n_preds(src_mem);
/* collect all Sync predecessors except those reaching the consumed load */
1565 NEW_ARR_A(ir_node*, ins, arity + 1);
1567 /* NOTE: This sometimes produces dead-code because the old sync in
1568 * src_mem might not be used anymore, we should detect this case
1569 * and kill the sync... */
1570 for (i = arity - 1; i >= 0; --i) {
1571 ir_node *const pred = get_Sync_pred(src_mem, i);
1573 /* avoid memory loop */
1574 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1577 ins[n++] = be_transform_node(pred);
1580 if (n==1 && ins[0] == am_mem) {
1582 /* creating a new Sync and relying on CSE may fail,
1583 * if am_mem is a ProjM, which does not yet verify. */
1587 return new_r_Sync(block, n, ins);
/* default case: Sync of the transformed src_mem and am_mem */
1591 ins[0] = be_transform_node(src_mem);
1593 return new_r_Sync(block, 2, ins);
1598 * Create a 32bit to 64bit signed extension.
1600 * @param dbgi debug info
1601 * @param block the block where node nodes should be placed
1602 * @param val the value to extend
1603 * @param orig the original node
1605 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1606 ir_node *val, const ir_node *orig)
/* either cltd (short encoding, needs a ProduceVal for eax) or an
 * arithmetic right shift by 31, which replicates the sign bit */
1611 if (ia32_cg_config.use_short_sex_eax) {
1612 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1613 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1615 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1616 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1618 SET_IA32_ORIG_NODE(res, orig);
1623 * Generates an ia32 Div with additional infrastructure for the
1624 * register allocator if needed.
/* Shared transformation for firm Div and Mod nodes (dispatches on the
 * opcode).  Produces IDiv with a sign extension of the dividend for
 * signed modes, or Div with a zero high word for unsigned modes. */
1626 static ir_node *create_Div(ir_node *node)
1628 dbg_info *dbgi = get_irn_dbg_info(node);
1629 ir_node *block = get_nodes_block(node);
1630 ir_node *new_block = be_transform_node(block);
1631 int throws_exception = ir_throws_exception(node);
1638 ir_node *sign_extension;
1639 ia32_address_mode_t am;
1640 ia32_address_t *addr = &am.addr;
1642 /* the upper bits have random contents for smaller modes */
1643 switch (get_irn_opcode(node)) {
1645 op1 = get_Div_left(node);
1646 op2 = get_Div_right(node);
1647 mem = get_Div_mem(node);
1648 mode = get_Div_resmode(node);
1651 op1 = get_Mod_left(node);
1652 op2 = get_Mod_right(node);
1653 mem = get_Mod_mem(node);
1654 mode = get_Mod_resmode(node);
1657 panic("invalid divmod node %+F", node);
1660 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1662 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1663 is the memory of the consumed address. We can have only the second op as address
1664 in Div nodes, so check only op2. */
1665 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1667 if (mode_is_signed(mode)) {
1668 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1669 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1670 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned division: the high input word is simply zero */
1672 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1674 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1675 addr->index, new_mem, am.new_op2,
1676 am.new_op1, sign_extension);
1678 ir_set_throws_exception(new_node, throws_exception);
1680 set_irn_pinned(new_node, get_irn_pinned(node));
1682 set_am_attributes(new_node, &am);
1683 SET_IA32_ORIG_NODE(new_node, node);
1685 new_node = fix_mem_proj(new_node, &am);
1691 * Generates an ia32 Mod.
/* Transforms a firm Mod.  The x86 div instruction computes quotient and
 * remainder at once, so Mod shares create_Div (which dispatches on the
 * node's opcode to fetch the right operands). */
1693 static ir_node *gen_Mod(ir_node *node)
1695 return create_Div(node);
1699 * Generates an ia32 Div.
/* Transforms a firm Div.  Float divisions become xDiv (SSE2) or vfdiv
 * (x87); integer divisions go through the shared create_Div. */
1701 static ir_node *gen_Div(ir_node *node)
1703 ir_mode *mode = get_Div_resmode(node);
1704 if (mode_is_float(mode)) {
1705 ir_node *op1 = get_Div_left(node);
1706 ir_node *op2 = get_Div_right(node);
1708 if (ia32_cg_config.use_sse2) {
1709 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1711 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1715 return create_Div(node);
1719 * Creates an ia32 Shl.
1721 * @return The created ia32 Shl node
/* Transforms a firm Shl into an ia32 Shl; the count may be an immediate.
 * mode_neutral is safe: a left shift never reads the upper input bits. */
1723 static ir_node *gen_Shl(ir_node *node)
1725 ir_node *left = get_Shl_left(node);
1726 ir_node *right = get_Shl_right(node);
1728 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1729 match_mode_neutral | match_immediate);
1733 * Creates an ia32 Shr.
1735 * @return The created ia32 Shr node
/* Transforms a firm Shr into an ia32 Shr; the count may be an immediate.
 * No mode_neutral: a right shift depends on the upper input bits. */
1737 static ir_node *gen_Shr(ir_node *node)
1739 ir_node *left = get_Shr_left(node);
1740 ir_node *right = get_Shr_right(node);
1742 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1748 * Creates an ia32 Sar.
1750 * @return The created ia32 Shrs node
/* Transforms a firm Shrs (arithmetic right shift) into an ia32 Sar.
 * Special cases: Shrs by 31 is a plain sign extension (Cltd/Sar 31),
 * and Shrs(Shl(x,c),c) with c == 16/24 is an 8/16bit sign extension
 * expressed as an I2I Conv. */
1752 static ir_node *gen_Shrs(ir_node *node)
1754 ir_node *left = get_Shrs_left(node);
1755 ir_node *right = get_Shrs_right(node);
1757 if (is_Const(right)) {
1758 ir_tarval *tv = get_Const_tarval(right);
1759 long val = get_tarval_long(tv);
/* NOTE(review): the check 'val == 31' guarding this branch is not
 * visible in this excerpt — confirm against the full file */
1761 /* this is a sign extension */
1762 dbg_info *dbgi = get_irn_dbg_info(node);
1763 ir_node *block = be_transform_node(get_nodes_block(node));
1764 ir_node *new_op = be_transform_node(left);
1766 return create_sex_32_64(dbgi, block, new_op, node);
1770 /* 8 or 16 bit sign extension? */
1771 if (is_Const(right) && is_Shl(left)) {
1772 ir_node *shl_left = get_Shl_left(left);
1773 ir_node *shl_right = get_Shl_right(left);
1774 if (is_Const(shl_right)) {
1775 ir_tarval *tv1 = get_Const_tarval(right);
1776 ir_tarval *tv2 = get_Const_tarval(shl_right);
1777 if (tv1 == tv2 && tarval_is_long(tv1)) {
1778 long val = get_tarval_long(tv1);
1779 if (val == 16 || val == 24) {
1780 dbg_info *dbgi = get_irn_dbg_info(node);
1781 ir_node *block = get_nodes_block(node);
1791 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1800 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1806 * Creates an ia32 Rol.
1808 * @param op1 The first operator
1809 * @param op2 The second operator
1810 * @return The created ia32 RotL node
/* Builds an ia32 Rol (rotate left) from the given operands; the rotate
 * count may be an immediate. */
1812 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1814 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1820 * Creates an ia32 Ror.
1821 * NOTE: There is no RotR with immediate because this would always be a RotL
1822 * "imm-mode_size_bits" which can be pre-calculated.
1824 * @param op1 The first operator
1825 * @param op2 The second operator
1826 * @return The created ia32 RotR node
/* Builds an ia32 Ror (rotate right).  match_immediate is passed, though
 * per the comment above, a constant-count Ror would have been folded to
 * a Rol already. */
1828 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1830 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1836 * Creates an ia32 RotR or RotL (depending on the found pattern).
1838 * @return The created ia32 RotL or RotR node
/* Transforms a firm Rotl.  Rotl(x, -n) is the same as Ror(x, n), so a
 * Minus count selects the Ror form; otherwise a Rol is built. */
1840 static ir_node *gen_Rotl(ir_node *node)
1842 ir_node *op1 = get_Rotl_left(node);
1843 ir_node *op2 = get_Rotl_right(node);
1845 if (is_Minus(op2)) {
1846 return gen_Ror(node, op1, get_Minus_op(op2));
1849 return gen_Rol(node, op1, op2);
1855 * Transforms a Minus node.
1857 * @return The created ia32 Minus node
/* Transforms a firm Minus.  SSE2 floats negate by xor-ing the sign bit
 * with a constant loaded from a known sign-mask entity; x87 floats use
 * fchs; integers use Neg. */
1859 static ir_node *gen_Minus(ir_node *node)
1861 ir_node *op = get_Minus_op(node);
1862 ir_node *block = be_transform_node(get_nodes_block(node));
1863 dbg_info *dbgi = get_irn_dbg_info(node);
1864 ir_mode *mode = get_irn_mode(node);
1869 if (mode_is_float(mode)) {
1870 ir_node *new_op = be_transform_node(op);
1871 if (ia32_cg_config.use_sse2) {
1872 /* TODO: non-optimal... if we have many xXors, then we should
1873 * rather create a load for the const and use that instead of
1874 * several AM nodes... */
1875 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1877 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1878 noreg_GP, nomem, new_op, noreg_xmm);
/* sign-mask constant entity: SSIGN for 32bit, DSIGN for 64bit floats */
1880 size = get_mode_size_bits(mode);
1881 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1883 set_ia32_am_sc(new_node, ent);
1884 set_ia32_op_type(new_node, ia32_AddrModeS);
1885 set_ia32_ls_mode(new_node, mode);
1887 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1890 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1893 SET_IA32_ORIG_NODE(new_node, node);
1899 * Transforms a Not node.
1901 * @return The created ia32 Not node
/* Transforms a firm Not into an ia32 Not.  mode_b Nots must have been
 * lowered earlier, and float Nots do not exist. */
1903 static ir_node *gen_Not(ir_node *node)
1905 ir_node *op = get_Not_op(node);
1907 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1908 assert (! mode_is_float(get_irn_mode(node)));
1910 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Builds a float absolute value (optionally negated).  SSE2 ands the
 * value with an abs-mask constant entity (SABS/DABS); x87 uses fabs,
 * followed by fchs when 'negate' is requested. */
1913 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1914 bool negate, ir_node *node)
1916 ir_node *new_block = be_transform_node(block);
1917 ir_mode *mode = get_irn_mode(op);
1918 ir_node *new_op = be_transform_node(op);
1923 assert(mode_is_float(mode));
1925 if (ia32_cg_config.use_sse2) {
1926 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1927 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1928 noreg_GP, nomem, new_op, noreg_fp);
1930 size = get_mode_size_bits(mode);
1931 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1933 set_ia32_am_sc(new_node, ent);
1935 SET_IA32_ORIG_NODE(new_node, node);
1937 set_ia32_op_type(new_node, ia32_AddrModeS);
1938 set_ia32_ls_mode(new_node, mode);
1940 /* TODO, implement -Abs case */
1943 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1944 SET_IA32_ORIG_NODE(new_node, node);
/* negate == true: -Abs(x), realised as fchs after fabs */
1946 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1947 SET_IA32_ORIG_NODE(new_node, node);
1955 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Creates a bt (bit test) instruction for x & (1 << n), placed in the
 * block of the originating Cmp.  The tested bit ends up in CF. */
1957 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1959 dbg_info *dbgi = get_irn_dbg_info(cmp);
1960 ir_node *block = get_nodes_block(cmp);
1961 ir_node *new_block = be_transform_node(block);
1962 ir_node *op1 = be_transform_node(x);
1963 ir_node *op2 = be_transform_node(n);
1965 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Maps a firm ir_relation plus the operand mode to the matching ia32
 * condition code.  Float relations need parity-aware codes (unordered
 * results set PF), signed integers use less/greater codes, unsigned
 * integers use below/above codes.  ir_relation_false/true have no jcc
 * equivalent and panic. */
1968 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1971 if (mode_is_float(mode)) {
1973 case ir_relation_equal: return ia32_cc_float_equal;
1974 case ir_relation_less: return ia32_cc_float_below;
1975 case ir_relation_less_equal: return ia32_cc_float_below_equal;
1976 case ir_relation_greater: return ia32_cc_float_above;
1977 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
1978 case ir_relation_less_greater: return ia32_cc_not_equal;
1979 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
1980 case ir_relation_unordered: return ia32_cc_parity;
1981 case ir_relation_unordered_equal: return ia32_cc_equal;
1982 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
1983 case ir_relation_unordered_less_equal:
1984 return ia32_cc_float_unordered_below_equal;
1985 case ir_relation_unordered_greater:
1986 return ia32_cc_float_unordered_above;
1987 case ir_relation_unordered_greater_equal:
1988 return ia32_cc_float_unordered_above_equal;
1989 case ir_relation_unordered_less_greater:
1990 return ia32_cc_float_not_equal;
1991 case ir_relation_false:
1992 case ir_relation_true:
1993 /* should we introduce a jump always/jump never? */
1996 panic("Unexpected float pnc");
1997 } else if (mode_is_signed(mode)) {
1999 case ir_relation_unordered_equal:
2000 case ir_relation_equal: return ia32_cc_equal;
2001 case ir_relation_unordered_less:
2002 case ir_relation_less: return ia32_cc_less;
2003 case ir_relation_unordered_less_equal:
2004 case ir_relation_less_equal: return ia32_cc_less_equal;
2005 case ir_relation_unordered_greater:
2006 case ir_relation_greater: return ia32_cc_greater;
2007 case ir_relation_unordered_greater_equal:
2008 case ir_relation_greater_equal: return ia32_cc_greater_equal;
2009 case ir_relation_unordered_less_greater:
2010 case ir_relation_less_greater: return ia32_cc_not_equal;
2011 case ir_relation_less_equal_greater:
2012 case ir_relation_unordered:
2013 case ir_relation_false:
2014 case ir_relation_true:
2015 /* introduce jump always/jump never? */
2018 panic("Unexpected pnc");
/* unsigned integer modes */
2021 case ir_relation_unordered_equal:
2022 case ir_relation_equal: return ia32_cc_equal;
2023 case ir_relation_unordered_less:
2024 case ir_relation_less: return ia32_cc_below;
2025 case ir_relation_unordered_less_equal:
2026 case ir_relation_less_equal: return ia32_cc_below_equal;
2027 case ir_relation_unordered_greater:
2028 case ir_relation_greater: return ia32_cc_above;
2029 case ir_relation_unordered_greater_equal:
2030 case ir_relation_greater_equal: return ia32_cc_above_equal;
2031 case ir_relation_unordered_less_greater:
2032 case ir_relation_less_greater: return ia32_cc_not_equal;
2033 case ir_relation_less_equal_greater:
2034 case ir_relation_unordered:
2035 case ir_relation_false:
2036 case ir_relation_true:
2037 /* introduce jump always/jump never? */
2040 panic("Unexpected pnc");
/* Transforms a Cmp into a flags-producing node and stores the condition
 * code to test in *cc_out.  Recognises And(1<<n, x) bit tests (bt) when
 * enabled, and widens ptr != 0 style relations with the < bit so that an
 * equality test can reuse flags of a predecessor node. */
2044 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2046 /* must have a Cmp as input */
2047 ir_relation relation = get_Cmp_relation(cmp);
2048 ir_relation possible;
2049 ir_node *l = get_Cmp_left(cmp);
2050 ir_node *r = get_Cmp_right(cmp);
2051 ir_mode *mode = get_irn_mode(l);
2054 /* check for bit-test */
2055 if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2056 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2057 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2059 ir_node *la = get_And_left(l);
2060 ir_node *ra = get_And_right(l);
2067 ir_node *c = get_Shl_left(la);
2068 if (is_Const_1(c) && is_Const_0(r)) {
2069 /* (1 << n) & ra) */
2070 ir_node *n = get_Shl_right(la);
2071 flags = gen_bt(cmp, ra, n);
2072 /* the bit is copied into the CF flag */
2073 if (relation & ir_relation_equal)
2074 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2076 *cc_out = ia32_cc_below; /* test for CF=1 */
2082 /* the middle-end tries to eliminate impossible relations, so a ptr != 0
2083 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2084 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2085 * a predecessor node). So add the < bit */
2086 possible = ir_get_possible_cmp_relations(l, r);
2087 if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2088 || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2089 relation |= ir_relation_less_greater;
2091 /* just do a normal transformation of the Cmp */
2092 *cc_out = relation_to_condition_code(relation, mode);
2093 flags = be_transform_node(cmp);
2098 * Transform a node returning a "flag" result.
2100 * @param node the node to transform
2101 * @param cc_out the compare mode to use
/* Transforms a node producing a "flag" result; currently only Cmp is
 * accepted, so this simply forwards to get_flags_node_cmp. */
2103 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
2105 assert(is_Cmp(node));
2106 return get_flags_node_cmp(node, cc_out);
2110 * Transforms a Load.
2112 * @return the created ia32 Load node
/* Transforms a firm Load.  Builds an ia32 address mode for the pointer,
 * then selects xLoad (SSE2 float), vfld (x87 float), a zero/sign
 * extending I2I Conv for sub-32bit integer modes, or a plain Load.
 * Floating (unpinned) loads are marked rematerializable. */
2114 static ir_node *gen_Load(ir_node *node)
2116 ir_node *old_block = get_nodes_block(node);
2117 ir_node *block = be_transform_node(old_block);
2118 ir_node *ptr = get_Load_ptr(node);
2119 ir_node *mem = get_Load_mem(node);
2120 ir_node *new_mem = be_transform_node(mem);
2121 dbg_info *dbgi = get_irn_dbg_info(node);
2122 ir_mode *mode = get_Load_mode(node);
2123 int throws_exception = ir_throws_exception(node);
2127 ia32_address_t addr;
2129 /* construct load address */
2130 memset(&addr, 0, sizeof(addr));
2131 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2138 base = be_transform_node(base);
2144 idx = be_transform_node(idx);
2147 if (mode_is_float(mode)) {
2148 if (ia32_cg_config.use_sse2) {
2149 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2152 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2156 assert(mode != mode_b);
2158 /* create a conv node with address mode for smaller modes */
2159 if (get_mode_size_bits(mode) < 32) {
2160 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2161 new_mem, noreg_GP, mode);
2163 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2166 ir_set_throws_exception(new_node, throws_exception);
2168 set_irn_pinned(new_node, get_irn_pinned(node));
2169 set_ia32_op_type(new_node, ia32_AddrModeS);
2170 set_ia32_ls_mode(new_node, mode);
2171 set_address(new_node, &addr);
2173 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants use the same result proj number, so the
 * rematerializable flag is safe regardless of which was built */
2174 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2175 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2176 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2177 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2180 SET_IA32_ORIG_NODE(new_node, node);
/* Decides whether 'node' (a Proj of a Load) can be consumed in
 * destination address mode by a store in 'block' at address 'ptr'.
 * Requires: the load result has a single user, load and store live in
 * the same block and use the same pointer, 'other' does not depend on
 * the load, and no other memory op prevents AM. */
2185 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2186 ir_node *ptr, ir_node *other)
2193 /* we only use address mode if we're the only user of the load */
2194 if (get_irn_n_edges(node) > 1)
2197 load = get_Proj_pred(node);
2200 if (get_nodes_block(load) != block)
2203 /* store should have the same pointer as the load */
2204 if (get_Load_ptr(load) != ptr)
2207 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2208 if (other != NULL &&
2209 get_nodes_block(other) == block &&
2210 heights_reachable_in_block(ia32_heights, other, load)) {
2214 if (ia32_prevents_AM(block, load, mem))
2216 /* Store should be attached to the load via mem */
2217 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Builds a destination-address-mode binop (e.g. AddMem) for a
 * Store(binop(Load(p), x), p) pattern.  Tries the load on the left
 * operand first, then the right if the operation is commutative; the
 * other operand may become an immediate.  An 8bit constructor variant
 * is chosen for 8bit modes.  The consumed load's mem proj is redirected
 * to the new node. */
2222 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2223 ir_node *mem, ir_node *ptr, ir_mode *mode,
2224 construct_binop_dest_func *func,
2225 construct_binop_dest_func *func8bit,
2226 match_flags_t flags)
2228 ir_node *src_block = get_nodes_block(node);
2236 ia32_address_mode_t am;
2237 ia32_address_t *addr = &am.addr;
2238 memset(&am, 0, sizeof(am));
2240 assert(flags & match_immediate); /* there is no destam node without... */
2241 commutative = (flags & match_commutative) != 0;
2243 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2244 build_address(&am, op1, ia32_create_am_double_use);
2245 new_op = create_immediate_or_transform(op2, 0);
2246 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2247 build_address(&am, op2, ia32_create_am_double_use);
2248 new_op = create_immediate_or_transform(op1, 0);
2253 if (addr->base == NULL)
2254 addr->base = noreg_GP;
2255 if (addr->index == NULL)
2256 addr->index = noreg_GP;
2257 if (addr->mem == NULL)
2260 dbgi = get_irn_dbg_info(node);
2261 block = be_transform_node(src_block);
2262 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2264 if (get_mode_size_bits(mode) == 8) {
2265 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2267 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2269 set_address(new_node, addr);
2270 set_ia32_op_type(new_node, ia32_AddrModeD);
2271 set_ia32_ls_mode(new_node, mode);
2272 SET_IA32_ORIG_NODE(new_node, node);
/* the consumed load and its mem proj are now represented by new_node */
2274 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2275 mem_proj = be_transform_node(am.mem_proj);
2276 be_set_transformed_node(am.mem_proj, new_node);
2277 be_set_transformed_node(mem_proj, new_node);
/* Builds a destination-address-mode unop (e.g. NotMem, NegMem) for a
 * Store(unop(Load(p)), p) pattern; mirrors dest_am_binop without the
 * extra operand.  The consumed load's mem proj is redirected to the new
 * node. */
2282 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2283 ir_node *ptr, ir_mode *mode,
2284 construct_unop_dest_func *func)
2286 ir_node *src_block = get_nodes_block(node);
2292 ia32_address_mode_t am;
2293 ia32_address_t *addr = &am.addr;
2295 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2298 memset(&am, 0, sizeof(am));
2299 build_address(&am, op, ia32_create_am_double_use);
2301 dbgi = get_irn_dbg_info(node);
2302 block = be_transform_node(src_block);
2303 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2304 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2305 set_address(new_node, addr);
2306 set_ia32_op_type(new_node, ia32_AddrModeD);
2307 set_ia32_ls_mode(new_node, mode);
2308 SET_IA32_ORIG_NODE(new_node, node);
/* the consumed load and its mem proj are now represented by new_node */
2310 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2311 mem_proj = be_transform_node(am.mem_proj);
2312 be_set_transformed_node(am.mem_proj, new_node);
2313 be_set_transformed_node(mem_proj, new_node);
/* Tries to turn Store(Mux(cond, 0/1, 1/0)) into a SetccMem: an 8bit
 * setcc writing directly to memory.  The condition code is negated for
 * the Mux(cond, 0, 1) orientation; float special cases (parity) cannot
 * be expressed by a single setcc and bail out. */
2318 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2320 ir_mode *mode = get_irn_mode(node);
2321 ir_node *mux_true = get_Mux_true(node);
2322 ir_node *mux_false = get_Mux_false(node);
2330 ia32_condition_code_t cc;
2331 ia32_address_t addr;
/* setcc only writes a single byte */
2333 if (get_mode_size_bits(mode) != 8)
2336 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2338 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2344 cond = get_Mux_sel(node);
2345 flags = get_flags_node(cond, &cc);
2346 /* we can't handle the float special cases with SetM */
2347 if (cc & ia32_cc_additional_float_cases)
2350 cc = ia32_negate_condition_code(cc);
2352 build_address_ptr(&addr, ptr, mem);
2354 dbgi = get_irn_dbg_info(node);
2355 block = get_nodes_block(node);
2356 new_block = be_transform_node(block);
2357 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2358 addr.index, addr.mem, flags, cc);
2359 set_address(new_node, &addr);
2360 set_ia32_op_type(new_node, ia32_AddrModeD);
2361 set_ia32_ls_mode(new_node, mode);
2362 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to turn a Store into a destination-address-mode operation
 * (read-modify-write memory instruction).  Dispatches on the opcode of
 * the stored value: Add/Sub/And/Or/Eor/shifts/rotate get *Mem binops
 * (Add with +/-1 becomes IncMem/DecMem when enabled), Mux becomes
 * SetccMem, Minus/Not become NegMem/NotMem.  Returns the new node or
 * NULL when no pattern applies. */
2367 static ir_node *try_create_dest_am(ir_node *node)
2369 ir_node *val = get_Store_value(node);
2370 ir_node *mem = get_Store_mem(node);
2371 ir_node *ptr = get_Store_ptr(node);
2372 ir_mode *mode = get_irn_mode(val);
2373 unsigned bits = get_mode_size_bits(mode);
2378 /* handle only GP modes for now... */
2379 if (!ia32_mode_needs_gp_reg(mode))
2383 /* store must be the only user of the val node */
2384 if (get_irn_n_edges(val) > 1)
2386 /* skip pointless convs */
2388 ir_node *conv_op = get_Conv_op(val);
2389 ir_mode *pred_mode = get_irn_mode(conv_op);
2390 if (!ia32_mode_needs_gp_reg(pred_mode))
2392 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2400 /* value must be in the same block */
2401 if (get_nodes_block(node) != get_nodes_block(val))
2404 switch (get_irn_opcode(val)) {
2406 op1 = get_Add_left(val);
2407 op2 = get_Add_right(val);
2408 if (ia32_cg_config.use_incdec) {
2409 if (is_Const_1(op2)) {
2410 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2412 } else if (is_Const_Minus_1(op2)) {
2413 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2417 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2418 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2419 match_commutative | match_immediate);
2422 op1 = get_Sub_left(val);
2423 op2 = get_Sub_right(val);
2424 if (is_Const(op2)) {
2425 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2427 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2428 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2432 op1 = get_And_left(val);
2433 op2 = get_And_right(val);
2434 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2435 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2436 match_commutative | match_immediate);
2439 op1 = get_Or_left(val);
2440 op2 = get_Or_right(val);
2441 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2442 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2443 match_commutative | match_immediate);
2446 op1 = get_Eor_left(val);
2447 op2 = get_Eor_right(val);
2448 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2449 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2450 match_commutative | match_immediate);
2453 op1 = get_Shl_left(val);
2454 op2 = get_Shl_right(val);
2455 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2456 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2460 op1 = get_Shr_left(val);
2461 op2 = get_Shr_right(val);
2462 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2463 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2467 op1 = get_Shrs_left(val);
2468 op2 = get_Shrs_right(val);
2469 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2470 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2474 op1 = get_Rotl_left(val);
2475 op2 = get_Rotl_right(val);
2476 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2477 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2480 /* TODO: match ROR patterns... */
2482 new_node = try_create_SetMem(val, ptr, mem);
2486 op1 = get_Minus_op(val);
2487 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2490 /* should be lowered already */
2491 assert(mode != mode_b);
2492 op1 = get_Not_op(val);
2493 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* the replacement inherits the store's pinned state if stricter */
2499 if (new_node != NULL) {
2500 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2501 get_irn_pinned(node) == op_pin_state_pinned) {
2502 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Checks whether an integer mode is directly usable for an x87
 * int<->float transfer: it must be signed and 16 or 32 bits wide.
 * (Several lines of this body are elided in this excerpt.)
 */
2509 static bool possible_int_mode_for_fp(ir_mode *mode)
2513 	if (!mode_is_signed(mode))
2515 	size = get_mode_size_bits(mode);
2516 	if (size != 16 && size != 32)
/**
 * Returns non-zero iff @p node is a Conv from a float mode into an
 * integer mode that a fist store can handle (see possible_int_mode_for_fp).
 */
2521 static int is_float_to_int_conv(const ir_node *node)
2523 	ir_mode *mode = get_irn_mode(node);
	/* the target integer mode must be storable by fist */
2527 	if (!possible_int_mode_for_fp(mode))
2532 	conv_op = get_Conv_op(node);
2533 	conv_mode = get_irn_mode(conv_op);
	/* the source of the Conv must be a float value */
2535 	if (!mode_is_float(conv_mode))
2542 * Transform a Store(floatConst) into a sequence of
2545 * @return the created ia32 Store node
/**
 * Transforms a Store of a float Const into a sequence of plain integer
 * Stores of the constant's raw bytes (4 or 2 bytes at a time).
 * (Parts of the loop around the byte extraction are elided in this excerpt.)
 *
 * @return the created ia32 Store node (or a Sync of the partial stores)
 */
2547 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2549 	ir_mode *mode = get_irn_mode(cns);
2550 	unsigned size = get_mode_size_bytes(mode);
2551 	ir_tarval *tv = get_Const_tarval(cns);
2552 	ir_node *block = get_nodes_block(node);
2553 	ir_node *new_block = be_transform_node(block);
2554 	ir_node *ptr = get_Store_ptr(node);
2555 	ir_node *mem = get_Store_mem(node);
2556 	dbg_info *dbgi = get_irn_dbg_info(node);
2559 	int throws_exception = ir_throws_exception(node);
2561 	ia32_address_t addr;
2563 	build_address_ptr(&addr, ptr, mem);
	/* assemble up to 4 raw constant bytes (little endian) into one immediate */
2570 	val= get_tarval_sub_bits(tv, ofs) |
2571 	(get_tarval_sub_bits(tv, ofs + 1) << 8) |
2572 	(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2573 	(get_tarval_sub_bits(tv, ofs + 3) << 24);
2576 	} else if (size >= 2) {
	/* remaining 2 bytes are stored as one 16bit immediate */
2577 	val= get_tarval_sub_bits(tv, ofs) |
2578 	(get_tarval_sub_bits(tv, ofs + 1) << 8);
2582 	panic("invalid size of Store float to mem (%+F)", node);
2584 	ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2586 	ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2587 	addr.index, addr.mem, imm);
2588 	ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2590 	ir_set_throws_exception(new_node, throws_exception);
2591 	set_irn_pinned(new_node, get_irn_pinned(node));
2592 	set_ia32_op_type(new_node, ia32_AddrModeD);
2593 	set_ia32_ls_mode(new_node, mode);
2594 	set_address(new_node, &addr);
2595 	SET_IA32_ORIG_NODE(new_node, node);
	/* advance the address offset and continue until all bytes are stored */
2602 	addr.offset += delta;
2603 	} while (size != 0);
	/* multiple partial stores: combine their memory Projs into a Sync */
2606 	return new_rd_Sync(dbgi, new_block, i, ins);
2608 	return get_Proj_pred(ins[0]);
2613 * Generate a vfist or vfisttp instruction.
/**
 * Generates a vfist (or, when available, vfisttp) instruction storing the
 * x87 value @p val to [base+index] with memory dependency @p mem.
 */
2615 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2616 	ir_node *index, ir_node *mem, ir_node *val)
2618 	if (ia32_cg_config.use_fisttp) {
2619 	/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2620 	if other users exists */
2621 	ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2622 	ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
	/* keep the popped result alive so a copy is materialized when needed */
2623 	be_new_Keep(block, 1, &value);
	/* no fisttp: use classic fist with the rounding mode forced to truncate */
2627 	ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2630 	ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2636 * Transforms a general (no special case) Store.
2638 * @return the created ia32 Store node
/**
 * Transforms a general (no special case) Store: tries destination address
 * mode first, otherwise builds an explicit address mode and emits an
 * xStore/vfst (float), a vfist (float->int conv) or a gp Store.
 *
 * @return the created ia32 Store node
 */
2640 static ir_node *gen_general_Store(ir_node *node)
2642 	ir_node *val = get_Store_value(node);
2643 	ir_mode *mode = get_irn_mode(val);
2644 	ir_node *block = get_nodes_block(node);
2645 	ir_node *new_block = be_transform_node(block);
2646 	ir_node *ptr = get_Store_ptr(node);
2647 	ir_node *mem = get_Store_mem(node);
2648 	dbg_info *dbgi = get_irn_dbg_info(node);
2649 	int throws_exception = ir_throws_exception(node);
2652 	ia32_address_t addr;
2654 	/* check for destination address mode */
2655 	new_node = try_create_dest_am(node);
2656 	if (new_node != NULL)
2659 	/* construct store address */
2660 	memset(&addr, 0, sizeof(addr));
2661 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2663 	if (addr.base == NULL) {
2664 	addr.base = noreg_GP;
2666 	addr.base = be_transform_node(addr.base);
2669 	if (addr.index == NULL) {
2670 	addr.index = noreg_GP;
2672 	addr.index = be_transform_node(addr.index);
2674 	addr.mem = be_transform_node(mem);
2676 	if (mode_is_float(mode)) {
2677 	/* Convs (and strict-Convs) before stores are unnecessary if the mode
2679 	while (is_Conv(val) && mode == get_irn_mode(val)) {
2680 	ir_node *op = get_Conv_op(val);
2681 	if (!mode_is_float(get_irn_mode(op)))
2685 	new_val = be_transform_node(val);
2686 	if (ia32_cg_config.use_sse2) {
2687 	new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2688 	addr.index, addr.mem, new_val);
2690 	new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2691 	addr.index, addr.mem, new_val, mode);
	/* float->int Conv feeding a Store: fold Conv+Store into a fist */
2693 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2694 	val = get_Conv_op(val);
2696 	/* TODO: is this optimisation still necessary at all (middleend)? */
2697 	/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2699 	while (is_Conv(val)) {
2700 	ir_node *op = get_Conv_op(val);
2701 	if (!mode_is_float(get_irn_mode(op)))
2703 	if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2707 	new_val = be_transform_node(val);
2708 	new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
	/* plain integer store; value may become an immediate operand */
2710 	new_val = create_immediate_or_transform(val, 0);
2711 	assert(mode != mode_b);
2713 	if (get_mode_size_bits(mode) == 8) {
2714 	new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2715 	addr.index, addr.mem, new_val);
2717 	new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2718 	addr.index, addr.mem, new_val);
2721 	ir_set_throws_exception(new_node, throws_exception);
2723 	set_irn_pinned(new_node, get_irn_pinned(node));
2724 	set_ia32_op_type(new_node, ia32_AddrModeD);
2725 	set_ia32_ls_mode(new_node, mode);
2727 	set_address(new_node, &addr);
2728 	SET_IA32_ORIG_NODE(new_node, node);
2734 * Transforms a Store.
2736 * @return the created ia32 Store node
/**
 * Transforms a Store node; dispatches float-constant stores to
 * gen_float_const_Store, everything else to gen_general_Store.
 *
 * @return the created ia32 Store node
 */
2738 static ir_node *gen_Store(ir_node *node)
2740 	ir_node *val = get_Store_value(node);
2741 	ir_mode *mode = get_irn_mode(val);
2743 	if (mode_is_float(mode) && is_Const(val)) {
2744 	/* We can transform every floating const store
2745 	into a sequence of integer stores.
2746 	If the constant is already in a register,
2747 	it would be better to use it, but we don't
2748 	have this information here. */
2749 	return gen_float_const_Store(node, val);
2751 	return gen_general_Store(node);
2755 * Transforms a Switch.
2757 * @return the created ia32 SwitchJmp node
/**
 * Transforms a switch-Cond into an ia32 SwitchJmp that indexes a private,
 * constant jump-table entity (scale 2, i.e. 4-byte entries).
 *
 * @return the created ia32 SwitchJmp node
 */
2759 static ir_node *create_Switch(ir_node *node)
2761 	dbg_info *dbgi = get_irn_dbg_info(node);
2762 	ir_node *block = be_transform_node(get_nodes_block(node));
2763 	ir_node *sel = get_Cond_selector(node);
2764 	ir_node *new_sel = be_transform_node(sel);
2765 	long default_pn = get_Cond_default_proj(node);
	/* selector must already have been lowered to 32 bit */
2769 	assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
	/* jump table lives in a fresh private constant entity */
2771 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2772 	set_entity_visibility(entity, ir_visibility_private);
2773 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2775 	/* TODO: we could perform some more matching here to also use the base
2776 	* register of the address mode */
2778 	= new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, default_pn);
2779 	set_ia32_am_scale(new_node, 2);
2780 	set_ia32_am_sc(new_node, entity);
2781 	set_ia32_op_type(new_node, ia32_AddrModeS);
2782 	set_ia32_ls_mode(new_node, mode_Iu);
2783 	SET_IA32_ORIG_NODE(new_node, node);
2784 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2785 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2791 * Transform a Cond node.
/**
 * Transforms a Cond node: boolean selectors become flags + Jcc,
 * non-boolean selectors are treated as a switch (create_Switch).
 */
2793 static ir_node *gen_Cond(ir_node *node)
2795 	ir_node *block = get_nodes_block(node);
2796 	ir_node *new_block = be_transform_node(block);
2797 	dbg_info *dbgi = get_irn_dbg_info(node);
2798 	ir_node *sel = get_Cond_selector(node);
2799 	ir_mode *sel_mode = get_irn_mode(sel);
2800 	ir_node *flags = NULL;
2802 	ia32_condition_code_t cc;
2804 	if (sel_mode != mode_b) {
2805 	return create_Switch(node);
2808 	/* we get flags from a Cmp */
2809 	flags = get_flags_node(sel, &cc);
2811 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2812 	SET_IA32_ORIG_NODE(new_node, node);
2818 * Transform a be_Copy.
/**
 * Transforms a be_Copy: duplicates the node and normalizes gp-register
 * values to mode_Iu.
 */
2820 static ir_node *gen_be_Copy(ir_node *node)
2822 	ir_node *new_node = be_duplicate_node(node);
2823 	ir_mode *mode = get_irn_mode(new_node);
2825 	if (ia32_mode_needs_gp_reg(mode)) {
2826 	set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 float compare: vFucomi when available, otherwise
 * vFtstFnstsw (compare against 0) or vFucomFnstsw followed by Sahf to
 * transfer the fpu status word into the eflags register.
 */
2832 static ir_node *create_Fucom(ir_node *node)
2834 	dbg_info *dbgi = get_irn_dbg_info(node);
2835 	ir_node *block = get_nodes_block(node);
2836 	ir_node *new_block = be_transform_node(block);
2837 	ir_node *left = get_Cmp_left(node);
2838 	ir_node *new_left = be_transform_node(left);
2839 	ir_node *right = get_Cmp_right(node);
2843 	if (ia32_cg_config.use_fucomi) {
2844 	new_right = be_transform_node(right);
2845 	new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2847 	set_ia32_commutative(new_node);
2848 	SET_IA32_ORIG_NODE(new_node, node);
	/* no fucomi: compare via fnstsw and move status word to flags */
2850 	if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2851 	new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2853 	new_right = be_transform_node(right);
2854 	new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2857 	set_ia32_commutative(new_node);
2859 	SET_IA32_ORIG_NODE(new_node, node);
	/* sahf copies ah (fpu status flags) into eflags */
2861 	new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2862 	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi compare for a float Cmp node, using address-mode
 * matching for the operands.
 */
2870 static ir_node *create_Ucomi(ir_node *node)
2872 	dbg_info *dbgi = get_irn_dbg_info(node);
2871 	ir_node *src_block = get_nodes_block(node);
2872 	ir_node *new_block = be_transform_node(src_block);
2873 	ir_node *left = get_Cmp_left(node);
2874 	ir_node *right = get_Cmp_right(node);
2876 	ia32_address_mode_t am;
2877 	ia32_address_t *addr = &am.addr;
2879 	match_arguments(&am, src_block, left, right, NULL,
2880 	match_commutative | match_am);
2882 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2883 	addr->mem, am.new_op1, am.new_op2,
2885 	set_am_attributes(new_node, &am);
2887 	SET_IA32_ORIG_NODE(new_node, node);
2889 	new_node = fix_mem_proj(new_node, &am);
2895 * returns true if it is assured, that the upper bits of a node are "clean"
2896 * which means for a 16 or 8 bit value, that the upper bits in the register
2897 * are 0 for unsigned and a copy of the last significant bit for signed
/**
 * Returns true if it is assured that the upper bits of a transformed node
 * are "clean" for the given (sub-32bit) mode: zero for unsigned modes,
 * sign-copies for signed modes. Used to widen compares to 32 bit.
 */
2900 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2902 	assert(ia32_mode_needs_gp_reg(mode));
	/* for >= 32 bit modes there are no "upper" bits to worry about */
2903 	if (get_mode_size_bits(mode) >= 32)
2906 	if (is_Proj(transformed_node))
2907 	return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2909 	switch (get_ia32_irn_opcode(transformed_node)) {
2910 	case iro_ia32_Conv_I2I:
2911 	case iro_ia32_Conv_I2I8Bit: {
2912 	ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
	/* a conv only cleans if signedness matches and it narrows enough */
2913 	if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2915 	if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2922 	if (mode_is_signed(mode)) {
2923 	return false; /* TODO handle signed modes */
2925 	ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2926 	if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2927 	const ia32_immediate_attr_t *attr
2928 	= get_ia32_immediate_attr_const(right);
	/* shifting right by at least (32 - bits) zeroes the upper bits */
2929 	if (attr->symconst == 0 &&
2930 	(unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2934 	return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2938 	/* TODO too conservative if shift amount is constant */
2939 	return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2942 	if (!mode_is_signed(mode)) {
	/* unsigned And: one clean operand masks the other's upper bits */
2944 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2945 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2947 	/* TODO if one is known to be zero extended, then || is sufficient */
2952 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2953 	upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2955 	case iro_ia32_Const:
2956 	case iro_ia32_Immediate: {
2957 	const ia32_immediate_attr_t *attr =
2958 	get_ia32_immediate_attr_const(transformed_node);
2959 	if (mode_is_signed(mode)) {
	/* signed: upper bits must all equal the mode's sign bit */
2960 	long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2961 	return shifted == 0 || shifted == -1;
2963 	unsigned long shifted = (unsigned long)attr->offset;
2964 	shifted >>= get_mode_size_bits(mode)-1;
2966 	return shifted == 0;
2976 * Generate code for a Cmp.
/**
 * Generates code for a Cmp: float compares go to Ucomi (SSE) or Fucom
 * (x87); integer compares become Test (for "x & y ==/!= 0") or Cmp,
 * widened to 32 bit when the operands' upper bits are known clean.
 */
2978 static ir_node *gen_Cmp(ir_node *node)
2980 	dbg_info *dbgi = get_irn_dbg_info(node);
2981 	ir_node *block = get_nodes_block(node);
2982 	ir_node *new_block = be_transform_node(block);
2983 	ir_node *left = get_Cmp_left(node);
2984 	ir_node *right = get_Cmp_right(node);
2985 	ir_mode *cmp_mode = get_irn_mode(left);
2987 	ia32_address_mode_t am;
2988 	ia32_address_t *addr = &am.addr;
2990 	if (mode_is_float(cmp_mode)) {
2991 	if (ia32_cg_config.use_sse2) {
2992 	return create_Ucomi(node);
2994 	return create_Fucom(node);
2998 	assert(ia32_mode_needs_gp_reg(cmp_mode));
3000 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3001 	if (is_Const_0(right) &&
3003 	get_irn_n_edges(left) == 1) {
3004 	/* Test(and_left, and_right) */
3005 	ir_node *and_left = get_And_left(left);
3006 	ir_node *and_right = get_And_right(left);
3008 	/* matze: code here used mode instead of cmd_mode, I think it is always
3009 	* the same as cmp_mode, but I leave this here to see if this is really
3012 	assert(get_irn_mode(and_left) == cmp_mode);
3014 	match_arguments(&am, block, and_left, and_right, NULL,
3016 	match_am | match_8bit_am | match_16bit_am |
3017 	match_am_and_immediates | match_immediate);
3019 	/* use 32bit compare mode if possible since the opcode is smaller */
3020 	if (upper_bits_clean(am.new_op1, cmp_mode) &&
3021 	upper_bits_clean(am.new_op2, cmp_mode)) {
3022 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3025 	if (get_mode_size_bits(cmp_mode) == 8) {
3026 	new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3027 	addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3029 	new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3030 	addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3033 	/* Cmp(left, right) */
3034 	match_arguments(&am, block, left, right, NULL,
3035 	match_commutative | match_am | match_8bit_am |
3036 	match_16bit_am | match_am_and_immediates |
3038 	/* use 32bit compare mode if possible since the opcode is smaller */
3039 	if (upper_bits_clean(am.new_op1, cmp_mode) &&
3040 	upper_bits_clean(am.new_op2, cmp_mode)) {
3041 	cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3044 	if (get_mode_size_bits(cmp_mode) == 8) {
3045 	new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3046 	addr->index, addr->mem, am.new_op1,
3047 	am.new_op2, am.ins_permuted);
3049 	new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3050 	addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3053 	set_am_attributes(new_node, &am);
3054 	set_ia32_ls_mode(new_node, cmp_mode);
3056 	SET_IA32_ORIG_NODE(new_node, node);
3058 	new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a CMovcc for a Mux whose values need a gp register; the condition
 * code is negated when the address-mode matcher permuted the operands.
 */
3063 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3064 	ia32_condition_code_t cc)
3066 	dbg_info *dbgi = get_irn_dbg_info(node);
3067 	ir_node *block = get_nodes_block(node);
3068 	ir_node *new_block = be_transform_node(block);
3069 	ir_node *val_true = get_Mux_true(node);
3070 	ir_node *val_false = get_Mux_false(node);
3072 	ia32_address_mode_t am;
3073 	ia32_address_t *addr;
3075 	assert(ia32_cg_config.use_cmov);
3076 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3080 	match_arguments(&am, block, val_false, val_true, flags,
3081 	match_commutative | match_am | match_16bit_am | match_mode_neutral);
	/* operands swapped by the matcher -> invert the condition */
3083 	if (am.ins_permuted)
3084 	cc = ia32_negate_condition_code(cc);
3086 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3087 	addr->mem, am.new_op1, am.new_op2, new_flags,
3089 	set_am_attributes(new_node, &am);
3091 	SET_IA32_ORIG_NODE(new_node, node);
3093 	new_node = fix_mem_proj(new_node, &am);
3099 * Creates a ia32 Setcc instruction.
/**
 * Creates an ia32 Setcc instruction producing 0/1 from @p flags and, when
 * the result mode is wider than 8 bit, zero-extends it with a Conv_I2I8Bit.
 */
3101 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3102 	ir_node *flags, ia32_condition_code_t cc,
3105 	ir_mode *mode = get_irn_mode(orig_node);
3108 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3109 	SET_IA32_ORIG_NODE(new_node, orig_node);
3111 	/* we might need to conv the result up */
3112 	if (get_mode_size_bits(mode) > 8) {
3113 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3114 	nomem, new_node, mode_Bu);
3115 	SET_IA32_ORIG_NODE(new_node, orig_node);
3122 * Create instruction for an unsigned Difference or Zero.
/**
 * Creates an "unsigned difference or zero" sequence for a Mux:
 * sub, then sbb 0 + not to build an all-ones/all-zeroes mask from the
 * carry flag, and finally and the mask with the difference.
 */
3124 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3126 	ir_mode *mode = get_irn_mode(psi);
3136 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3137 	match_mode_neutral | match_am | match_immediate | match_two_users);
3139 	block = get_nodes_block(new_node);
3141 	if (is_Proj(new_node)) {
3142 	sub = get_Proj_pred(new_node);
	/* we also need the flags output of the Sub -> make it mode_T */
3145 	set_irn_mode(sub, mode_T);
3146 	new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3148 	assert(is_ia32_Sub(sub));
3149 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3151 	dbgi = get_irn_dbg_info(psi);
	/* sbb 0 yields -carry; not turns it into the 0/~0 mask we need */
3152 	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3153 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3155 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3156 	set_ia32_commutative(new_node);
3161 * Create an const array of two float consts.
3163 * @param c0 the first constant
3164 * @param c1 the second constant
3165 * @param new_mode IN/OUT for the mode of the constants, if NULL
3166 * smallest possible mode will be used
/**
 * Creates a constant array entity holding the two float constants
 * @p c0 and @p c1, converting them losslessly to the smallest common
 * mode (float, then double) when *new_mode is NULL.
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT mode of the constants; on NULL the smallest
 *                  possible mode is chosen and returned here
 */
3168 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3171 	ir_mode *mode = *new_mode;
3173 	ir_initializer_t *initializer;
3174 	ir_tarval *tv0 = get_Const_tarval(c0);
3175 	ir_tarval *tv1 = get_Const_tarval(c1);
3178 	/* detect the best mode for the constants */
3179 	mode = get_tarval_mode(tv0);
3181 	if (mode != mode_F) {
3182 	if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3183 	tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3185 	tv0 = tarval_convert_to(tv0, mode);
3186 	tv1 = tarval_convert_to(tv1, mode);
3187 	} else if (mode != mode_D) {
3188 	if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3189 	tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3191 	tv0 = tarval_convert_to(tv0, mode);
3192 	tv1 = tarval_convert_to(tv1, mode);
	/* build a private constant array entity initialized with both tarvals */
3199 	tp = ia32_get_prim_type(mode);
3200 	tp = ia32_create_float_array(tp);
3202 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3204 	set_entity_ld_ident(ent, get_entity_ident(ent));
3205 	set_entity_visibility(ent, ir_visibility_private);
3206 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3208 	initializer = create_initializer_compound(2);
3210 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3211 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3213 	set_entity_initializer(ent, initializer);
3220 * Possible transformations for creating a Setcc.
/**
 * Possible instruction kinds for building an arbitrary 0/1-derived
 * constant pair out of a Setcc result (enumerators elided in this excerpt).
 */
3222 enum setcc_transform_insn {
/* A sequence of transformation steps (condition code + per-step
 * instruction selection) computed by find_const_transform. */
3235 typedef struct setcc_transform {
3237 	ia32_condition_code_t cc;
3239 	enum setcc_transform_insn transform;
3243 } setcc_transform_t;
3246 * Setcc can only handle 0 and 1 result.
3247 * Find a transformation that creates 0 and 1 from
/**
 * Setcc can only produce 0 and 1. Finds a sequence of transformation steps
 * (ADD/LEA/SHL/NEG/NOT/AND/SET/SBB) that turns that 0/1 into the requested
 * true/false tarval pair, normalizing first so that t > f and f == 0.
 */
3250 static void find_const_transform(ia32_condition_code_t cc,
3251 	ir_tarval *t, ir_tarval *f,
3252 	setcc_transform_t *res)
	/* normalize: make t the non-null respectively bigger value, negating cc */
3258 	if (tarval_is_null(t)) {
3262 	cc = ia32_negate_condition_code(cc);
3263 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3264 	// now, t is the bigger one
3268 	cc = ia32_negate_condition_code(cc);
	/* if f != 0, emit a final ADD f and continue with (t - f, 0) */
3272 	if (! tarval_is_null(f)) {
3273 	ir_tarval *t_sub = tarval_sub(t, f, NULL);
3276 	res->steps[step].transform = SETCC_TR_ADD;
3278 	if (t == tarval_bad)
3279 	panic("constant subtract failed");
3280 	if (! tarval_is_long(f))
3281 	panic("tarval is not long");
3283 	res->steps[step].val = get_tarval_long(f);
3285 	f = tarval_sub(f, f, NULL);
3286 	assert(tarval_is_null(f));
	/* t == 1: a plain Setcc suffices */
3289 	if (tarval_is_one(t)) {
3290 	res->steps[step].transform = SETCC_TR_SET;
3291 	res->num_steps = ++step;
	/* t == -1: negate the Setcc result */
3295 	if (tarval_is_minus_one(t)) {
3296 	res->steps[step].transform = SETCC_TR_NEG;
3298 	res->steps[step].transform = SETCC_TR_SET;
3299 	res->num_steps = ++step;
3302 	if (tarval_is_long(t)) {
3303 	long v = get_tarval_long(t);
3305 	res->steps[step].val = 0;
	/* small multiples of the set result can be built with scaled LEAs */
3308 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3310 	res->steps[step].transform = SETCC_TR_LEAxx;
3311 	res->steps[step].scale = 3; /* (a << 3) + a */
3314 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3316 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3317 	res->steps[step].scale = 3; /* (a << 3) */
3320 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3322 	res->steps[step].transform = SETCC_TR_LEAxx;
3323 	res->steps[step].scale = 2; /* (a << 2) + a */
3326 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3328 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3329 	res->steps[step].scale = 2; /* (a << 2) */
3332 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3334 	res->steps[step].transform = SETCC_TR_LEAxx;
3335 	res->steps[step].scale = 1; /* (a << 1) + a */
3338 	if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3340 	res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3341 	res->steps[step].scale = 1; /* (a << 1) */
3344 	res->num_steps = step;
	/* general case: NEG to build a mask, AND with v unless v is a single bit */
3347 	if (! tarval_is_single_bit(t)) {
3348 	res->steps[step].transform = SETCC_TR_AND;
3349 	res->steps[step].val = v;
3351 	res->steps[step].transform = SETCC_TR_NEG;
3353 	int val = get_tarval_lowest_bit(t);
3356 	res->steps[step].transform = SETCC_TR_SHL;
3357 	res->steps[step].scale = val;
3361 	res->steps[step].transform = SETCC_TR_SET;
3362 	res->num_steps = ++step;
3365 	panic("tarval is not long");
3369 * Transforms a Mux node into some code sequence.
3371 * @return The transformed node.
/**
 * Transforms a Mux node into an appropriate code sequence:
 * float abs, SSE min/max, constant-loads from a 2-element const array,
 * unsigned "difference or zero", Setcc-based constant building, or CMov.
 *
 * @return The transformed node.
 */
3373 static ir_node *gen_Mux(ir_node *node)
3375 	dbg_info *dbgi = get_irn_dbg_info(node);
3376 	ir_node *block = get_nodes_block(node);
3377 	ir_node *new_block = be_transform_node(block);
3378 	ir_node *mux_true = get_Mux_true(node);
3379 	ir_node *mux_false = get_Mux_false(node);
3380 	ir_node *sel = get_Mux_sel(node);
3381 	ir_mode *mode = get_irn_mode(node);
3385 	ia32_condition_code_t cc;
3387 	assert(get_irn_mode(sel) == mode_b);
	/* detect abs patterns (Mux of a value and its negation) */
3389 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3391 	if (ia32_mode_needs_gp_reg(mode)) {
3392 	ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3395 	ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3396 	return create_float_abs(dbgi, block, op, is_abs < 0, node);
3400 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3401 	if (mode_is_float(mode)) {
3402 	ir_node *cmp_left = get_Cmp_left(sel);
3403 	ir_node *cmp_right = get_Cmp_right(sel);
3404 	ir_relation relation = get_Cmp_relation(sel);
3406 	if (ia32_cg_config.use_sse2) {
3407 	if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3408 	if (cmp_left == mux_true && cmp_right == mux_false) {
3409 	/* Mux(a <= b, a, b) => MIN */
3410 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3411 	match_commutative | match_am | match_two_users);
3412 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3413 	/* Mux(a <= b, b, a) => MAX */
3414 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3415 	match_commutative | match_am | match_two_users);
3417 	} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3418 	if (cmp_left == mux_true && cmp_right == mux_false) {
3419 	/* Mux(a >= b, a, b) => MAX */
3420 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3421 	match_commutative | match_am | match_two_users);
3422 	} else if (cmp_left == mux_false && cmp_right == mux_true) {
3423 	/* Mux(a >= b, b, a) => MIN */
3424 	return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3425 	match_commutative | match_am | match_two_users);
	/* float Mux of two constants: load from a 2-element const array,
	* indexed by the (scaled) Setcc result */
3430 	if (is_Const(mux_true) && is_Const(mux_false)) {
3431 	ia32_address_mode_t am;
3436 	flags = get_flags_node(sel, &cc);
3437 	new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3439 	if (ia32_cg_config.use_sse2) {
3440 	/* cannot load from different mode on SSE */
3443 	/* x87 can load any mode */
3447 	am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3449 	if (new_mode == mode_F) {
3451 	} else if (new_mode == mode_D) {
3453 	} else if (new_mode == ia32_mode_E) {
3454 	/* arg, shift 16 NOT supported */
3456 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3458 	panic("Unsupported constant size");
3461 	am.ls_mode = new_mode;
3462 	am.addr.base = get_symconst_base();
3463 	am.addr.index = new_node;
3464 	am.addr.mem = nomem;
3466 	am.addr.scale = scale;
3467 	am.addr.use_frame = 0;
3468 	am.addr.tls_segment = false;
3469 	am.addr.frame_entity = NULL;
3470 	am.addr.symconst_sign = 0;
3471 	am.mem_proj = am.addr.mem;
3472 	am.op_type = ia32_AddrModeS;
3475 	am.pinned = op_pin_state_floats;
3477 	am.ins_permuted = false;
3479 	if (ia32_cg_config.use_sse2)
3480 	load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3482 	load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3483 	set_am_attributes(load, &am);
3485 	return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3487 	panic("cannot transform floating point Mux");
3490 	assert(ia32_mode_needs_gp_reg(mode));
3493 	ir_node *cmp_left = get_Cmp_left(sel);
3494 	ir_node *cmp_right = get_Cmp_right(sel);
3495 	ir_relation relation = get_Cmp_relation(sel);
3496 	ir_node *val_true = mux_true;
3497 	ir_node *val_false = mux_false;
	/* normalize so the null constant sits on the false side */
3499 	if (is_Const(val_true) && is_Const_null(val_true)) {
3500 	ir_node *tmp = val_false;
3501 	val_false = val_true;
3503 	relation = get_negated_relation(relation);
	/* Mux(a >/< b, a - b resp. b - a, 0) => unsigned doz */
3505 	if (is_Const_0(val_false) && is_Sub(val_true)) {
3506 	if ((relation & ir_relation_greater)
3507 	&& get_Sub_left(val_true) == cmp_left
3508 	&& get_Sub_right(val_true) == cmp_right) {
3509 	return create_doz(node, cmp_left, cmp_right);
3511 	if ((relation & ir_relation_less)
3512 	&& get_Sub_left(val_true) == cmp_right
3513 	&& get_Sub_right(val_true) == cmp_left) {
3514 	return create_doz(node, cmp_right, cmp_left);
3519 	flags = get_flags_node(sel, &cc);
3521 	if (is_Const(mux_true) && is_Const(mux_false)) {
3522 	/* both are const, good */
3523 	ir_tarval *tv_true = get_Const_tarval(mux_true);
3524 	ir_tarval *tv_false = get_Const_tarval(mux_false);
3525 	setcc_transform_t res;
	/* compute a step sequence and materialize it back-to-front */
3528 	find_const_transform(cc, tv_true, tv_false, &res);
3530 	for (step = (int)res.num_steps - 1; step >= 0; --step) {
3533 	switch (res.steps[step].transform) {
3535 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3536 	add_ia32_am_offs_int(new_node, res.steps[step].val);
3538 	case SETCC_TR_ADDxx:
3539 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3542 	new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3543 	set_ia32_am_scale(new_node, res.steps[step].scale);
3544 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3546 	case SETCC_TR_LEAxx:
3547 	new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3548 	set_ia32_am_scale(new_node, res.steps[step].scale);
3549 	set_ia32_am_offs_int(new_node, res.steps[step].val);
3552 	imm = ia32_immediate_from_long(res.steps[step].scale);
3553 	new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3556 	new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3559 	new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3562 	imm = ia32_immediate_from_long(res.steps[step].val);
3563 	new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3566 	new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3569 	new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3572 	panic("unknown setcc transform");
	/* general case: conditional move */
3576 	new_node = create_CMov(node, sel, flags, cc);
3583 * Create a conversion from x87 state register to general purpose.
/**
 * Creates a conversion from x87 stack register to general purpose register
 * by storing the value with fist(tp) into a stack slot and loading it back.
 */
3585 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3587 	ir_node *block = be_transform_node(get_nodes_block(node));
3588 	ir_node *op = get_Conv_op(node);
3589 	ir_node *new_op = be_transform_node(op);
3590 	ir_graph *irg = current_ir_graph;
3591 	dbg_info *dbgi = get_irn_dbg_info(node);
3592 	ir_mode *mode = get_irn_mode(node);
3593 	ir_node *frame = get_irg_frame(irg);
3594 	ir_node *fist, *load, *mem;
3596 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3597 	set_irn_pinned(fist, op_pin_state_floats);
3598 	set_ia32_use_frame(fist);
3599 	set_ia32_op_type(fist, ia32_AddrModeD);
3601 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3602 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3604 	assert(get_mode_size_bits(mode) <= 32);
3605 	/* exception we can only store signed 32 bit integers, so for unsigned
3606 	we store a 64bit (signed) integer and load the lower bits */
3607 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3608 	set_ia32_ls_mode(fist, mode_Ls);
3610 	set_ia32_ls_mode(fist, mode_Is);
3612 	SET_IA32_ORIG_NODE(fist, node);
	/* load the integer result back from the stack slot */
3615 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3617 	set_irn_pinned(load, op_pin_state_floats);
3618 	set_ia32_use_frame(load);
3619 	set_ia32_op_type(load, ia32_AddrModeS);
3620 	set_ia32_ls_mode(load, mode_Is);
	/* the spill slot must be large enough for the fist's store mode */
3621 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3622 	ia32_attr_t *attr = get_ia32_attr(load);
3623 	attr->data.need_64bit_stackent = 1;
3625 	ia32_attr_t *attr = get_ia32_attr(load);
3626 	attr->data.need_32bit_stackent = 1;
3628 	SET_IA32_ORIG_NODE(load, node);
3630 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3634 * Creates a x87 strict Conv by placing a Store and a Load
/**
 * Creates an x87 strict Conv (rounding to a smaller float precision) by
 * placing a Store to and a Load from a frame slot in @p tgt_mode.
 */
3636 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3638 	ir_node *block = get_nodes_block(node);
3639 	ir_graph *irg = get_Block_irg(block);
3640 	dbg_info *dbgi = get_irn_dbg_info(node);
3641 	ir_node *frame = get_irg_frame(irg);
3643 	ir_node *store, *load;
3646 	store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3647 	set_ia32_use_frame(store);
3648 	set_ia32_op_type(store, ia32_AddrModeD);
3649 	SET_IA32_ORIG_NODE(store, node);
3651 	store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
	/* reloading in tgt_mode performs the actual precision truncation */
3653 	load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3654 	set_ia32_use_frame(load);
3655 	set_ia32_op_type(load, ia32_AddrModeS);
3656 	SET_IA32_ORIG_NODE(load, node);
3658 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/**
 * Creates an integer-to-integer Conv, selecting the 8bit variant of the
 * constructor for 8 bit target modes.
 */
3662 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3663 	ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3665 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3667 	func = get_mode_size_bits(mode) == 8 ?
3668 	new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3669 	return func(dbgi, block, base, index, mem, val, mode);
3673 * Create a conversion from general purpose to x87 register
/* Converts an integer value into an x87 float. Uses fild with source
 * address mode where possible; otherwise spills the value to the frame
 * and filds it from there. Unsigned 32 bit values get a 64 bit
 * spill (upper half zeroed) because fild only knows signed operands. */
3675 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3677 ir_node *src_block = get_nodes_block(node);
3678 ir_node *block = be_transform_node(src_block);
3679 ir_graph *irg = get_Block_irg(block);
3680 dbg_info *dbgi = get_irn_dbg_info(node);
3681 ir_node *op = get_Conv_op(node);
3682 ir_node *new_op = NULL;
3684 ir_mode *store_mode;
3690 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3691 if (possible_int_mode_for_fp(src_mode)) {
3692 ia32_address_mode_t am;
3694 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3695 if (am.op_type == ia32_AddrModeS) {
3696 ia32_address_t *addr = &am.addr;
/* load directly from the matched address — no spill needed */
3698 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3699 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3701 set_am_attributes(fild, &am);
3702 SET_IA32_ORIG_NODE(fild, node);
3704 fix_mem_proj(fild, &am);
/* fall through to the spill+fild path when AM matching failed */
3709 if (new_op == NULL) {
3710 new_op = be_transform_node(op);
3713 mode = get_irn_mode(op);
3715 /* first convert to 32 bit signed if necessary */
3716 if (get_mode_size_bits(src_mode) < 32) {
3717 if (!upper_bits_clean(new_op, src_mode)) {
3718 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3719 SET_IA32_ORIG_NODE(new_op, node);
3724 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32 bit) value onto the frame */
3727 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3729 set_ia32_use_frame(store);
3730 set_ia32_op_type(store, ia32_AddrModeD);
3731 set_ia32_ls_mode(store, mode_Iu);
3733 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3735 /* exception for 32bit unsigned, do a 64bit spill+load */
3736 if (!mode_is_signed(mode)) {
3739 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper 4 bytes of the slot so the 64 bit fild sees a
 * non-negative value */
3741 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3742 noreg_GP, nomem, zero_const);
3743 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3745 set_ia32_use_frame(zero_store);
3746 set_ia32_op_type(zero_store, ia32_AddrModeD);
3747 add_ia32_am_offs_int(zero_store, 4);
3748 set_ia32_ls_mode(zero_store, mode_Iu);
3750 in[0] = zero_store_mem;
/* both stores must happen before the fild */
3753 store_mem = new_rd_Sync(dbgi, block, 2, in);
3754 store_mode = mode_Ls;
3756 store_mode = mode_Is;
/* load the spilled integer into an x87 register */
3760 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3762 set_ia32_use_frame(fild);
3763 set_ia32_op_type(fild, ia32_AddrModeS);
3764 set_ia32_ls_mode(fild, store_mode);
3766 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3772 * Create a conversion from one integer mode into another one
/* Only the smaller of the two modes matters: the conversion is realised
 * as a (sign/zero extending) Conv_I2I in that mode, possibly folding a
 * load via source address mode. */
3774 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3775 dbg_info *dbgi, ir_node *block, ir_node *op,
3778 ir_node *new_block = be_transform_node(block);
3780 ir_mode *smaller_mode;
3781 ia32_address_mode_t am;
3782 ia32_address_t *addr = &am.addr;
/* pick whichever mode is narrower — that decides the extension */
3785 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3786 smaller_mode = src_mode;
3788 smaller_mode = tgt_mode;
3791 #ifdef DEBUG_libfirm
/* a Conv directly after a Const should have been folded earlier */
3793 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3798 match_arguments(&am, block, NULL, op, NULL,
3799 match_am | match_8bit_am | match_16bit_am);
/* if the relevant upper bits are already clean the conv is a no-op */
3801 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3802 /* unnecessary conv. in theory it shouldn't have been AM */
3803 assert(is_ia32_NoReg_GP(addr->base));
3804 assert(is_ia32_NoReg_GP(addr->index));
3805 assert(is_NoMem(addr->mem));
3806 assert(am.addr.offset == 0);
3807 assert(am.addr.symconst_ent == NULL);
3811 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3812 addr->mem, am.new_op2, smaller_mode);
3813 set_am_attributes(new_node, &am);
3814 /* match_arguments assume that out-mode = in-mode, this isn't true here
3816 set_ia32_ls_mode(new_node, smaller_mode);
3817 SET_IA32_ORIG_NODE(new_node, node);
3818 new_node = fix_mem_proj(new_node, &am);
3823 * Transforms a Conv node.
/* Dispatches on (float|int) x (float|int) source/target combinations and
 * on whether SSE2 or the x87 unit is in use. */
3825 * @return The created ia32 Conv node
3827 static ir_node *gen_Conv(ir_node *node)
3829 ir_node *block = get_nodes_block(node);
3830 ir_node *new_block = be_transform_node(block);
3831 ir_node *op = get_Conv_op(node);
3832 ir_node *new_op = NULL;
3833 dbg_info *dbgi = get_irn_dbg_info(node);
3834 ir_mode *src_mode = get_irn_mode(op);
3835 ir_mode *tgt_mode = get_irn_mode(node);
3836 int src_bits = get_mode_size_bits(src_mode);
3837 int tgt_bits = get_mode_size_bits(tgt_mode);
3838 ir_node *res = NULL;
/* 64 bit integers must have been lowered to 32 bit pairs already */
3840 assert(!mode_is_int(src_mode) || src_bits <= 32);
3841 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3843 /* modeB -> X should already be lowered by the lower_mode_b pass */
3844 if (src_mode == mode_b) {
3845 panic("ConvB not lowered %+F", node);
/* same-mode conversions are no-ops (except x87 strictness concerns) */
3848 if (src_mode == tgt_mode) {
3849 if (get_Conv_strict(node)) {
3850 if (ia32_cg_config.use_sse2) {
3851 /* when we are in SSE mode, we can kill all strict no-op conversion */
3852 return be_transform_node(op);
3855 /* this should be optimized already, but who knows... */
3856 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3857 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3858 return be_transform_node(op);
3862 if (mode_is_float(src_mode)) {
3863 new_op = be_transform_node(op);
3864 /* we convert from float ... */
3865 if (mode_is_float(tgt_mode)) {
3867 if (ia32_cg_config.use_sse2) {
3868 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3869 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3871 set_ia32_ls_mode(res, tgt_mode);
3873 if (get_Conv_strict(node)) {
3874 /* if fp_no_float_fold is not set then we assume that we
3875 * don't have any float operations in a non
3876 * mode_float_arithmetic mode and can skip strict upconvs */
3877 if (src_bits < tgt_bits) {
3878 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* strict downconv: force rounding through memory */
3881 res = gen_x87_strict_conv(tgt_mode, new_op);
3882 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3886 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... to int */
3891 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3892 if (ia32_cg_config.use_sse2) {
3893 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3895 set_ia32_ls_mode(res, src_mode);
3897 return gen_x87_fp_to_gp(node);
3901 /* we convert from int ... */
3902 if (mode_is_float(tgt_mode)) {
3904 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3905 if (ia32_cg_config.use_sse2) {
3906 new_op = be_transform_node(op);
3907 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3909 set_ia32_ls_mode(res, tgt_mode);
3911 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3912 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3913 res = gen_x87_gp_to_fp(node, src_mode);
3915 /* we need a strict-Conv, if the int mode has more bits than the
3917 if (float_mantissa < int_mantissa) {
3918 res = gen_x87_strict_conv(tgt_mode, res);
3919 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3923 } else if (tgt_mode == mode_b) {
3924 /* mode_b lowering already took care that we only have 0/1 values */
3925 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3926 src_mode, tgt_mode));
3927 return be_transform_node(op);
/* int -> int of the same width: nothing to do */
3930 if (src_bits == tgt_bits) {
3931 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3932 src_mode, tgt_mode));
3933 return be_transform_node(op);
3936 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3944 static ir_node *create_immediate_or_transform(ir_node *node,
3945 char immediate_constraint_type)
3947 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3948 if (new_node == NULL) {
3949 new_node = be_transform_node(node);
3955 * Transforms a FrameAddr into an ia32 Add.
3957 static ir_node *gen_be_FrameAddr(ir_node *node)
3959 ir_node *block = be_transform_node(get_nodes_block(node));
3960 ir_node *op = be_get_FrameAddr_frame(node);
3961 ir_node *new_op = be_transform_node(op);
3962 dbg_info *dbgi = get_irn_dbg_info(node);
3965 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3966 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3967 set_ia32_use_frame(new_node);
3969 SET_IA32_ORIG_NODE(new_node, node);
3975 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* With SSE2 a float return value lives in an XMM register, but the ia32
 * calling convention returns floats on the x87 stack — so the value is
 * stored to the frame and re-loaded into an x87 register before the
 * Return is rebuilt. All non-float cases simply duplicate the node. */
3977 static ir_node *gen_be_Return(ir_node *node)
3979 ir_graph *irg = current_ir_graph;
3980 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3981 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3982 ir_node *new_ret_val = be_transform_node(ret_val);
3983 ir_node *new_ret_mem = be_transform_node(ret_mem);
3984 ir_entity *ent = get_irg_entity(irg);
3985 ir_type *tp = get_entity_type(ent);
3986 dbg_info *dbgi = get_irn_dbg_info(node);
3987 ir_node *block = be_transform_node(get_nodes_block(node));
/* nothing special needed unless SSE2 is active and a value is returned */
4001 assert(ret_val != NULL);
4002 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4003 return be_duplicate_node(node);
4006 res_type = get_method_res_type(tp, 0);
4008 if (! is_Primitive_type(res_type)) {
4009 return be_duplicate_node(node);
4012 mode = get_type_mode(res_type);
4013 if (! mode_is_float(mode)) {
4014 return be_duplicate_node(node);
4017 assert(get_method_n_ress(tp) == 1);
4019 frame = get_irg_frame(irg);
4021 /* store xmm0 onto stack */
4022 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4023 new_ret_mem, new_ret_val);
4024 set_ia32_ls_mode(sse_store, mode);
4025 set_ia32_op_type(sse_store, ia32_AddrModeD);
4026 set_ia32_use_frame(sse_store);
4027 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4029 /* load into x87 register */
4030 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4031 set_ia32_op_type(fld, ia32_AddrModeS);
4032 set_ia32_use_frame(fld);
4034 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4035 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4037 /* create a new return */
4038 arity = get_irn_arity(node);
4039 in = ALLOCAN(ir_node*, arity);
4040 pop = be_Return_get_pop(node);
/* rebuild the operand list, substituting the rerouted value/memory */
4041 for (i = 0; i < arity; ++i) {
4042 ir_node *op = get_irn_n(node, i);
4043 if (op == ret_val) {
4045 } else if (op == ret_mem) {
4048 in[i] = be_transform_node(op);
4051 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4052 copy_node_attr(irg, node, new_node);
4058 * Transform a be_AddSP into an ia32_SubSP.
4060 static ir_node *gen_be_AddSP(ir_node *node)
4062 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4063 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4065 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4066 match_am | match_immediate);
4067 assert(is_ia32_SubSP(new_node));
4068 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4069 &ia32_registers[REG_ESP]);
4074 * Transform a be_SubSP into an ia32_AddSP
4076 static ir_node *gen_be_SubSP(ir_node *node)
4078 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4079 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4081 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4082 match_am | match_immediate);
4083 assert(is_ia32_AddSP(new_node));
4084 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4085 &ia32_registers[REG_ESP]);
4090 * Change some phi modes
/* Duplicates a Phi, attaching the proper ia32 register requirement for
 * its mode. Phi inputs are left untransformed here (loops!) and fixed up
 * later via be_enqueue_preds. */
4092 static ir_node *gen_Phi(ir_node *node)
4094 const arch_register_req_t *req;
4095 ir_node *block = be_transform_node(get_nodes_block(node));
4096 ir_graph *irg = current_ir_graph;
4097 dbg_info *dbgi = get_irn_dbg_info(node);
4098 ir_mode *mode = get_irn_mode(node);
4101 if (ia32_mode_needs_gp_reg(mode)) {
4102 /* we shouldn't have any 64bit stuff around anymore */
4103 assert(get_mode_size_bits(mode) <= 32);
4104 /* all integer operations are on 32bit registers now */
4106 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4107 } else if (mode_is_float(mode)) {
4108 if (ia32_cg_config.use_sse2) {
4110 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4113 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory or X Phis need no register */
4116 req = arch_no_register_req;
4119 /* phi nodes allow loops, so we use the old arguments for now
4120 * and fix this later */
4121 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4122 get_irn_in(node) + 1);
4123 copy_node_attr(irg, node, phi);
4124 be_duplicate_deps(node, phi);
4126 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the predecessors get transformed eventually */
4128 be_enqueue_preds(node);
4133 static ir_node *gen_Jmp(ir_node *node)
4135 ir_node *block = get_nodes_block(node);
4136 ir_node *new_block = be_transform_node(block);
4137 dbg_info *dbgi = get_irn_dbg_info(node);
4140 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4141 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump; the target address may be folded into an
 * address mode or an immediate. */
4149 static ir_node *gen_IJmp(ir_node *node)
4151 ir_node *block = get_nodes_block(node);
4152 ir_node *new_block = be_transform_node(block);
4153 dbg_info *dbgi = get_irn_dbg_info(node);
4154 ir_node *op = get_IJmp_target(node);
4156 ia32_address_mode_t am;
4157 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4159 assert(get_irn_mode(op) == mode_P);
4161 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4163 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4164 addr->mem, am.new_op2);
4165 set_am_attributes(new_node, &am);
4166 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a folded load's memory Proj if source AM was used */
4168 new_node = fix_mem_proj(new_node, &am);
/* Transform the lowered 64bit-helper l_Add into a real ia32 Add whose
 * flags output feeds the matching Adc. */
4173 static ir_node *gen_ia32_l_Add(ir_node *node)
4175 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4176 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4177 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4178 match_commutative | match_am | match_immediate |
4179 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add itself in
 * mode_T so the flags Proj can be attached */
4181 if (is_Proj(lowered)) {
4182 lowered = get_Proj_pred(lowered);
4184 assert(is_ia32_Add(lowered));
4185 set_irn_mode(lowered, mode_T);
4191 static ir_node *gen_ia32_l_Adc(ir_node *node)
4193 return gen_binop_flags(node, new_bd_ia32_Adc,
4194 match_commutative | match_am | match_immediate |
4195 match_mode_neutral);
4199 * Transforms a l_MulS into a "real" MulS node.
4201 * @return the created ia32 Mul node
4203 static ir_node *gen_ia32_l_Mul(ir_node *node)
4205 ir_node *left = get_binop_left(node);
4206 ir_node *right = get_binop_right(node);
4208 return gen_binop(node, left, right, new_bd_ia32_Mul,
4209 match_commutative | match_am | match_mode_neutral);
4213 * Transforms a l_IMulS into a "real" IMul1OPS node.
4215 * @return the created ia32 IMul1OP node
4217 static ir_node *gen_ia32_l_IMul(ir_node *node)
4219 ir_node *left = get_binop_left(node);
4220 ir_node *right = get_binop_right(node);
4222 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4223 match_commutative | match_am | match_mode_neutral);
/* Transform the lowered 64bit-helper l_Sub into a real ia32 Sub whose
 * flags output feeds the matching Sbb. */
4226 static ir_node *gen_ia32_l_Sub(ir_node *node)
4228 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4229 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4230 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4231 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Sub itself in
 * mode_T so the flags Proj can be attached */
4233 if (is_Proj(lowered)) {
4234 lowered = get_Proj_pred(lowered);
4236 assert(is_ia32_Sub(lowered));
4237 set_irn_mode(lowered, mode_T);
4243 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4245 return gen_binop_flags(node, new_bd_ia32_Sbb,
4246 match_am | match_immediate | match_mode_neutral);
/* Convert a lowered 64bit integer (low/high word pair) into an x87 float:
 * store both halves to the frame, fild the 64bit slot, and — for unsigned
 * values that had the sign bit set — add a correcting bias constant. */
4249 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4251 ir_node *src_block = get_nodes_block(node);
4252 ir_node *block = be_transform_node(src_block);
4253 ir_graph *irg = current_ir_graph;
4254 dbg_info *dbgi = get_irn_dbg_info(node);
4255 ir_node *frame = get_irg_frame(irg);
4256 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4257 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4258 ir_node *new_val_low = be_transform_node(val_low);
4259 ir_node *new_val_high = be_transform_node(val_high);
4261 ir_node *sync, *fild, *res;
4263 ir_node *store_high;
4267 if (ia32_cg_config.use_sse2) {
4268 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves into one 64bit frame slot */
4272 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4274 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4276 SET_IA32_ORIG_NODE(store_low, node);
4277 SET_IA32_ORIG_NODE(store_high, node);
4279 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4280 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4282 set_ia32_use_frame(store_low);
4283 set_ia32_use_frame(store_high);
4284 set_ia32_op_type(store_low, ia32_AddrModeD);
4285 set_ia32_op_type(store_high, ia32_AddrModeD);
4286 set_ia32_ls_mode(store_low, mode_Iu);
4287 set_ia32_ls_mode(store_high, mode_Is);
/* high word lives 4 bytes above the low word */
4288 add_ia32_am_offs_int(store_high, 4);
4292 sync = new_rd_Sync(dbgi, block, 2, in);
/* load hex */
4295 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4297 set_ia32_use_frame(fild);
4298 set_ia32_op_type(fild, ia32_AddrModeS);
4299 set_ia32_ls_mode(fild, mode_Ls);
4301 SET_IA32_ORIG_NODE(fild, node);
4303 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* fild interprets the slot as signed; for an unsigned source with the
 * sign bit set, add 2^64 (via a known bias constant indexed by the
 * shifted sign bit) to correct the value */
4305 if (! mode_is_signed(get_irn_mode(val_high))) {
4306 ia32_address_mode_t am;
4308 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4311 am.addr.base = get_symconst_base();
/* sign bit selects entry 0 (no bias) or entry 1 (bias) */
4312 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4313 am.addr.mem = nomem;
4316 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4317 am.addr.tls_segment = false;
4318 am.addr.use_frame = 0;
4319 am.addr.frame_entity = NULL;
4320 am.addr.symconst_sign = 0;
4321 am.ls_mode = mode_F;
4322 am.mem_proj = nomem;
4323 am.op_type = ia32_AddrModeS;
4325 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4326 am.pinned = op_pin_state_floats;
4328 am.ins_permuted = false;
4330 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4331 am.new_op1, am.new_op2, get_fpcw());
4332 set_am_attributes(fadd, &am);
4334 set_irn_mode(fadd, mode_T);
4335 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Convert an x87 float to a lowered 64bit integer: fist(tp) stores the
 * full 64bit result into a frame slot; the two 32bit halves are read
 * back later by gen_Proj_l_FloattoLL. Returns only the memory Proj. */
4340 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4342 ir_node *src_block = get_nodes_block(node);
4343 ir_node *block = be_transform_node(src_block);
4344 ir_graph *irg = get_Block_irg(block);
4345 dbg_info *dbgi = get_irn_dbg_info(node);
4346 ir_node *frame = get_irg_frame(irg);
4347 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4348 ir_node *new_val = be_transform_node(val);
4351 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4352 SET_IA32_ORIG_NODE(fist, node);
4353 set_ia32_use_frame(fist);
4354 set_ia32_op_type(fist, ia32_AddrModeD);
4355 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may produce a vfist or a vfisttp; both share the M Proj number */
4357 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4358 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Read one 32bit half of the 64bit result that gen_ia32_l_FloattoLL
 * stored into the frame slot (offset 4 selects the high word). */
4361 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4363 ir_node *block = be_transform_node(get_nodes_block(node));
4364 ir_graph *irg = get_Block_irg(block);
4365 ir_node *pred = get_Proj_pred(node);
4366 ir_node *new_pred = be_transform_node(pred);
4367 ir_node *frame = get_irg_frame(irg);
4368 dbg_info *dbgi = get_irn_dbg_info(node);
4369 long pn = get_Proj_proj(node);
4374 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4375 SET_IA32_ORIG_NODE(load, node);
4376 set_ia32_use_frame(load);
4377 set_ia32_op_type(load, ia32_AddrModeS);
4378 set_ia32_ls_mode(load, mode_Iu);
4379 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4380 * 32 bit from it with this particular load */
4381 attr = get_ia32_attr(load);
4382 attr->data.need_64bit_stackent = 1;
4384 if (pn == pn_ia32_l_FloattoLL_res_high) {
4385 add_ia32_am_offs_int(load, 4);
4387 assert(pn == pn_ia32_l_FloattoLL_res_low);
4390 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4396 * Transform the Projs of an AddSP.
/* be_AddSP was turned into ia32_SubSP (stack grows downwards), so the
 * Proj numbers are remapped accordingly. */
4398 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4400 ir_node *pred = get_Proj_pred(node);
4401 ir_node *new_pred = be_transform_node(pred);
4402 dbg_info *dbgi = get_irn_dbg_info(node);
4403 long proj = get_Proj_proj(node);
4405 if (proj == pn_be_AddSP_sp) {
4406 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4407 pn_ia32_SubSP_stack);
/* the stack pointer result must stay in ESP */
4408 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4410 } else if (proj == pn_be_AddSP_res) {
4411 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4412 pn_ia32_SubSP_addr);
4413 } else if (proj == pn_be_AddSP_M) {
4414 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4417 panic("No idea how to transform proj->AddSP");
4421 * Transform the Projs of a SubSP.
/* be_SubSP was turned into ia32_AddSP; remap the Proj numbers. */
4423 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4425 ir_node *pred = get_Proj_pred(node);
4426 ir_node *new_pred = be_transform_node(pred);
4427 dbg_info *dbgi = get_irn_dbg_info(node);
4428 long proj = get_Proj_proj(node);
4430 if (proj == pn_be_SubSP_sp) {
4431 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4432 pn_ia32_AddSP_stack);
/* the stack pointer result must stay in ESP */
4433 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4435 } else if (proj == pn_be_SubSP_M) {
4436 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4439 panic("No idea how to transform proj->SubSP");
4443 * Transform and renumber the Projs from a Load.
/* The Load may have been matched away into another node's source address
 * mode, or turned into an ia32 Load / Conv / xLoad / vfld — each case
 * needs different Proj renumbering. */
4445 static ir_node *gen_Proj_Load(ir_node *node)
4448 ir_node *pred = get_Proj_pred(node);
4449 dbg_info *dbgi = get_irn_dbg_info(node);
4450 long proj = get_Proj_proj(node);
4452 /* loads might be part of source address mode matches, so we don't
4453 * transform the ProjMs yet (with the exception of loads whose result is
4456 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4459 /* this is needed, because sometimes we have loops that are only
4460 reachable through the ProjM */
4461 be_enqueue_preds(node);
4462 /* do it in 2 steps, to silence firm verifier */
4463 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4464 set_Proj_proj(res, pn_ia32_mem);
4468 /* renumber the proj */
4469 new_pred = be_transform_node(pred);
4470 if (is_ia32_Load(new_pred)) {
4471 switch ((pn_Load)proj) {
4473 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4475 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4476 case pn_Load_X_except:
4477 /* This Load might raise an exception. Mark it. */
4478 set_ia32_exc_label(new_pred, 1);
4479 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4480 case pn_Load_X_regular:
4481 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* a load folded into a conv produces the conv's Projs */
4483 } else if (is_ia32_Conv_I2I(new_pred) ||
4484 is_ia32_Conv_I2I8Bit(new_pred)) {
4485 set_irn_mode(new_pred, mode_T);
4486 switch ((pn_Load)proj) {
4488 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4490 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4491 case pn_Load_X_except:
4492 /* This Load might raise an exception. Mark it. */
4493 set_ia32_exc_label(new_pred, 1);
4494 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4495 case pn_Load_X_regular:
4496 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4498 } else if (is_ia32_xLoad(new_pred)) {
4499 switch ((pn_Load)proj) {
4501 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4503 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4504 case pn_Load_X_except:
4505 /* This Load might raise an exception. Mark it. */
4506 set_ia32_exc_label(new_pred, 1);
4507 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4508 case pn_Load_X_regular:
4509 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4511 } else if (is_ia32_vfld(new_pred)) {
4512 switch ((pn_Load)proj) {
4514 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4516 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4517 case pn_Load_X_except:
4518 /* This Load might raise an exception. Mark it. */
4519 set_ia32_exc_label(new_pred, 1);
4520 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4521 case pn_Load_X_regular:
4522 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4525 /* can happen for ProJMs when source address mode happened for the
4528 /* however it should not be the result proj, as that would mean the
4529 load had multiple users and should not have been used for
4531 if (proj != pn_Load_M) {
4532 panic("internal error: transformed node not a Load");
/* NOTE(review): `1` looks like a literal pn_ia32_mem here — presumably
 * the generic memory Proj number; consider using the named constant */
4534 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4537 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs from a Store; the Store may have
 * become one of several ia32 store variants, a Sync (from
 * gen_float_const_Store) or a destination-AM node. */
4540 static ir_node *gen_Proj_Store(ir_node *node)
4542 ir_node *pred = get_Proj_pred(node);
4543 ir_node *new_pred = be_transform_node(pred);
4544 dbg_info *dbgi = get_irn_dbg_info(node);
4545 long pn = get_Proj_proj(node);
4547 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4548 switch ((pn_Store)pn) {
4550 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4551 case pn_Store_X_except:
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4553 case pn_Store_X_regular:
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4556 } else if (is_ia32_vfist(new_pred)) {
4557 switch ((pn_Store)pn) {
4559 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4560 case pn_Store_X_except:
4561 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4562 case pn_Store_X_regular:
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4565 } else if (is_ia32_vfisttp(new_pred)) {
4566 switch ((pn_Store)pn) {
4568 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4569 case pn_Store_X_except:
4570 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4571 case pn_Store_X_regular:
4572 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4574 } else if (is_ia32_vfst(new_pred)) {
4575 switch ((pn_Store)pn) {
4577 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4578 case pn_Store_X_except:
4579 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4580 case pn_Store_X_regular:
4581 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4583 } else if (is_ia32_xStore(new_pred)) {
4584 switch ((pn_Store)pn) {
4586 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4587 case pn_Store_X_except:
4588 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4589 case pn_Store_X_regular:
4590 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4592 } else if (is_Sync(new_pred)) {
4593 /* hack for the case that gen_float_const_Store produced a Sync */
4594 if (pn == pn_Store_M) {
4597 panic("exception control flow for gen_float_const_Store not implemented yet");
4598 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4599 /* destination address mode */
4600 if (pn == pn_Store_M) {
4603 panic("exception control flow for destination AM not implemented yet");
4606 panic("No idea how to transform Proj(Store) %+F", node);
4610 * Transform and renumber the Projs from a Div or Mod instruction.
/* The Div may have become an integer Div/IDiv, an SSE xDiv or an x87
 * vfdiv; dispatch on the transformed node type. */
4612 static ir_node *gen_Proj_Div(ir_node *node)
4614 ir_node *pred = get_Proj_pred(node);
4615 ir_node *new_pred = be_transform_node(pred);
4616 dbg_info *dbgi = get_irn_dbg_info(node);
4617 long proj = get_Proj_proj(node);
/* Div and IDiv share their Proj numbering */
4619 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4620 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4622 switch ((pn_Div)proj) {
4624 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4625 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4626 } else if (is_ia32_xDiv(new_pred)) {
4627 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4628 } else if (is_ia32_vfdiv(new_pred)) {
4629 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4631 panic("Div transformed to unexpected thing %+F", new_pred);
4634 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4635 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4636 } else if (is_ia32_xDiv(new_pred)) {
4637 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4638 } else if (is_ia32_vfdiv(new_pred)) {
4639 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4641 panic("Div transformed to unexpected thing %+F", new_pred);
4643 case pn_Div_X_except:
/* mark the node as possibly raising an exception */
4644 set_ia32_exc_label(new_pred, 1);
4645 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4646 case pn_Div_X_regular:
4647 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4650 panic("No idea how to transform proj->Div");
4654 * Transform and renumber the Projs from a Div or Mod instruction.
/* Mod is always realised by the integer Div/IDiv (mod result output);
 * there is no float Mod on ia32. */
4656 static ir_node *gen_Proj_Mod(ir_node *node)
4658 ir_node *pred = get_Proj_pred(node);
4659 ir_node *new_pred = be_transform_node(pred);
4660 dbg_info *dbgi = get_irn_dbg_info(node);
4661 long proj = get_Proj_proj(node);
4663 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4664 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4665 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4667 switch ((pn_Mod)proj) {
4669 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4671 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4672 case pn_Mod_X_except:
/* mark the node as possibly raising an exception */
4673 set_ia32_exc_label(new_pred, 1);
4674 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4675 case pn_Mod_X_regular:
4676 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4678 panic("No idea how to transform proj->Mod");
4682 * Transform and renumber the Projs from a CopyB.
/* CopyB becomes either CopyB_i (known size, inlined) or CopyB (rep movs);
 * the Proj numbering differs between the two. */
4684 static ir_node *gen_Proj_CopyB(ir_node *node)
4686 ir_node *pred = get_Proj_pred(node);
4687 ir_node *new_pred = be_transform_node(pred);
4688 dbg_info *dbgi = get_irn_dbg_info(node);
4689 long proj = get_Proj_proj(node);
4691 switch ((pn_CopyB)proj) {
4693 if (is_ia32_CopyB_i(new_pred)) {
4694 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4695 } else if (is_ia32_CopyB(new_pred)) {
4696 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4699 case pn_CopyB_X_regular:
4700 if (is_ia32_CopyB_i(new_pred)) {
4701 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4702 } else if (is_ia32_CopyB(new_pred)) {
4703 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4706 case pn_CopyB_X_except:
4707 if (is_ia32_CopyB_i(new_pred)) {
4708 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4709 } else if (is_ia32_CopyB(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4715 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call: match the call target as address
 * mode/immediate, route the register parameters (eax/ecx/edx) and the
 * fpcw, and remember SSE calls for later fixup of float results. */
4718 static ir_node *gen_be_Call(ir_node *node)
4720 dbg_info *const dbgi = get_irn_dbg_info(node);
4721 ir_node *const src_block = get_nodes_block(node);
4722 ir_node *const block = be_transform_node(src_block);
4723 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4724 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4725 ir_node *const sp = be_transform_node(src_sp);
4726 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4727 ia32_address_mode_t am;
4728 ia32_address_t *const addr = &am.addr;
4733 ir_node * eax = noreg_GP;
4734 ir_node * ecx = noreg_GP;
4735 ir_node * edx = noreg_GP;
4736 unsigned const pop = be_Call_get_pop(node);
4737 ir_type *const call_tp = be_Call_get_type(node);
4738 int old_no_pic_adjust;
4739 int throws_exception = ir_throws_exception(node);
4741 /* Run the x87 simulator if the call returns a float value */
4742 if (get_method_n_ress(call_tp) > 0) {
4743 ir_type *const res_type = get_method_res_type(call_tp, 0);
4744 ir_mode *const res_mode = get_type_mode(res_type);
4746 if (res_mode != NULL && mode_is_float(res_mode)) {
4747 ir_graph *irg = current_ir_graph;
4748 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4749 irg_data->do_x87_sim = 1;
4753 /* We do not want be_Call direct calls */
4754 assert(be_Call_get_entity(node) == NULL);
4756 /* special case for PIC trampoline calls */
4757 old_no_pic_adjust = ia32_no_pic_adjust;
4758 ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4760 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4761 match_am | match_immediate);
/* restore the global flag after matching the call target */
4763 ia32_no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; the others are the GP register params */
4765 i = get_irn_arity(node) - 1;
4766 fpcw = be_transform_node(get_irn_n(node, i--));
4767 for (; i >= n_be_Call_first_arg; --i) {
4768 arch_register_req_t const *const req
4769 = arch_get_irn_register_req_in(node, i);
4770 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4772 assert(req->type == arch_register_req_type_limited);
4773 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4775 switch (*req->limited) {
4776 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4777 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4778 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4779 default: panic("Invalid GP register for register parameter");
4783 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4784 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4785 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4786 ir_set_throws_exception(call, throws_exception);
4787 set_am_attributes(call, &am);
4788 call = fix_mem_proj(call, &am);
4790 if (get_irn_pinned(node) == op_pin_state_pinned)
4791 set_irn_pinned(call, op_pin_state_pinned);
4793 SET_IA32_ORIG_NODE(call, node);
4795 if (ia32_cg_config.use_sse2) {
4796 /* remember this call for post-processing */
4797 ARR_APP1(ir_node *, call_list, call);
4798 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4805 * Transform Builtin trap
4807 static ir_node *gen_trap(ir_node *node)
4809 dbg_info *dbgi = get_irn_dbg_info(node);
4810 ir_node *block = be_transform_node(get_nodes_block(node));
4811 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4813 return new_bd_ia32_UD2(dbgi, block, mem);
4817 * Transform Builtin debugbreak
4819 static ir_node *gen_debugbreak(ir_node *node)
4821 dbg_info *dbgi = get_irn_dbg_info(node);
4822 ir_node *block = be_transform_node(get_nodes_block(node));
4823 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4825 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4829 * Transform Builtin return_address
4831 static ir_node *gen_return_address(ir_node *node)
4833 ir_node *param = get_Builtin_param(node, 0);
4834 ir_node *frame = get_Builtin_param(node, 1);
4835 dbg_info *dbgi = get_irn_dbg_info(node);
4836 ir_tarval *tv = get_Const_tarval(param);
4837 ir_graph *irg = get_irn_irg(node);
4838 unsigned long value = get_tarval_long(tv);
4840 ir_node *block = be_transform_node(get_nodes_block(node));
4841 ir_node *ptr = be_transform_node(frame);
4845 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4846 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4847 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4850 /* load the return address from this frame */
4851 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4853 set_irn_pinned(load, get_irn_pinned(node));
4854 set_ia32_op_type(load, ia32_AddrModeS);
4855 set_ia32_ls_mode(load, mode_Iu);
4857 set_ia32_am_offs_int(load, 0);
4858 set_ia32_use_frame(load);
4859 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4861 if (get_irn_pinned(node) == op_pin_state_floats) {
4862 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4863 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4864 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4865 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4868 SET_IA32_ORIG_NODE(load, node);
4869 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4873 * Transform Builtin frame_address
4875 static ir_node *gen_frame_address(ir_node *node)
4877 ir_node *param = get_Builtin_param(node, 0);
4878 ir_node *frame = get_Builtin_param(node, 1);
4879 dbg_info *dbgi = get_irn_dbg_info(node);
4880 ir_tarval *tv = get_Const_tarval(param);
4881 ir_graph *irg = get_irn_irg(node);
4882 unsigned long value = get_tarval_long(tv);
4884 ir_node *block = be_transform_node(get_nodes_block(node));
4885 ir_node *ptr = be_transform_node(frame);
4890 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4891 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4892 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4895 /* load the frame address from this frame */
4896 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4898 set_irn_pinned(load, get_irn_pinned(node));
4899 set_ia32_op_type(load, ia32_AddrModeS);
4900 set_ia32_ls_mode(load, mode_Iu);
4902 ent = ia32_get_frame_address_entity(irg);
4904 set_ia32_am_offs_int(load, 0);
4905 set_ia32_use_frame(load);
4906 set_ia32_frame_ent(load, ent);
4908 /* will fail anyway, but gcc does this: */
4909 set_ia32_am_offs_int(load, 0);
4912 if (get_irn_pinned(node) == op_pin_state_floats) {
4913 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4914 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4915 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4916 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4919 SET_IA32_ORIG_NODE(load, node);
4920 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4924 * Transform Builtin prefetch
4926 static ir_node *gen_prefetch(ir_node *node)
/* NOTE(review): this chunk elides several lines of the function
 * (declarations, braces, switch case labels) — code kept as-is. */
4929 ir_node *ptr, *block, *mem, *base, *idx;
4930 ir_node *param, *new_node;
4933 ia32_address_t addr;
/* without any hardware prefetch support just route the memory edge through */
4935 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4936 /* no prefetch at all, route memory */
4937 return be_transform_node(get_Builtin_mem(node));
/* parameter 1 is the read/write flag of the builtin */
4940 param = get_Builtin_param(node, 1);
4941 tv = get_Const_tarval(param);
4942 rw = get_tarval_long(tv);
4944 /* construct load address */
4945 memset(&addr, 0, sizeof(addr));
4946 ptr = get_Builtin_param(node, 0);
4947 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4954 base = be_transform_node(base);
4960 idx = be_transform_node(idx);
4963 dbgi = get_irn_dbg_info(node);
4964 block = be_transform_node(get_nodes_block(node));
4965 mem = be_transform_node(get_Builtin_mem(node));
/* write-prefetch only has a dedicated instruction with 3DNow! */
4967 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4968 /* we have 3DNow!, this was already checked above */
4969 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4970 } else if (ia32_cg_config.use_sse_prefetch) {
4971 /* note: rw == 1 is IGNORED in that case */
4972 param = get_Builtin_param(node, 2);
4973 tv = get_Const_tarval(param);
4974 locality = get_tarval_long(tv);
4976 /* SSE style prefetch */
/* the locality hint selects prefetchnta/prefetcht2/t1/t0 */
4979 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4982 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4985 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4988 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4992 assert(ia32_cg_config.use_3dnow_prefetch);
4993 /* 3DNow! style prefetch */
4994 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
/* a prefetch is a load-style address-mode node with byte mode */
4997 set_irn_pinned(new_node, get_irn_pinned(node));
4998 set_ia32_op_type(new_node, ia32_AddrModeS);
4999 set_ia32_ls_mode(new_node, mode_Bu);
5000 set_address(new_node, &addr);
5002 SET_IA32_ORIG_NODE(new_node, node);
/* only the memory result of the Builtin is meaningful here */
5004 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5008 * Transform bsf like node
5010 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5012 ir_node *param = get_Builtin_param(node, 0);
5013 dbg_info *dbgi = get_irn_dbg_info(node);
5015 ir_node *block = get_nodes_block(node);
5016 ir_node *new_block = be_transform_node(block);
5018 ia32_address_mode_t am;
5019 ia32_address_t *addr = &am.addr;
5022 match_arguments(&am, block, NULL, param, NULL, match_am);
5024 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5025 set_am_attributes(cnt, &am);
5026 set_ia32_ls_mode(cnt, get_irn_mode(param));
5028 SET_IA32_ORIG_NODE(cnt, node);
5029 return fix_mem_proj(cnt, &am);
5033 * Transform builtin ffs.
5035 static ir_node *gen_ffs(ir_node *node)
/* ffs(x) = bsf(x) + 1, with x == 0 mapped to 0; computed branch-free
 * using the ZF flag of the bsf instruction.
 * NOTE(review): this chunk elides some lines (braces, function tail). */
5037 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5038 ir_node *real = skip_Proj(bsf);
5039 dbg_info *dbgi = get_irn_dbg_info(real);
5040 ir_node *block = get_nodes_block(real);
5041 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* ensure the bsf node is in mode_T so we can project flags off it */
5044 if (get_irn_mode(real) != mode_T) {
5045 set_irn_mode(real, mode_T);
5046 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
/* flags proj of the bsf: equal/ZF is set when the input was zero */
5049 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* sete: 1 iff the input was zero */
5052 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5053 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the 8-bit setcc result */
5056 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5057 SET_IA32_ORIG_NODE(conv, node);
/* negate: all-ones iff the input was zero, 0 otherwise */
5060 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* or: bsf result for nonzero input, -1 for zero input */
5063 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5064 set_ia32_commutative(orn);
/* +1 via lea: (-1)+1 = 0 for zero input, bsf+1 otherwise */
5067 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5068 add_ia32_am_offs_int(add, 1);
5073 * Transform builtin clz.
5075 static ir_node *gen_clz(ir_node *node)
5077 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5078 ir_node *real = skip_Proj(bsr);
5079 dbg_info *dbgi = get_irn_dbg_info(real);
5080 ir_node *block = get_nodes_block(real);
5081 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5083 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5087 * Transform builtin ctz.
5089 static ir_node *gen_ctz(ir_node *node)
5091 return gen_unop_AM(node, new_bd_ia32_Bsf);
5095 * Transform builtin parity.
5097 static ir_node *gen_parity(ir_node *node)
/* NOTE(review): this chunk elides some lines of this function
 * (declarations, braces, final return) — code kept as-is. */
5099 dbg_info *dbgi = get_irn_dbg_info(node);
5100 ir_node *block = get_nodes_block(node);
5101 ir_node *new_block = be_transform_node(block);
5102 ir_node *param = get_Builtin_param(node, 0);
5103 ir_node *new_param = be_transform_node(param);
5106 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5107 * so we have to do complicated xoring first.
5108 * (we should also better lower this before the backend so we still have a
5109 * chance for CSE, constant folding and other goodies for some of these
/* fold the 32-bit parity down to one byte: xor the 16-bit halves, then
 * xor the high/low bytes of the 16-bit result */
5112 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5113 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5114 ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5116 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5119 set_ia32_commutative(xor);
/* project the flags of the final xor; its PF encodes the byte parity */
5121 set_irn_mode(xor2, mode_T);
5122 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* setnp: 1 iff an odd number of bits is set */
5125 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5126 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8-bit setcc result */
5129 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5130 nomem, new_node, mode_Bu);
5131 SET_IA32_ORIG_NODE(new_node, node);
5136 * Transform builtin popcount
5138 static ir_node *gen_popcount(ir_node *node)
/* Population count: uses the hardware popcnt instruction when available,
 * otherwise the classic parallel bit-summing algorithm (Hacker's Delight).
 * NOTE(review): this chunk elides some lines (declarations, braces). */
5140 ir_node *param = get_Builtin_param(node, 0);
5141 dbg_info *dbgi = get_irn_dbg_info(node);
5143 ir_node *block = get_nodes_block(node);
5144 ir_node *new_block = be_transform_node(block);
5147 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5149 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5150 if (ia32_cg_config.use_popcnt) {
5151 ia32_address_mode_t am;
5152 ia32_address_t *addr = &am.addr;
/* popcnt supports memory operands: try to fold the source as AM */
5155 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5157 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5158 set_am_attributes(cnt, &am);
5159 set_ia32_ls_mode(cnt, get_irn_mode(param));
5161 SET_IA32_ORIG_NODE(cnt, node);
5162 return fix_mem_proj(cnt, &am);
5165 new_param = be_transform_node(param);
5167 /* do the standard popcount algo */
5168 /* TODO: This is stupid, we should transform this before the backend,
5169 * to get CSE, localopts, etc. for the operations
5170 * TODO: This is also not the optimal algorithm (it is just the starting
5171 * example in hackers delight, they optimize it more on the following page)
5172 * But I'm too lazy to fix this now, as the code should get lowered before
5173 * the backend anyway.
/* pairwise bit sums: adds are built as Lea nodes to spare the flags */
5176 /* m1 = x & 0x55555555 */
5177 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5178 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5181 simm = ia32_create_Immediate(NULL, 0, 1);
5182 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5184 /* m2 = s1 & 0x55555555 */
5185 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5188 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5190 /* m4 = m3 & 0x33333333 */
5191 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5192 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5195 simm = ia32_create_Immediate(NULL, 0, 2);
5196 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5198 /* m5 = s2 & 0x33333333 */
5199 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5202 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5204 /* m7 = m6 & 0x0F0F0F0F */
5205 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5206 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5209 simm = ia32_create_Immediate(NULL, 0, 4);
5210 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5212 /* m8 = s3 & 0x0F0F0F0F */
5213 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5216 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5218 /* m10 = m9 & 0x00FF00FF */
5219 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5220 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5223 simm = ia32_create_Immediate(NULL, 0, 8);
5224 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5226 /* m11 = s4 & 0x00FF00FF */
5227 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5229 /* m12 = m10 + m11 */
5230 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5232 /* m13 = m12 & 0x0000FFFF */
5233 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5234 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5236 /* s5 = m12 >> 16 */
5237 simm = ia32_create_Immediate(NULL, 0, 16);
5238 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5240 /* res = m13 + s5 */
5241 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5245 * Transform builtin byte swap.
5247 static ir_node *gen_bswap(ir_node *node)
/* Byte swap: bswap instruction on i486+, otherwise shift/mask emulation
 * for 32 bit; 16-bit swap is always available.
 * NOTE(review): this chunk elides the size switch labels — kept as-is. */
5249 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5250 dbg_info *dbgi = get_irn_dbg_info(node);
5252 ir_node *block = get_nodes_block(node);
5253 ir_node *new_block = be_transform_node(block);
5254 ir_mode *mode = get_irn_mode(param);
5255 unsigned size = get_mode_size_bits(mode);
5256 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5260 if (ia32_cg_config.use_i486) {
5261 /* swap available */
5262 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 fallback: assemble the swapped word from shifted/masked bytes,
 * using Lea nodes as flag-free adds */
5264 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5265 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5267 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5268 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5270 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5272 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5273 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5275 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5276 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5279 /* swap16 always available */
5280 return new_bd_ia32_Bswap16(dbgi, new_block, param);
/* any size other than 16/32 bit is unsupported */
5283 panic("Invalid bswap size (%d)", size);
5288 * Transform builtin outport.
5290 static ir_node *gen_outport(ir_node *node)
5292 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5293 ir_node *oldv = get_Builtin_param(node, 1);
5294 ir_mode *mode = get_irn_mode(oldv);
5295 ir_node *value = be_transform_node(oldv);
5296 ir_node *block = be_transform_node(get_nodes_block(node));
5297 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5298 dbg_info *dbgi = get_irn_dbg_info(node);
5300 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5301 set_ia32_ls_mode(res, mode);
5306 * Transform builtin inport.
5308 static ir_node *gen_inport(ir_node *node)
5310 ir_type *tp = get_Builtin_type(node);
5311 ir_type *rstp = get_method_res_type(tp, 0);
5312 ir_mode *mode = get_type_mode(rstp);
5313 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5314 ir_node *block = be_transform_node(get_nodes_block(node));
5315 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5316 dbg_info *dbgi = get_irn_dbg_info(node);
5318 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5319 set_ia32_ls_mode(res, mode);
5321 /* check for missing Result Proj */
5326 * Transform a builtin inner trampoline
/* Materializes a nested-function trampoline at *ptr:
 *   mov ecx, <env>   (opcode 0xB9 + 32-bit env)
 *   jmp rel <callee> (opcode 0xE9 + 32-bit pc-relative offset)
 * NOTE(review): this chunk elides several lines (declarations, braces,
 * the address-offset increments between the stores) — kept as-is. */
5328 static ir_node *gen_inner_trampoline(ir_node *node)
5330 ir_node *ptr = get_Builtin_param(node, 0);
5331 ir_node *callee = get_Builtin_param(node, 1);
5332 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5333 ir_node *mem = get_Builtin_mem(node);
5334 ir_node *block = get_nodes_block(node);
5335 ir_node *new_block = be_transform_node(block);
5339 ir_node *trampoline;
5341 dbg_info *dbgi = get_irn_dbg_info(node);
5342 ia32_address_t addr;
5344 /* construct store address */
5345 memset(&addr, 0, sizeof(addr));
5346 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5348 if (addr.base == NULL) {
5349 addr.base = noreg_GP;
5351 addr.base = be_transform_node(addr.base);
5354 if (addr.index == NULL) {
5355 addr.index = noreg_GP;
5357 addr.index = be_transform_node(addr.index);
5359 addr.mem = be_transform_node(mem);
5361 /* mov ecx, <env> */
/* opcode byte 0xB9 = mov ecx, imm32 */
5362 val = ia32_create_Immediate(NULL, 0, 0xB9);
5363 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5364 addr.index, addr.mem, val);
5365 set_irn_pinned(store, get_irn_pinned(node));
5366 set_ia32_op_type(store, ia32_AddrModeD);
5367 set_ia32_ls_mode(store, mode_Bu);
5368 set_address(store, &addr);
/* store the 32-bit environment pointer (mov's immediate operand) */
5372 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5373 addr.index, addr.mem, env);
5374 set_irn_pinned(store, get_irn_pinned(node));
5375 set_ia32_op_type(store, ia32_AddrModeD);
5376 set_ia32_ls_mode(store, mode_Iu);
5377 set_address(store, &addr);
5381 /* jmp rel <callee> */
/* opcode byte 0xE9 = jmp rel32 */
5382 val = ia32_create_Immediate(NULL, 0, 0xE9);
5383 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5384 addr.index, addr.mem, val);
5385 set_irn_pinned(store, get_irn_pinned(node));
5386 set_ia32_op_type(store, ia32_AddrModeD);
5387 set_ia32_ls_mode(store, mode_Bu);
5388 set_address(store, &addr);
5392 trampoline = be_transform_node(ptr);
5394 /* the callee is typically an immediate */
/* rel32 = callee - (trampoline + 10): -10 accounts for the code bytes
 * preceding the jump target field */
5395 if (is_SymConst(callee)) {
5396 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5398 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5399 add_ia32_am_offs_int(rel, -10);
5401 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5403 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5404 addr.index, addr.mem, rel);
5405 set_irn_pinned(store, get_irn_pinned(node));
5406 set_ia32_op_type(store, ia32_AddrModeD);
5407 set_ia32_ls_mode(store, mode_Iu);
5408 set_address(store, &addr);
/* result tuple: memory and trampoline address */
5413 return new_r_Tuple(new_block, 2, in);
5417 * Transform Builtin node.
5419 static ir_node *gen_Builtin(ir_node *node)
/* Dispatch a Builtin node to its specific transformation routine.
 * NOTE(review): this chunk elides the switch head and several case
 * labels — code kept as-is. */
5421 ir_builtin_kind kind = get_Builtin_kind(node);
5425 return gen_trap(node);
5426 case ir_bk_debugbreak:
5427 return gen_debugbreak(node);
5428 case ir_bk_return_address:
5429 return gen_return_address(node);
5430 case ir_bk_frame_address:
5431 return gen_frame_address(node);
5432 case ir_bk_prefetch:
5433 return gen_prefetch(node);
5435 return gen_ffs(node);
5437 return gen_clz(node);
5439 return gen_ctz(node);
5441 return gen_parity(node);
5442 case ir_bk_popcount:
5443 return gen_popcount(node);
5445 return gen_bswap(node);
5447 return gen_outport(node);
5449 return gen_inport(node);
5450 case ir_bk_inner_trampoline:
5451 return gen_inner_trampoline(node);
/* anything not handled above is unsupported on ia32 */
5453 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5457 * Transform Proj(Builtin) node.
5459 static ir_node *gen_Proj_Builtin(ir_node *proj)
/* Transform a Proj of a Builtin node, mapping the generic Builtin proj
 * numbers to the projs of the already-transformed ia32 node.
 * NOTE(review): this chunk elides the switch head and several case
 * labels — code kept as-is. */
5461 ir_node *node = get_Proj_pred(proj);
5462 ir_node *new_node = be_transform_node(node);
5463 ir_builtin_kind kind = get_Builtin_kind(node);
/* builtins with a single value result: forward the transformed node */
5466 case ir_bk_return_address:
5467 case ir_bk_frame_address:
5472 case ir_bk_popcount:
5474 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* builtins with only a memory result */
5477 case ir_bk_debugbreak:
5478 case ir_bk_prefetch:
5480 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a value and a memory result */
5483 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5484 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5486 assert(get_Proj_proj(proj) == pn_Builtin_M);
5487 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline was transformed to a Tuple (mem, result) */
5489 case ir_bk_inner_trampoline:
5490 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5491 return get_Tuple_pred(new_node, 1);
5493 assert(get_Proj_proj(proj) == pn_Builtin_M);
5494 return get_Tuple_pred(new_node, 0);
5497 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5500 static ir_node *gen_be_IncSP(ir_node *node)
5502 ir_node *res = be_duplicate_node(node);
5503 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5509 * Transform the Projs from a be_Call.
5511 static ir_node *gen_Proj_be_Call(ir_node *node)
/* Transform a Proj of a be_Call: map be_Call proj numbers onto the projs
 * of the ia32_Call, fixing modes and register assignments.
 * NOTE(review): this chunk elides several lines (declarations, braces,
 * switch structure around the register fixup) — code kept as-is. */
5513 ir_node *call = get_Proj_pred(node);
5514 ir_node *new_call = be_transform_node(call);
5515 dbg_info *dbgi = get_irn_dbg_info(node);
5516 long proj = get_Proj_proj(node);
5517 ir_mode *mode = get_irn_mode(node);
5520 if (proj == pn_be_Call_M) {
5521 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5523 /* transform call modes */
5524 if (mode_is_data(mode)) {
5525 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5529 /* Map from be_Call to ia32_Call proj number */
5530 if (proj == pn_be_Call_sp) {
5531 proj = pn_ia32_Call_stack;
5532 } else if (proj == pn_be_Call_M) {
5533 proj = pn_ia32_Call_M;
5534 } else if (proj == pn_be_Call_X_except) {
5535 proj = pn_ia32_Call_X_except;
5536 } else if (proj == pn_be_Call_X_regular) {
5537 proj = pn_ia32_Call_X_regular;
/* a register result: find the ia32_Call output with the matching
 * limited register requirement */
5539 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5540 int const n_outs = arch_get_irn_n_outs(new_call);
5543 assert(proj >= pn_be_Call_first_res);
5544 assert(req->type & arch_register_req_type_limited);
5546 for (i = 0; i < n_outs; ++i) {
5547 arch_register_req_t const *const new_req
5548 = arch_get_irn_register_req_out(new_call, i);
5550 if (!(new_req->type & arch_register_req_type_limited) ||
5551 new_req->cls != req->cls ||
5552 *new_req->limited != *req->limited)
5561 res = new_rd_Proj(dbgi, new_call, mode, proj);
5563 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pin the fixed registers of the well-known outputs */
5565 case pn_ia32_Call_stack:
5566 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5569 case pn_ia32_Call_fpcw:
5570 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5578 * Transform the Projs from a Cmp.
5580 static ir_node *gen_Proj_Cmp(ir_node *node)
5582 /* this probably means not all mode_b nodes were lowered... */
5583 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5587 static ir_node *gen_Proj_ASM(ir_node *node)
/* Transform a Proj of an ASM node, renumbering/retyping the proj for the
 * transformed ia32 ASM node.
 * NOTE(review): this chunk elides the branch bodies that adjust `mode`
 * for the int/reference and float cases — code kept as-is. */
5589 ir_mode *mode = get_irn_mode(node);
5590 ir_node *pred = get_Proj_pred(node);
5591 ir_node *new_pred = be_transform_node(pred);
5592 long pos = get_Proj_proj(node);
/* the memory result is always the last output of the new ASM node */
5594 if (mode == mode_M) {
5595 pos = arch_get_irn_n_outs(new_pred)-1;
5596 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5598 } else if (mode_is_float(mode)) {
5601 panic("unexpected proj mode at ASM");
5604 return new_r_Proj(new_pred, mode, pos);
5608 * Transform and potentially renumber Proj nodes.
5610 static ir_node *gen_Proj(ir_node *node)
/* Dispatch a Proj node to the transformer matching its predecessor's
 * opcode; remaining Projs are renumbered/duplicated generically.
 * NOTE(review): this chunk elides the case labels of the opcode switch
 * and several other lines — code kept as-is. */
5612 ir_node *pred = get_Proj_pred(node);
5615 switch (get_irn_opcode(pred)) {
5617 return gen_Proj_Load(node);
5619 return gen_Proj_Store(node);
5621 return gen_Proj_ASM(node);
5623 return gen_Proj_Builtin(node);
5625 return gen_Proj_Div(node);
5627 return gen_Proj_Mod(node);
5629 return gen_Proj_CopyB(node);
5631 return gen_Proj_be_SubSP(node);
5633 return gen_Proj_be_AddSP(node);
5635 return gen_Proj_be_Call(node);
5637 return gen_Proj_Cmp(node);
5639 proj = get_Proj_proj(node);
5641 case pn_Start_X_initial_exec: {
5642 ir_node *block = get_nodes_block(pred);
5643 ir_node *new_block = be_transform_node(block);
5644 dbg_info *dbgi = get_irn_dbg_info(node);
5645 /* we exchange the ProjX with a jump */
5646 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5654 if (is_ia32_l_FloattoLL(pred)) {
5655 return gen_Proj_l_FloattoLL(node);
5657 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* default: gp-register Projs are recreated with mode_Iu, everything
 * else is duplicated unchanged */
5661 ir_mode *mode = get_irn_mode(node);
5662 if (ia32_mode_needs_gp_reg(mode)) {
5663 ir_node *new_pred = be_transform_node(pred);
5664 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5665 get_Proj_proj(node));
/* keep the original node number for debugging purposes */
5666 new_proj->node_nr = node->node_nr;
5671 return be_duplicate_node(node);
5675 * Enters all transform functions into the generic pointer
5677 static void register_transformers(void)
/* Install the ia32 transformation callback for every opcode; ops without
 * a specific handler keep the default set by be_start_transform_setup(). */
5679 /* first clear the generic function pointer for all ops */
5680 be_start_transform_setup();
5682 be_set_transform_function(op_Add, gen_Add);
5683 be_set_transform_function(op_And, gen_And);
5684 be_set_transform_function(op_ASM, ia32_gen_ASM);
5685 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5686 be_set_transform_function(op_be_Call, gen_be_Call);
5687 be_set_transform_function(op_be_Copy, gen_be_Copy);
5688 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5689 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5690 be_set_transform_function(op_be_Return, gen_be_Return);
5691 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5692 be_set_transform_function(op_Builtin, gen_Builtin);
5693 be_set_transform_function(op_Cmp, gen_Cmp);
5694 be_set_transform_function(op_Cond, gen_Cond);
5695 be_set_transform_function(op_Const, gen_Const);
5696 be_set_transform_function(op_Conv, gen_Conv);
5697 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5698 be_set_transform_function(op_Div, gen_Div);
5699 be_set_transform_function(op_Eor, gen_Eor);
/* ia32 "l_" lowered nodes created by earlier backend phases */
5700 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5701 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5702 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5703 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5704 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5705 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5706 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5707 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5708 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5709 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5710 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5711 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5712 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5713 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5714 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5715 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5716 be_set_transform_function(op_IJmp, gen_IJmp);
5717 be_set_transform_function(op_Jmp, gen_Jmp);
5718 be_set_transform_function(op_Load, gen_Load);
5719 be_set_transform_function(op_Minus, gen_Minus);
5720 be_set_transform_function(op_Mod, gen_Mod);
5721 be_set_transform_function(op_Mul, gen_Mul);
5722 be_set_transform_function(op_Mulh, gen_Mulh);
5723 be_set_transform_function(op_Mux, gen_Mux);
5724 be_set_transform_function(op_Not, gen_Not);
5725 be_set_transform_function(op_Or, gen_Or);
5726 be_set_transform_function(op_Phi, gen_Phi);
5727 be_set_transform_function(op_Proj, gen_Proj);
5728 be_set_transform_function(op_Rotl, gen_Rotl);
5729 be_set_transform_function(op_Shl, gen_Shl);
5730 be_set_transform_function(op_Shr, gen_Shr);
5731 be_set_transform_function(op_Shrs, gen_Shrs);
5732 be_set_transform_function(op_Store, gen_Store);
5733 be_set_transform_function(op_Sub, gen_Sub);
5734 be_set_transform_function(op_SymConst, gen_SymConst);
5735 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5739 * Pre-transform all unknown and noreg nodes.
5741 static void ia32_pretransform_node(void)
5743 ir_graph *irg = current_ir_graph;
5744 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5746 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5747 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5748 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5749 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5750 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5752 nomem = get_irg_no_mem(irg);
5753 noreg_GP = ia32_new_NoReg_gp(irg);
5757 * Post-process all calls if we are in SSE mode.
5758 * The ABI requires that the results are in st0, copy them
5759 * to a xmm register.
5761 static void postprocess_fp_call_results(void)
/* In SSE mode the ABI still returns floats in st(0); for each recorded
 * call, patch float results: xStores become vfst directly, other users
 * get the value spilled to the frame and reloaded into an XMM register.
 * NOTE(review): this chunk elides many lines (declarations, braces,
 * continue statements, node exchanges) — code kept as-is. */
5765 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5766 ir_node *call = call_list[i];
5767 ir_type *mtp = call_types[i];
5770 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5771 ir_type *res_tp = get_method_res_type(mtp, j);
5772 ir_node *res, *new_res;
5773 const ir_edge_t *edge, *next;
/* only atomic float results need the fixup */
5776 if (! is_atomic_type(res_tp)) {
5777 /* no floating point return */
5780 res_mode = get_type_mode(res_tp);
5781 if (! mode_is_float(res_mode)) {
5782 /* no floating point return */
5786 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5789 /* now patch the users */
5790 foreach_out_edge_safe(res, edge, next) {
5791 ir_node *succ = get_edge_src_irn(edge);
/* Keep nodes just hold the value alive, nothing to patch */
5794 if (be_is_Keep(succ))
5797 if (is_ia32_xStore(succ)) {
5798 /* an xStore can be patched into an vfst */
5799 dbg_info *db = get_irn_dbg_info(succ);
5800 ir_node *block = get_nodes_block(succ);
5801 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5802 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5803 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5804 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5805 ir_mode *mode = get_ia32_ls_mode(succ);
5807 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5808 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5809 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5810 if (is_ia32_use_frame(succ))
5811 set_ia32_use_frame(st);
5812 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5813 set_irn_pinned(st, get_irn_pinned(succ));
5814 set_ia32_op_type(st, ia32_AddrModeD);
/* the xStore and vfst proj numbers must line up for the swap */
5816 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5817 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5818 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* lazily create the spill/reload sequence once per result */
5825 if (new_res == NULL) {
5826 dbg_info *db = get_irn_dbg_info(call);
5827 ir_node *block = get_nodes_block(call);
5828 ir_node *frame = get_irg_frame(current_ir_graph);
5829 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5830 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5831 ir_node *vfst, *xld, *new_mem;
5834 /* store st(0) on stack */
5835 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5837 set_ia32_op_type(vfst, ia32_AddrModeD);
5838 set_ia32_use_frame(vfst);
5840 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5842 /* load into SSE register */
5843 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5845 set_ia32_op_type(xld, ia32_AddrModeS);
5846 set_ia32_use_frame(xld);
5848 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5849 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute the call's memory users over the new load */
5851 if (old_mem != NULL) {
5852 edges_reroute(old_mem, new_mem);
/* finally let the user consume the SSE value */
5856 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5862 /* do the transformation */
5863 void ia32_transform_graph(ir_graph *irg)
/* Entry point: transform a whole graph from firm IR into ia32 nodes.
 * Sets up per-run state (fpcw, heights, non-AM node analysis, call
 * lists), runs the generic transform driver, then applies the SSE2 float
 * result fixups and tears the state down again.
 * NOTE(review): this chunk elides a few lines (declarations, braces,
 * the CSE-disable between saving and restoring cse_last). */
5867 register_transformers();
5868 initial_fpcw = NULL;
5869 ia32_no_pic_adjust = 0;
5871 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
/* heights are needed for address-mode matching decisions */
5873 be_timer_push(T_HEIGHTS);
5874 ia32_heights = heights_new(irg);
5875 be_timer_pop(T_HEIGHTS);
5876 ia32_calculate_non_address_mode_nodes(irg);
5878 /* the transform phase is not safe for CSE (yet) because several nodes get
5879 * attributes set after their creation */
5880 cse_last = get_opt_cse();
/* collect calls so SSE float results can be fixed up afterwards */
5883 call_list = NEW_ARR_F(ir_node *, 0);
5884 call_types = NEW_ARR_F(ir_type *, 0);
5885 be_transform_graph(irg, ia32_pretransform_node);
5887 if (ia32_cg_config.use_sse2)
5888 postprocess_fp_call_results();
5889 DEL_ARR_F(call_types);
5890 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5892 set_opt_cse(cse_last);
5894 ia32_free_non_address_mode_nodes();
5895 heights_free(ia32_heights);
5896 ia32_heights = NULL;
5899 void ia32_init_transform(void)
5901 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");