2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
56 #include "betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_address_mode.h"
67 #include "ia32_architecture.h"
69 #include "gen_ia32_regalloc_if.h"
71 /* define this to construct SSE constants instead of load them */
72 #undef CONSTRUCT_SSE_CONST
/* Convenience aliases for the backend register-class modes (vfp = x87 stack,
 * xmm = SSE registers). */
74 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
75 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle; only present in debug builds. */
77 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* FP control word: the pre-transform node and its transformed counterpart
 * (see get_fpcw() below, which lazily transforms old_initial_fpcw). */
79 static ir_node *old_initial_fpcw = NULL;
80 static ir_node *initial_fpcw = NULL;
81 int ia32_no_pic_adjust;
/* Constructor-function signatures for generated ia32 node builders.
 * NOTE(review): this listing appears truncated — several typedefs below are
 * missing their trailing parameter/closing lines; verify against the full
 * source. */
83 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
84 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
87 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
88 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
91 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
92 ir_node *op1, ir_node *op2);
94 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
95 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
97 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem);
100 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
101 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
104 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations of helpers defined later in this file. */
106 static ir_node *create_immediate_or_transform(ir_node *node,
107 char immediate_constraint_type);
109 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
110 dbg_info *dbgi, ir_node *block,
111 ir_node *op, ir_node *orig_node);
113 /* its enough to have those once */
114 static ir_node *nomem, *noreg_GP;
116 /** a list to postprocess all calls */
117 static ir_node **call_list;
118 static ir_type **call_types;
120 /** Return non-zero is a node represents the 0 constant. */
121 static bool is_Const_0(ir_node *node)
123 return is_Const(node) && is_Const_null(node);
126 /** Return non-zero is a node represents the 1 constant. */
127 static bool is_Const_1(ir_node *node)
129 return is_Const(node) && is_Const_one(node);
132 /** Return non-zero is a node represents the -1 constant. */
133 static bool is_Const_Minus_1(ir_node *node)
135 return is_Const(node) && is_Const_all_one(node);
139 * returns true if constant can be created with a simple float command
141 static bool is_simple_x87_Const(ir_node *node)
143 ir_tarval *tv = get_Const_tarval(node);
144 if (tarval_is_null(tv) || tarval_is_one(tv))
147 /* TODO: match all the other float constants */
/* Returns true if the constant can be materialized with a simple SSE
 * instruction sequence instead of a memory load.
 * NOTE(review): listing is truncated — the mode_F fast path, the closing of
 * the tarval_is_null condition and the return statements are elided; verify
 * against the full source. */
152 * returns true if constant can be created with a simple float command
154 static bool is_simple_sse_Const(ir_node *node)
156 ir_tarval *tv = get_Const_tarval(node);
157 ir_mode *mode = get_tarval_mode(tv);
/* 0.0 (and, when constructing SSE constants, 1.0) can be built directly. */
162 if (tarval_is_null(tv)
163 #ifdef CONSTRUCT_SSE_CONST
168 #ifdef CONSTRUCT_SSE_CONST
169 if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its sub-bytes. */
170 unsigned val = get_tarval_sub_bits(tv, 0) |
171 (get_tarval_sub_bits(tv, 1) << 8) |
172 (get_tarval_sub_bits(tv, 2) << 16) |
173 (get_tarval_sub_bits(tv, 3) << 24);
175 /* lower 32bit are zero, really a 32bit constant */
178 #endif /* CONSTRUCT_SSE_CONST */
179 /* TODO: match all the other float constants */
184 * return NoREG or pic_base in case of PIC.
185 * This is necessary as base address for newly created symbols
187 static ir_node *get_symconst_base(void)
189 ir_graph *irg = current_ir_graph;
191 if (be_get_irg_options(irg)->pic) {
192 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
193 return arch_env->impl->get_pic_base(irg);
/* Transforms a firm Const into an ia32 node: SSE zero/movd/xLoad for floats
 * under SSE2, vfldz/vfld1/vfld for x87, plain ia32_Const for integers.
 * NOTE(review): listing is truncated — variable declarations, several
 * closing braces and returns are elided; comments below only describe the
 * visible code. */
200 * Transforms a Const.
202 static ir_node *gen_Const(ir_node *node)
204 ir_node *old_block = get_nodes_block(node);
205 ir_node *block = be_transform_node(old_block);
206 dbg_info *dbgi = get_irn_dbg_info(node);
207 ir_mode *mode = get_irn_mode(node);
208 ir_tarval *tv = get_Const_tarval(node);
210 assert(is_Const(node));
212 if (mode_is_float(mode)) {
213 ir_graph *irg = get_irn_irg(node);
214 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
215 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
220 if (ia32_cg_config.use_sse2) {
/* 0.0: xorps produces the zero without a memory access. */
221 if (tarval_is_null(tv)) {
222 load = new_bd_ia32_xZero(dbgi, block);
223 set_ia32_ls_mode(load, mode);
225 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build from all-ones via shift left then shift right.
 * 26/55 = mantissa-dependent shift counts for float/double. */
226 } else if (tarval_is_one(tv)) {
227 int cnst = mode == mode_F ? 26 : 55;
228 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
229 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
230 ir_node *pslld, *psrld;
232 load = new_bd_ia32_xAllOnes(dbgi, block);
233 set_ia32_ls_mode(load, mode);
234 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
235 set_ia32_ls_mode(pslld, mode);
236 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
237 set_ia32_ls_mode(psrld, mode);
239 #endif /* CONSTRUCT_SSE_CONST */
240 } else if (mode == mode_F) {
241 /* we can place any 32bit constant by using a movd gp, sse */
242 unsigned val = get_tarval_sub_bits(tv, 0) |
243 (get_tarval_sub_bits(tv, 1) << 8) |
244 (get_tarval_sub_bits(tv, 2) << 16) |
245 (get_tarval_sub_bits(tv, 3) << 24);
246 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
247 load = new_bd_ia32_xMovd(dbgi, block, cnst);
248 set_ia32_ls_mode(load, mode);
252 #ifdef CONSTRUCT_SSE_CONST
253 if (mode == mode_D) {
254 unsigned val = get_tarval_sub_bits(tv, 0) |
255 (get_tarval_sub_bits(tv, 1) << 8) |
256 (get_tarval_sub_bits(tv, 2) << 16) |
257 (get_tarval_sub_bits(tv, 3) << 24);
259 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
260 ir_node *cnst, *psllq;
262 /* fine, lower 32bit are zero, produce 32bit value */
263 val = get_tarval_sub_bits(tv, 4) |
264 (get_tarval_sub_bits(tv, 5) << 8) |
265 (get_tarval_sub_bits(tv, 6) << 16) |
266 (get_tarval_sub_bits(tv, 7) << 24);
267 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
268 load = new_bd_ia32_xMovd(dbgi, block, cnst);
269 set_ia32_ls_mode(load, mode);
/* Shift the upper 32 bits into place. */
270 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
271 set_ia32_ls_mode(psllq, mode);
276 #endif /* CONSTRUCT_SSE_CONST */
/* General SSE case: load the constant from a float-const entity. */
277 floatent = ia32_create_float_const_entity(isa, tv, NULL);
279 base = get_symconst_base();
280 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
282 set_ia32_op_type(load, ia32_AddrModeS);
283 set_ia32_am_sc(load, floatent);
284 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
285 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: dedicated instructions for 0.0 and 1.0, else load from memory. */
288 if (tarval_is_null(tv)) {
289 load = new_bd_ia32_vfldz(dbgi, block);
291 set_ia32_ls_mode(load, mode);
292 } else if (tarval_is_one(tv)) {
293 load = new_bd_ia32_vfld1(dbgi, block);
295 set_ia32_ls_mode(load, mode);
300 floatent = ia32_create_float_const_entity(isa, tv, NULL);
301 /* create_float_const_ent is smart and sometimes creates
303 ls_mode = get_type_mode(get_entity_type(floatent));
304 base = get_symconst_base();
305 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
307 set_ia32_op_type(load, ia32_AddrModeS);
308 set_ia32_am_sc(load, floatent);
309 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
310 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
313 #ifdef CONSTRUCT_SSE_CONST
315 #endif /* CONSTRUCT_SSE_CONST */
316 SET_IA32_ORIG_NODE(load, node);
318 } else { /* non-float mode */
/* Integers: normalize the tarval to mode_Iu, then emit an ia32 Const. */
322 tv = tarval_convert_to(tv, mode_Iu);
324 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
326 panic("couldn't convert constant tarval (%+F)", node);
328 val = get_tarval_long(tv);
330 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
331 SET_IA32_ORIG_NODE(cnst, node);
/* Transforms a firm SymConst into either a float load (SSE xLoad / x87 vfld),
 * a TLS Lea, or an ia32 Const carrying the entity.
 * NOTE(review): listing is truncated — declarations, else-branches and
 * returns are elided; verify against the full source. */
338 * Transforms a SymConst.
340 static ir_node *gen_SymConst(ir_node *node)
342 ir_node *old_block = get_nodes_block(node);
343 ir_node *block = be_transform_node(old_block);
344 dbg_info *dbgi = get_irn_dbg_info(node);
345 ir_mode *mode = get_irn_mode(node);
348 if (mode_is_float(mode)) {
349 if (ia32_cg_config.use_sse2)
350 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
352 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
353 set_ia32_am_sc(cnst, get_SymConst_entity(node));
354 set_ia32_use_frame(cnst);
/* Only address-of-entity SymConsts are supported by this backend. */
358 if (get_SymConst_kind(node) != symconst_addr_ent) {
359 panic("backend only support symconst_addr_ent (at %+F)", node);
361 entity = get_SymConst_entity(node);
/* Thread-local entities are addressed relative to the TLS base. */
362 if (get_entity_owner(entity) == get_tls_type()) {
363 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
364 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
365 set_ia32_am_sc(lea, entity);
368 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
372 SET_IA32_ORIG_NODE(cnst, node);
377 static ir_type *make_array_type(ir_type *tp)
379 unsigned alignment = get_type_alignment_bytes(tp);
380 unsigned size = get_type_size_bytes(tp);
381 ir_type *res = new_type_array(1, tp);
382 set_type_alignment_bytes(res, alignment);
383 set_array_bounds_int(res, 0, 0, 2);
384 if (alignment > size)
386 set_type_size_bytes(res, 2 * size);
387 set_type_state(res, layout_fixed);
/* Creates (and caches, per mode, in function-local statics) a float[2] array
 * type for mode_F, mode_D, or the extended-precision fallback.
 * NOTE(review): listing is truncated — the cache-hit checks and the final
 * return are elided; verify against the full source. */
392 * Create a float[2] array type for the given atomic type.
394 * @param tp the atomic type
396 static ir_type *ia32_create_float_array(ir_type *tp)
398 ir_mode *mode = get_type_mode(tp);
401 if (mode == mode_F) {
402 static ir_type *float_F;
406 arr = float_F = make_array_type(tp);
407 } else if (mode == mode_D) {
408 static ir_type *float_D;
412 arr = float_D = make_array_type(tp);
414 static ir_type *float_E;
418 arr = float_E = make_array_type(tp);
/* NOTE(review): listing is truncated — the name field of the table struct,
 * the cache-hit early-out and several closing braces are elided; the
 * comments below describe only the visible code. */
423 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
424 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* Table of the known constants: sign-bit masks, abs masks and the
 * unsigned-long-long bias. The third field selects the tarval mode below. */
426 static const struct {
428 const char *cnst_str;
430 } names [ia32_known_const_max] = {
431 { "C_sfp_sign", "0x80000000", 0 },
432 { "C_dfp_sign", "0x8000000000000000", 1 },
433 { "C_sfp_abs", "0x7FFFFFFF", 0 },
434 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
435 { "C_ull_bias", "0x10000000000000000", 2 }
/* One cached entity per known constant; created lazily on first request. */
437 static ir_entity *ent_cache[ia32_known_const_max];
439 ir_entity *ent = ent_cache[kct];
442 ir_graph *irg = current_ir_graph;
443 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
444 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
445 const char *cnst_str = names[kct].cnst_str;
446 ident *name = new_id_from_str(names[kct].name);
449 switch (names[kct].mode) {
450 case 0: mode = mode_Iu; break;
451 case 1: mode = mode_Lu; break;
452 case 2: mode = mode_F; break;
453 default: panic("internal compiler error (ia32_gen_fp_known_const)");
455 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS is emitted as a float[2] = { 0.0f, bias } initialized entity;
 * all other constants go through the generic float-const entity pool. */
457 if (kct == ia32_ULLBIAS) {
458 ir_type *type = ia32_get_prim_type(mode_F);
459 ir_type *atype = ia32_create_float_array(type);
460 ir_initializer_t *initializer;
462 ent = new_entity(get_glob_type(), name, atype);
464 set_entity_ld_ident(ent, name);
465 set_entity_visibility(ent, ir_visibility_private);
466 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
468 initializer = create_initializer_compound(2);
469 set_initializer_compound_value(initializer, 0,
470 create_initializer_tarval(get_mode_null(mode)));
471 set_initializer_compound_value(initializer, 1,
472 create_initializer_tarval(tv));
473 set_entity_initializer(ent, initializer);
475 ent = ia32_create_float_const_entity(isa, tv, name);
477 /* cache the entry */
478 ent_cache[kct] = ent;
481 return ent_cache[kct];
/* NOTE(review): listing is truncated — variable declarations, the Proj
 * check and the return statements after each test are elided; comments
 * below describe only the visible checks. */
485 * return true if the node is a Proj(Load) and could be used in source address
486 * mode for another node. Will return only true if the @p other node is not
487 * dependent on the memory of the Load (for binary operations use the other
488 * input here, for unary operations use NULL).
490 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
491 ir_node *other, ir_node *other2, match_flags_t flags)
496 /* float constants are always available */
497 if (is_Const(node)) {
498 ir_mode *mode = get_irn_mode(node);
499 if (mode_is_float(mode)) {
500 if (ia32_cg_config.use_sse2) {
501 if (is_simple_sse_Const(node))
504 if (is_simple_x87_Const(node))
/* multiple users of the constant: do not fold it into AM */
507 if (get_irn_n_edges(node) > 1)
515 load = get_Proj_pred(node);
516 pn = get_Proj_proj(node);
/* only the result Proj of a Load in the same block qualifies */
517 if (!is_Load(load) || pn != pn_Load_res)
519 if (get_nodes_block(load) != block)
521 /* we only use address mode if we're the only user of the load */
522 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
524 /* in some edge cases with address mode we might reach the load normally
525 * and through some AM sequence, if it is already materialized then we
526 * can't create an AM node from it */
527 if (be_is_transformed(node))
530 /* don't do AM if other node inputs depend on the load (via mem-proj) */
531 if (other != NULL && ia32_prevents_AM(block, load, other))
534 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Aggregated result of operand matching: the address-mode description plus
 * how the matched operation should be emitted.
 * NOTE(review): listing is truncated — fields such as addr, ls_mode,
 * mem_proj, new_op1/new_op2 and pinned referenced elsewhere in this file
 * are elided here; verify against the full source. */
540 typedef struct ia32_address_mode_t ia32_address_mode_t;
541 struct ia32_address_mode_t {
546 ia32_op_type_t op_type;
550 unsigned commutative : 1;
551 unsigned ins_permuted : 1;
554 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
556 /* construct load address */
557 memset(addr, 0, sizeof(addr[0]));
558 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
560 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
561 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
562 addr->mem = be_transform_node(mem);
/* Fill @p am with a source address mode for @p node: either a float-const
 * entity reference (for Const) or the decomposed address of a Load.
 * NOTE(review): listing is truncated — declarations, the early return of
 * the Const branch and assignments such as addr->mem are elided. */
565 static void build_address(ia32_address_mode_t *am, ir_node *node,
566 ia32_create_am_flags_t flags)
568 ia32_address_t *addr = &am->addr;
574 /* floating point immediates */
575 if (is_Const(node)) {
/* materialize the float constant as an entity and address it directly */
576 ir_graph *irg = get_irn_irg(node);
577 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
578 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
579 ir_tarval *tv = get_Const_tarval(node);
580 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
581 addr->base = get_symconst_base();
582 addr->index = noreg_GP;
584 addr->symconst_ent = entity;
585 addr->tls_segment = false;
587 am->ls_mode = get_type_mode(get_entity_type(entity));
588 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): take over the load's address parts */
592 load = get_Proj_pred(node);
593 ptr = get_Load_ptr(load);
594 mem = get_Load_mem(load);
595 new_mem = be_transform_node(mem);
596 am->pinned = get_irn_pinned(load);
597 am->ls_mode = get_Load_mode(load);
598 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
601 /* construct load address */
602 ia32_create_address_mode(addr, ptr, flags);
604 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
605 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
609 static void set_address(ir_node *node, const ia32_address_t *addr)
611 set_ia32_am_scale(node, addr->scale);
612 set_ia32_am_sc(node, addr->symconst_ent);
613 set_ia32_am_offs_int(node, addr->offset);
614 set_ia32_am_tls_segment(node, addr->tls_segment);
615 if (addr->symconst_sign)
616 set_ia32_am_sc_sign(node);
618 set_ia32_use_frame(node);
619 set_ia32_frame_ent(node, addr->frame_entity);
623 * Apply attributes of a given address mode to a node.
625 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
627 set_address(node, &am->addr);
629 set_ia32_op_type(node, am->op_type);
630 set_ia32_ls_mode(node, am->ls_mode);
631 if (am->pinned == op_pin_state_pinned) {
632 /* beware: some nodes are already pinned and did not allow to change the state */
633 if (get_irn_pinned(node) != op_pin_state_pinned)
634 set_irn_pinned(node, op_pin_state_pinned);
637 set_ia32_commutative(node);
641 * Check, if a given node is a Down-Conv, ie. a integer Conv
642 * from a mode with a mode with more bits to a mode with lesser bits.
643 * Moreover, we return only true if the node has not more than 1 user.
645 * @param node the node
646 * @return non-zero if node is a Down-Conv
648 static int is_downconv(const ir_node *node)
656 /* we only want to skip the conv when we're the only user
657 * (because this test is used in the context of address-mode selection
658 * and we don't want to use address mode for multiple users) */
659 if (get_irn_n_edges(node) > 1)
662 src_mode = get_irn_mode(get_Conv_op(node));
663 dest_mode = get_irn_mode(node);
665 ia32_mode_needs_gp_reg(src_mode) &&
666 ia32_mode_needs_gp_reg(dest_mode) &&
667 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
670 /** Skip all Down-Conv's on a given node and return the resulting node. */
671 ir_node *ia32_skip_downconv(ir_node *node)
673 while (is_downconv(node))
674 node = get_Conv_op(node);
679 static bool is_sameconv(ir_node *node)
687 /* we only want to skip the conv when we're the only user
688 * (because this test is used in the context of address-mode selection
689 * and we don't want to use address mode for multiple users) */
690 if (get_irn_n_edges(node) > 1)
693 src_mode = get_irn_mode(get_Conv_op(node));
694 dest_mode = get_irn_mode(node);
696 ia32_mode_needs_gp_reg(src_mode) &&
697 ia32_mode_needs_gp_reg(dest_mode) &&
698 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
701 /** Skip all signedness convs */
702 static ir_node *ia32_skip_sameconv(ir_node *node)
704 while (is_sameconv(node))
705 node = get_Conv_op(node);
710 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
712 ir_mode *mode = get_irn_mode(node);
717 if (mode_is_signed(mode)) {
722 block = get_nodes_block(node);
723 dbgi = get_irn_dbg_info(node);
725 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): listing is truncated — declarations (use_am, use_immediate,
 * noreg, ...), several closing braces and the flag-clearing for small modes
 * are elided; comments below describe only the visible logic. */
729 * matches operands of a node into ia32 addressing/operand modes. This covers
730 * usage of source address mode, immediates, operations with non 32-bit modes,
732 * The resulting data is filled into the @p am struct. block is the block
733 * of the node whose arguments are matched. op1, op2 are the first and second
734 * input that are matched (op1 may be NULL). other_op is another unrelated
735 * input that is not matched! but which is needed sometimes to check if AM
736 * for op1/op2 is legal.
737 * @p flags describes the supported modes of the operation in detail.
739 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
740 ir_node *op1, ir_node *op2, ir_node *other_op,
743 ia32_address_t *addr = &am->addr;
744 ir_mode *mode = get_irn_mode(op2);
745 int mode_bits = get_mode_size_bits(mode);
746 ir_node *new_op1, *new_op2;
748 unsigned commutative;
749 int use_am_and_immediates;
752 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
754 commutative = (flags & match_commutative) != 0;
755 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
756 use_am = (flags & match_am) != 0;
757 use_immediate = (flags & match_immediate) != 0;
758 assert(!use_am_and_immediates || use_immediate);
761 assert(!commutative || op1 != NULL);
762 assert(use_am || !(flags & match_8bit_am));
763 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit address mode only when explicitly allowed by the flags */
765 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
766 (mode_bits == 16 && !(flags & match_16bit_am))) {
770 /* we can simply skip downconvs for mode neutral nodes: the upper bits
771 * can be random for these operations */
772 if (flags & match_mode_neutral) {
773 op2 = ia32_skip_downconv(op2);
775 op1 = ia32_skip_downconv(op1);
778 op2 = ia32_skip_sameconv(op2);
780 op1 = ia32_skip_sameconv(op1);
784 /* match immediates. firm nodes are normalized: constants are always on the
787 if (!(flags & match_try_am) && use_immediate) {
788 new_op2 = ia32_try_create_Immediate(op2, 0);
/* try source address mode on op2 first */
791 if (new_op2 == NULL &&
792 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
793 build_address(am, op2, ia32_create_am_normal);
794 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
795 if (mode_is_float(mode)) {
796 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
800 am->op_type = ia32_AddrModeS;
/* commutative: try swapping operands to fold op1 into address mode */
801 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
803 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
805 build_address(am, op1, ia32_create_am_normal);
807 if (mode_is_float(mode)) {
808 noreg = ia32_new_NoReg_vfp(current_ir_graph);
813 if (new_op2 != NULL) {
816 new_op1 = be_transform_node(op2);
818 am->ins_permuted = true;
820 am->op_type = ia32_AddrModeS;
/* fallback: plain register operands, widening if required */
822 am->op_type = ia32_Normal;
824 if (flags & match_try_am) {
830 mode = get_irn_mode(op2);
831 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
832 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
834 new_op2 = create_upconv(op2, NULL);
835 am->ls_mode = mode_Iu;
837 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
839 new_op2 = be_transform_node(op2);
840 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* default unfilled address parts to the no-register/no-memory nodes */
843 if (addr->base == NULL)
844 addr->base = noreg_GP;
845 if (addr->index == NULL)
846 addr->index = noreg_GP;
847 if (addr->mem == NULL)
850 am->new_op1 = new_op1;
851 am->new_op2 = new_op2;
852 am->commutative = commutative;
856 * "Fixes" a node that uses address mode by turning it into mode_T
857 * and returning a pn_ia32_res Proj.
859 * @param node the node
860 * @param am its address mode
862 * @return a Proj(pn_ia32_res) if a memory address mode is used,
865 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
870 if (am->mem_proj == NULL)
873 /* we have to create a mode_T so the old MemProj can attach to us */
874 mode = get_irn_mode(node);
875 load = get_Proj_pred(am->mem_proj);
877 be_set_transformed_node(load, node);
879 if (mode != mode_T) {
880 set_irn_mode(node, mode_T);
881 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
888 * Construct a standard binary operation, set AM and immediate if required.
890 * @param node The original node for which the binop is created
891 * @param op1 The first operand
892 * @param op2 The second operand
893 * @param func The node constructor function
894 * @return The constructed ia32 node.
896 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
897 construct_binop_func *func, match_flags_t flags)
900 ir_node *block, *new_block, *new_node;
901 ia32_address_mode_t am;
902 ia32_address_t *addr = &am.addr;
904 block = get_nodes_block(node);
905 match_arguments(&am, block, op1, op2, NULL, flags);
907 dbgi = get_irn_dbg_info(node);
908 new_block = be_transform_node(block);
909 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
910 am.new_op1, am.new_op2);
911 set_am_attributes(new_node, &am);
912 /* we can't use source address mode anymore when using immediates */
913 if (!(flags & match_am_and_immediates) &&
914 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
915 set_ia32_am_support(new_node, ia32_am_none);
916 SET_IA32_ORIG_NODE(new_node, node);
918 new_node = fix_mem_proj(new_node, &am);
/* Shared input numbering for lowered ia32 binary ops; the asserts verify it
 * matches the generated Adc/Sbb input layouts.
 * NOTE(review): listing is truncated — the enum header/closing lines are
 * elided. */
924 * Generic names for the inputs of an ia32 binary op.
927 n_ia32_l_binop_left, /**< ia32 left input */
928 n_ia32_l_binop_right, /**< ia32 right input */
929 n_ia32_l_binop_eflags /**< ia32 eflags input */
931 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
932 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
933 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
934 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
935 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
936 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): listing is truncated — the flags parameter of the signature,
 * the dbgi declaration and the final return are elided. */
939 * Construct a binary operation which also consumes the eflags.
941 * @param node The node to transform
942 * @param func The node constructor function
943 * @param flags The match flags
944 * @return The constructor ia32 node
946 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
/* fetch the three data inputs using the shared lowered-binop numbering */
949 ir_node *src_block = get_nodes_block(node);
950 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
951 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
952 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
954 ir_node *block, *new_node, *new_eflags;
955 ia32_address_mode_t am;
956 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
958 match_arguments(&am, src_block, op1, op2, eflags, flags);
960 dbgi = get_irn_dbg_info(node);
961 block = be_transform_node(src_block);
962 new_eflags = be_transform_node(eflags);
963 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
964 am.new_op1, am.new_op2, new_eflags);
965 set_am_attributes(new_node, &am);
966 /* we can't use source address mode anymore when using immediates */
967 if (!(flags & match_am_and_immediates) &&
968 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
969 set_ia32_am_support(new_node, ia32_am_none);
970 SET_IA32_ORIG_NODE(new_node, node);
972 new_node = fix_mem_proj(new_node, &am);
977 static ir_node *get_fpcw(void)
979 if (initial_fpcw != NULL)
982 initial_fpcw = be_transform_node(old_initial_fpcw);
/* NOTE(review): listing is truncated — declarations (dbgi), the is_Div
 * check, the flag additions for address mode and the final return are
 * elided. */
987 * Construct a standard binary operation, set AM and immediate if required.
989 * @param op1 The first operand
990 * @param op2 The second operand
991 * @param func The node constructor function
992 * @return The constructed ia32 node.
994 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
995 construct_binop_float_func *func)
997 ir_mode *mode = get_irn_mode(node);
999 ir_node *block, *new_block, *new_node;
1000 ia32_address_mode_t am;
1001 ia32_address_t *addr = &am.addr;
1002 ia32_x87_attr_t *attr;
1003 /* All operations are considered commutative, because there are reverse
1005 match_flags_t flags = match_commutative;
1007 /* happens for div nodes... */
1008 if (mode == mode_T) {
1010 mode = get_Div_resmode(node);
1012 panic("can't determine mode");
1015 /* cannot use address mode with long double on x87 */
1016 if (get_mode_size_bits(mode) <= 64)
1019 block = get_nodes_block(node);
1020 match_arguments(&am, block, op1, op2, NULL, flags);
1022 dbgi = get_irn_dbg_info(node);
1023 new_block = be_transform_node(block);
/* x87 binops additionally consume the FP control word */
1024 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1025 am.new_op1, am.new_op2, get_fpcw());
1026 set_am_attributes(new_node, &am);
/* record operand permutation so the x87 simulator can pick the
 * reverse instruction variant */
1028 attr = get_ia32_x87_attr(new_node);
1029 attr->attr.data.ins_permuted = am.ins_permuted;
1031 SET_IA32_ORIG_NODE(new_node, node);
1033 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): listing is truncated — the dbgi declaration, loop body
 * (op2 = op; continue) and the final return are elided. */
1039 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1041 * @param op1 The first operand
1042 * @param op2 The second operand
1043 * @param func The node constructor function
1044 * @return The constructed ia32 node.
1046 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1047 construct_shift_func *func,
1048 match_flags_t flags)
1051 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1052 ir_mode *mode = get_irn_mode(node);
1054 assert(! mode_is_float(mode));
1055 assert(flags & match_immediate);
1056 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* x86 shifts implicitly take the amount modulo 32 */
1058 if (get_mode_modulo_shift(mode) != 32)
1059 panic("modulo shift!=32 not supported by ia32 backend");
/* prepare the shifted value: skip downconvs when mode neutral,
 * widen to 32 bit otherwise */
1061 if (flags & match_mode_neutral) {
1062 op1 = ia32_skip_downconv(op1);
1063 new_op1 = be_transform_node(op1);
1064 } else if (get_mode_size_bits(mode) != 32) {
1065 new_op1 = create_upconv(op1, node);
1067 new_op1 = be_transform_node(op1);
1070 /* the shift amount can be any mode that is bigger than 5 bits, since all
1071 * other bits are ignored anyway */
1072 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1073 ir_node *const op = get_Conv_op(op2);
1074 if (mode_is_float(get_irn_mode(op)))
1077 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1079 new_op2 = create_immediate_or_transform(op2, 0);
1081 dbgi = get_irn_dbg_info(node);
1082 block = get_nodes_block(node);
1083 new_block = be_transform_node(block);
1084 new_node = func(dbgi, new_block, new_op1, new_op2);
1085 SET_IA32_ORIG_NODE(new_node, node);
1087 /* lowered shift instruction may have a dependency operand, handle it here */
1088 if (get_irn_arity(node) == 3) {
1089 /* we have a dependency */
1090 ir_node* dep = get_irn_n(node, 2);
1091 if (get_irn_n_edges(dep) > 1) {
1092 /* ... which has at least one user other than 'node' */
1093 ir_node *new_dep = be_transform_node(dep);
1094 add_irn_dep(new_node, new_dep);
1103 * Construct a standard unary operation, set AM and immediate if required.
1105 * @param op The operand
1106 * @param func The node constructor function
1107 * @return The constructed ia32 node.
1109 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1110 match_flags_t flags)
1113 ir_node *block, *new_block, *new_op, *new_node;
1115 assert(flags == 0 || flags == match_mode_neutral);
1116 if (flags & match_mode_neutral) {
1117 op = ia32_skip_downconv(op);
1120 new_op = be_transform_node(op);
1121 dbgi = get_irn_dbg_info(node);
1122 block = get_nodes_block(node);
1123 new_block = be_transform_node(block);
1124 new_node = func(dbgi, new_block, new_op);
1126 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea computing the address described by @p addr.
 * NOTE(review): listing is truncated — declarations, the NULL fallbacks for
 * base/index and the final return are elided. */
1131 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1132 ia32_address_t *addr)
1142 base = be_transform_node(base);
1149 idx = be_transform_node(idx);
1152 /* segment overrides are ineffective for Leas :-( so we have to patch
1154 if (addr->tls_segment) {
/* add the TLS base into the base register instead of using a segment */
1155 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1156 assert(addr->symconst_ent != NULL);
1157 if (base == noreg_GP)
1160 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1161 addr->tls_segment = false;
1164 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1165 set_address(res, addr);
1171 * Returns non-zero if a given address mode has a symbolic or
1172 * numerical offset != 0.
1174 static int am_has_immediates(const ia32_address_t *addr)
1176 return addr->offset != 0 || addr->symconst_ent != NULL
1177 || addr->frame_entity || addr->use_frame;
/* Constructor signature for double-width shift nodes (ShlD/ShrD).
 * NOTE(review): listing is truncated — the final parameter line of the
 * typedef is elided. */
1180 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1181 ir_node *high, ir_node *low,
/* NOTE(review): listing is truncated — declarations (new_count, new_node)
 * and the final return are elided. */
1185 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1186 * op1 - target to be shifted
1187 * op2 - contains bits to be shifted into target
1189 * Only op3 can be an immediate.
1191 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1192 ir_node *high, ir_node *low, ir_node *count,
1193 new_shiftd_func func)
1195 ir_node *new_block = be_transform_node(block);
1196 ir_node *new_high = be_transform_node(high);
1197 ir_node *new_low = be_transform_node(low);
1201 /* the shift amount can be any mode that is bigger than 5 bits, since all
1202 * other bits are ignored anyway */
1203 while (is_Conv(count) &&
1204 get_irn_n_edges(count) == 1 &&
1205 mode_is_int(get_irn_mode(count))) {
1206 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1207 count = get_Conv_op(count);
1209 new_count = create_immediate_or_transform(count, 0);
1211 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1216 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1219 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
/* Only constant shift amounts can be proven complementary here; any
 * non-constant pair falls through (to a "false" result outside this view). */
1221 	if (is_Const(value1) && is_Const(value2)) {
1222 		ir_tarval *tv1 = get_Const_tarval(value1);
1223 		ir_tarval *tv2 = get_Const_tarval(value2);
1224 		if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1225 			long v1 = get_tarval_long(tv1);
1226 			long v2 = get_tarval_long(tv2);
			/* v1 <= v2 additionally rejects negative/overflowed pairs
			 * that would still satisfy v2 == 32-v1. */
1227 			return v1 <= v2 && v2 == 32-v1;
/* Try to recognize the double-word shift patterns that lower_dw produces
 * for 64bit shifts (Or/Add of a Shl and a Shr part) and turn them into a
 * single ia32 ShlD/ShrD. Returns the new node on a match; the non-matching
 * fallthrough (outside this view) is presumably NULL -- caller checks. */
1235 	ir_node *op1 = get_binop_left(node);
1236 	ir_node *op2 = get_binop_right(node);
1237 	assert(is_Or(node) || is_Add(node));
1245 	/* match ShlD operation */
1246 	if (is_Shl(op1) && is_Shr(op2)) {
1247 		ir_node *shl_right = get_Shl_right(op1);
1248 		ir_node *shl_left  = get_Shl_left(op1);
1249 		ir_node *shr_right = get_Shr_right(op2);
1250 		ir_node *shr_left  = get_Shr_left(op2);
1251 		/* constant ShlD operation */
1252 		if (is_complementary_shifts(shl_right, shr_right)) {
1253 			dbg_info *dbgi  = get_irn_dbg_info(node);
1254 			ir_node  *block = get_nodes_block(node);
1255 			return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1258 		/* constant ShrD operation */
1259 		if (is_complementary_shifts(shr_right, shl_right)) {
1260 			dbg_info *dbgi  = get_irn_dbg_info(node);
1261 			ir_node  *block = get_nodes_block(node);
1262 			return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1265 		/* lower_dw produces the following for ShlD:
1266 		 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1267 		if (is_Shr(shr_left) && is_Not(shr_right)
1268 			&& is_Const_1(get_Shr_right(shr_left))
1269 		    && get_Not_op(shr_right) == shl_right) {
1270 			dbg_info *dbgi  = get_irn_dbg_info(node);
1271 			ir_node  *block = get_nodes_block(node);
1272 			ir_node  *val_h = get_Shr_left(shr_left);
1273 			return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1276 		/* lower_dw produces the following for ShrD:
1277 		 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1278 		if (is_Shl(shl_left) && is_Not(shl_right)
1279 			&& is_Const_1(get_Shl_right(shl_left))
1280 		    && get_Not_op(shl_right) == shr_right) {
1281 			dbg_info *dbgi  = get_irn_dbg_info(node);
1282 			ir_node  *block = get_nodes_block(node);
1283 			ir_node  *val_h = get_Shl_left(shl_left);
1284 			return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1293 * Creates an ia32 Add.
1295 * @return the created ia32 Add node
1297 static ir_node *gen_Add(ir_node *node)
1299 	ir_mode  *mode = get_irn_mode(node);
1300 	ir_node  *op1  = get_Add_left(node);
1301 	ir_node  *op2  = get_Add_right(node);
1303 	ir_node  *block, *new_block, *new_node, *add_immediate_op;
1304 	ia32_address_t       addr;
1305 	ia32_address_mode_t  am;
	/* An Add may actually be one half of a lowered 64bit shift pattern. */
1307 	new_node = match_64bit_shift(node);
1308 	if (new_node != NULL)
	/* Float adds: SSE2 xAdd or x87 vfadd. */
1311 	if (mode_is_float(mode)) {
1312 		if (ia32_cg_config.use_sse2)
1313 			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1314 			                 match_commutative | match_am);
1316 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1319 	ia32_mark_non_am(node);
	/* Lea/Add ignore the upper bits anyway, so downconvs can be skipped. */
1321 	op2 = ia32_skip_downconv(op2);
1322 	op1 = ia32_skip_downconv(op1);
1326 	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1327 	 * 1. Add with immediate -> Lea
1328 	 * 2. Add with possible source address mode -> Add
1329 	 * 3. Otherwise -> Lea
1331 	memset(&addr, 0, sizeof(addr));
1332 	ia32_create_address_mode(&addr, node, ia32_create_am_force);
1333 	add_immediate_op = NULL;
1335 	dbgi      = get_irn_dbg_info(node);
1336 	block     = get_nodes_block(node);
1337 	new_block = be_transform_node(block);
	/* Case 0: everything folded into immediates -> plain Const. */
1340 	if (addr.base == NULL && addr.index == NULL) {
1341 		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1342 		                             addr.symconst_sign, 0, addr.offset);
1343 		SET_IA32_ORIG_NODE(new_node, node);
1346 	/* add with immediate? */
1347 	if (addr.index == NULL) {
1348 		add_immediate_op = addr.base;
1349 	} else if (addr.base == NULL && addr.scale == 0) {
1350 		add_immediate_op = addr.index;
1353 	if (add_immediate_op != NULL) {
		/* Degenerate Add x,0: just reuse the operand. */
1354 		if (!am_has_immediates(&addr)) {
1355 #ifdef DEBUG_libfirm
1356 			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1359 			return be_transform_node(add_immediate_op);
		/* Case 1: reg + immediate -> Lea. */
1362 		new_node = create_lea_from_address(dbgi, new_block, &addr);
1363 		SET_IA32_ORIG_NODE(new_node, node);
1367 	/* test if we can use source address mode */
1368 	match_arguments(&am, block, op1, op2, NULL, match_commutative
1369 			| match_mode_neutral | match_am | match_immediate | match_try_am);
1371 	/* construct an Add with source address mode */
1372 	if (am.op_type == ia32_AddrModeS) {
1373 		ia32_address_t *am_addr = &am.addr;
1374 		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1375 		                         am_addr->index, am_addr->mem, am.new_op1,
1377 		set_am_attributes(new_node, &am);
1378 		SET_IA32_ORIG_NODE(new_node, node);
1380 		new_node = fix_mem_proj(new_node, &am);
1385 	/* otherwise construct a lea */
1386 	new_node = create_lea_from_address(dbgi, new_block, &addr);
1387 	SET_IA32_ORIG_NODE(new_node, node);
1392 * Creates an ia32 Mul.
1394 * @return the created ia32 Mul node
1396 static ir_node *gen_Mul(ir_node *node)
1398 	ir_node *op1  = get_Mul_left(node);
1399 	ir_node *op2  = get_Mul_right(node);
1400 	ir_mode *mode = get_irn_mode(node);
	/* Float multiplies: SSE2 xMul or x87 vfmul. */
1402 	if (mode_is_float(mode)) {
1403 		if (ia32_cg_config.use_sse2)
1404 			return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1405 			                 match_commutative | match_am);
1407 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
	/* Integer: IMul, which also supports an immediate operand and address
	 * mode on either side (match_am_and_immediates). */
1409 	return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1410 	                 match_commutative | match_am | match_mode_neutral |
1411 	                 match_immediate | match_am_and_immediates);
1415 * Creates an ia32 Mulh.
1416 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1417 * this result while Mul returns the lower 32 bit.
1419 * @return the created ia32 Mulh node
1421 static ir_node *gen_Mulh(ir_node *node)
1423 	dbg_info *dbgi = get_irn_dbg_info(node);
1424 	ir_node  *op1  = get_Mulh_left(node);
1425 	ir_node  *op2  = get_Mulh_right(node);
1426 	ir_mode  *mode = get_irn_mode(node);
1428 	ir_node  *proj_res_high;
	/* Only 32bit Mulh is implemented (maps to one-operand mul/imul). */
1430 	if (get_mode_size_bits(mode) != 32) {
1431 		panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
	/* Signed -> IMul1OP, unsigned -> Mul; in both cases the high half of
	 * the 64bit product is projected out. */
1434 	if (mode_is_signed(mode)) {
1435 		new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1436 		proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1438 		new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1439 		proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1441 	return proj_res_high;
1445 * Creates an ia32 And.
1447 * @return The created ia32 And node
1449 static ir_node *gen_And(ir_node *node)
1451 	ir_node *op1 = get_And_left(node);
1452 	ir_node *op2 = get_And_right(node);
1453 	assert(! mode_is_float(get_irn_mode(node)));
1455 	/* is it a zero extension? */
	/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit and is
	 * implemented as a conversion (movzx) instead of an And. */
1456 	if (is_Const(op2)) {
1457 		ir_tarval *tv = get_Const_tarval(op2);
1458 		long       v  = get_tarval_long(tv);
1460 		if (v == 0xFF || v == 0xFFFF) {
1461 			dbg_info *dbgi  = get_irn_dbg_info(node);
1462 			ir_node  *block = get_nodes_block(node);
1469 				assert(v == 0xFFFF);
1472 			res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1477 	return gen_binop(node, op1, op2, new_bd_ia32_And,
1478 			match_commutative | match_mode_neutral | match_am | match_immediate);
1482 * Creates an ia32 Or.
1484 * @return The created ia32 Or node
1486 static ir_node *gen_Or(ir_node *node)
1488 	ir_node *op1 = get_Or_left(node);
1489 	ir_node *op2 = get_Or_right(node);
	/* An Or can be the combining node of a lowered 64bit shift
	 * (ShlD/ShrD pattern) -- try that first. */
1492 	res = match_64bit_shift(node);
1496 	assert (! mode_is_float(get_irn_mode(node)));
1497 	return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1498 			| match_mode_neutral | match_am | match_immediate);
1504 * Creates an ia32 Eor.
1506 * @return The created ia32 Eor node
1508 static ir_node *gen_Eor(ir_node *node)
1510 	ir_node *op1 = get_Eor_left(node);
1511 	ir_node *op2 = get_Eor_right(node);
	/* Integer-only: Eor maps directly to the ia32 Xor instruction. */
1513 	assert(! mode_is_float(get_irn_mode(node)));
1514 	return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1515 			| match_mode_neutral | match_am | match_immediate);
1520 * Creates an ia32 Sub.
1522 * @return The created ia32 Sub node
1524 static ir_node *gen_Sub(ir_node *node)
1526 	ir_node  *op1  = get_Sub_left(node);
1527 	ir_node  *op2  = get_Sub_right(node);
1528 	ir_mode  *mode = get_irn_mode(node);
	/* Float subtracts: SSE2 xSub or x87 vfsub; Sub is not commutative,
	 * hence no match_commutative here. */
1530 	if (mode_is_float(mode)) {
1531 		if (ia32_cg_config.use_sse2)
1532 			return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1534 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
	/* Sub x,C should have been normalized to Add x,-C by the middle end. */
1537 	if (is_Const(op2)) {
1538 		ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1542 	return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1543 			| match_am | match_immediate);
/* Compute the memory input for a node that consumed a Load via address mode.
 * The original memory (src_mem) and the folded load's memory (am_mem) must
 * be merged without creating a cycle through the consumed load. */
1546 static ir_node *transform_AM_mem(ir_node *const block,
1547                                  ir_node  *const src_val,
1548                                  ir_node  *const src_mem,
1549                                  ir_node  *const am_mem)
1551 	if (is_NoMem(am_mem)) {
1552 		return be_transform_node(src_mem);
1553 	} else if (is_Proj(src_val) &&
1555 	           get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1556 		/* avoid memory loop */
1558 	} else if (is_Proj(src_val) && is_Sync(src_mem)) {
1559 		ir_node *const ptr_pred = get_Proj_pred(src_val);
1560 		int      const arity    = get_Sync_n_preds(src_mem);
		/* Rebuild the Sync, dropping the predecessor that belongs to the
		 * folded load and appending am_mem (+1 slot below). */
1565 		NEW_ARR_A(ir_node*, ins, arity + 1);
1567 		/* NOTE: This sometimes produces dead-code because the old sync in
1568 		 * src_mem might not be used anymore, we should detect this case
1569 		 * and kill the sync... */
1570 		for (i = arity - 1; i >= 0; --i) {
1571 			ir_node *const pred = get_Sync_pred(src_mem, i);
1573 			/* avoid memory loop */
1574 			if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1577 			ins[n++] = be_transform_node(pred);
1580 		if (n==1 && ins[0] == am_mem) {
1582 			/* creating a new Sync and relying on CSE may fail,
1583 			 * if am_mem is a ProjM, which does not yet verify. */
1587 		return new_r_Sync(block, n, ins);
		/* General case: Sync of the transformed src_mem and am_mem. */
1591 		ins[0] = be_transform_node(src_mem);
1593 		return new_r_Sync(block, 2, ins);
1598 * Create a 32bit to 64bit signed extension.
1600 * @param dbgi  debug info
1601 * @param block the block where node nodes should be placed
1602 * @param val   the value to extend
1603 * @param orig  the original node
1605 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1606                                  ir_node *val, const ir_node *orig)
	/* Two strategies: the short cltd/cdq form (clobbers eax/edx pair,
	 * enabled per CPU config) or an explicit arithmetic shift by 31. */
1611 	if (ia32_cg_config.use_short_sex_eax) {
1612 		ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1613 		res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1615 		ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1616 		res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1618 	SET_IA32_ORIG_NODE(res, orig);
1623 * Generates an ia32 Div with additional infrastructure for the
1624 * register allocator if needed.
1626 static ir_node *create_Div(ir_node *node)
1628 	dbg_info *dbgi             = get_irn_dbg_info(node);
1629 	ir_node  *block            = get_nodes_block(node);
1630 	ir_node  *new_block        = be_transform_node(block);
1631 	int       throws_exception = ir_throws_exception(node);
1638 	ir_node  *sign_extension;
1639 	ia32_address_mode_t  am;
1640 	ia32_address_t      *addr = &am.addr;
1642 	/* the upper bits have random contents for smaller modes */
	/* Shared lowering for both Div and Mod IR nodes. */
1643 	switch (get_irn_opcode(node)) {
1645 		op1  = get_Div_left(node);
1646 		op2  = get_Div_right(node);
1647 		mem  = get_Div_mem(node);
1648 		mode = get_Div_resmode(node);
1651 		op1  = get_Mod_left(node);
1652 		op2  = get_Mod_right(node);
1653 		mem  = get_Mod_mem(node);
1654 		mode = get_Mod_resmode(node);
1657 		panic("invalid divmod node %+F", node);
1660 	match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1662 	/* Beware: We don't need a Sync, if the memory predecessor of the Div node
1663 	   is the memory of the consumed address. We can have only the second op as address
1664 	   in Div nodes, so check only op2. */
1665 	new_mem = transform_AM_mem(block, op2, mem, addr->mem);
	/* idiv needs edx:eax sign-extended (cltd/sar 31); div needs edx = 0. */
1667 	if (mode_is_signed(mode)) {
1668 		sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1669 		new_node       = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1670 				addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1672 		sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1674 		new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1675 		                           addr->index, new_mem, am.new_op2,
1676 		                           am.new_op1, sign_extension);
1678 	ir_set_throws_exception(new_node, throws_exception);
1680 	set_irn_pinned(new_node, get_irn_pinned(node));
1682 	set_am_attributes(new_node, &am);
1683 	SET_IA32_ORIG_NODE(new_node, node);
1685 	new_node = fix_mem_proj(new_node, &am);
1691 * Generates an ia32 Mod.
1693 static ir_node *gen_Mod(ir_node *node)
	/* Div and Mod share the same ia32 instruction; only the result
	 * projection differs (handled by the Proj transformation). */
1695 	return create_Div(node);
1699 * Generates an ia32 Div.
1701 static ir_node *gen_Div(ir_node *node)
1703 	ir_mode *mode = get_Div_resmode(node);
	/* Float divides: SSE2 xDiv or x87 vfdiv; both non-commutative. */
1704 	if (mode_is_float(mode)) {
1705 		ir_node *op1 = get_Div_left(node);
1706 		ir_node *op2 = get_Div_right(node);
1708 		if (ia32_cg_config.use_sse2) {
1709 			return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1711 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
	/* Integer divide shares its implementation with Mod. */
1715 	return create_Div(node);
1719 * Creates an ia32 Shl.
1721 * @return The created ia32 Shl node
1723 static ir_node *gen_Shl(ir_node *node)
1725 	ir_node *left  = get_Shl_left(node);
1726 	ir_node *right = get_Shl_right(node);
	/* mode_neutral is fine for left shifts: upper bits are shifted out. */
1728 	return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1729 	                       match_mode_neutral | match_immediate);
1733 * Creates an ia32 Shr.
1735 * @return The created ia32 Shr node
1737 static ir_node *gen_Shr(ir_node *node)
1739 	ir_node *left  = get_Shr_left(node);
1740 	ir_node *right = get_Shr_right(node);
	/* No match_mode_neutral: a logical right shift depends on the exact
	 * upper bits of its operand. */
1742 	return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1748 * Creates an ia32 Sar.
1750 * @return The created ia32 Shrs node
1752 static ir_node *gen_Shrs(ir_node *node)
1754 	ir_node *left  = get_Shrs_left(node);
1755 	ir_node *right = get_Shrs_right(node);
	/* Shrs by 31 of a 32bit value extracts the sign -> use the dedicated
	 * sign-extension helper (cltd or sar 31 depending on CPU config).
	 * NOTE(review): the constant test (val == 31, presumably) falls in an
	 * omitted line of this chunk -- confirm against the full file. */
1757 	if (is_Const(right)) {
1758 		ir_tarval *tv  = get_Const_tarval(right);
1759 		long       val = get_tarval_long(tv);
1761 			/* this is a sign extension */
1762 			dbg_info *dbgi   = get_irn_dbg_info(node);
1763 			ir_node  *block  = be_transform_node(get_nodes_block(node));
1764 			ir_node  *new_op = be_transform_node(left);
1766 			return create_sex_32_64(dbgi, block, new_op, node);
1770 	/* 8 or 16 bit sign extension? */
	/* Shrs(Shl(x, 16|24), 16|24) is a sign extension from 16/8 bit and is
	 * implemented as a conversion (movsx) instead of two shifts. */
1771 	if (is_Const(right) && is_Shl(left)) {
1772 		ir_node *shl_left  = get_Shl_left(left);
1773 		ir_node *shl_right = get_Shl_right(left);
1774 		if (is_Const(shl_right)) {
1775 			ir_tarval *tv1 = get_Const_tarval(right);
1776 			ir_tarval *tv2 = get_Const_tarval(shl_right);
1777 			if (tv1 == tv2 && tarval_is_long(tv1)) {
1778 				long val = get_tarval_long(tv1);
1779 				if (val == 16 || val == 24) {
1780 					dbg_info *dbgi   = get_irn_dbg_info(node);
1781 					ir_node  *block  = get_nodes_block(node);
1791 					res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1800 	return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1806 * Creates an ia32 Rol.
1808 * @param op1   The first operator
1809 * @param op2   The second operator
1810 * @return The created ia32 RotL node
1812 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1814 	return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1820 * Creates an ia32 Ror.
1821 * NOTE: There is no RotR with immediate because this would always be a RotL
1822 *       "imm-mode_size_bits" which can be pre-calculated.
1824 * @param op1   The first operator
1825 * @param op2   The second operator
1826 * @return The created ia32 RotR node
1828 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1830 	return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1836 * Creates an ia32 RotR or RotL (depending on the found pattern).
1838 * @return The created ia32 RotL or RotR node
1840 static ir_node *gen_Rotl(ir_node *node)
1842 	ir_node *op1 = get_Rotl_left(node);
1843 	ir_node *op2 = get_Rotl_right(node);
	/* Rotl(x, -y) is the same as Ror(x, y): rotation is modular in the
	 * operand width, so fold away the Minus. */
1845 	if (is_Minus(op2)) {
1846 		return gen_Ror(node, op1, get_Minus_op(op2));
1849 	return gen_Rol(node, op1, op2);
1855 * Transforms a Minus node.
1857 * @return The created ia32 Minus node
1859 static ir_node *gen_Minus(ir_node *node)
1861 	ir_node   *op    = get_Minus_op(node);
1862 	ir_node   *block = be_transform_node(get_nodes_block(node));
1863 	dbg_info  *dbgi  = get_irn_dbg_info(node);
1864 	ir_mode   *mode  = get_irn_mode(node);
1869 	if (mode_is_float(mode)) {
1870 		ir_node *new_op = be_transform_node(op);
		/* SSE2 has no FP negate instruction: xor the sign bit with a
		 * sign-mask constant loaded via address mode. */
1871 		if (ia32_cg_config.use_sse2) {
1872 			/* TODO: non-optimal... if we have many xXors, then we should
1873 			 * rather create a load for the const and use that instead of
1874 			 * several AM nodes... */
1875 			ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1877 			new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1878 			                            noreg_GP, nomem, new_op, noreg_xmm);
1880 			size = get_mode_size_bits(mode);
1881 			ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1883 			set_ia32_am_sc(new_node, ent);
1884 			set_ia32_op_type(new_node, ia32_AddrModeS);
1885 			set_ia32_ls_mode(new_node, mode);
			/* x87 path: fchs negates directly. */
1887 			new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1890 		new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1893 	SET_IA32_ORIG_NODE(new_node, node);
1899 * Transforms a Not node.
1901 * @return The created ia32 Not node
1903 static ir_node *gen_Not(ir_node *node)
1905 	ir_node *op = get_Not_op(node);
	/* Boolean Nots are lowered earlier; only integer bitwise Not remains. */
1907 	assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1908 	assert (! mode_is_float(get_irn_mode(node)));
1910 	return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build a float Abs (optionally negated afterwards). SSE2 masks off the
 * sign bit with an And against an abs-mask constant; x87 uses fabs (+fchs
 * for the negate variant). */
1913 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1914                                  bool negate, ir_node *node)
1916 	ir_node   *new_block = be_transform_node(block);
1917 	ir_mode   *mode      = get_irn_mode(op);
1918 	ir_node   *new_op    = be_transform_node(op);
1923 	assert(mode_is_float(mode));
1925 	if (ia32_cg_config.use_sse2) {
1926 		ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
1927 		new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1928 		                            noreg_GP, nomem, new_op, noreg_fp);
1930 		size = get_mode_size_bits(mode);
1931 		ent  = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1933 		set_ia32_am_sc(new_node, ent);
1935 		SET_IA32_ORIG_NODE(new_node, node);
1937 		set_ia32_op_type(new_node, ia32_AddrModeS);
1938 		set_ia32_ls_mode(new_node, mode);
1940 		/* TODO, implement -Abs case */
1943 		new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1944 		SET_IA32_ORIG_NODE(new_node, node);
		/* negate == true: append fchs to produce -|x|. */
1946 			new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1947 			SET_IA32_ORIG_NODE(new_node, node);
1955 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1957 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1959 	dbg_info *dbgi      = get_irn_dbg_info(cmp);
1960 	ir_node  *block     = get_nodes_block(cmp);
1961 	ir_node  *new_block = be_transform_node(block);
1962 	ir_node  *op1       = be_transform_node(x);
1963 	ir_node  *op2       = be_transform_node(n);
	/* bt copies bit n of x into the carry flag; the caller selects the
	 * matching condition code. */
1965 	return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation to the ia32 condition code used by Setcc/Jcc/CMov.
 * Three regimes: float compares (parity-aware codes for unordered results),
 * signed integer compares, and unsigned/pointer compares. */
1968 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
1971 	if (mode_is_float(mode)) {
1973 		case ir_relation_equal:              return ia32_cc_float_equal;
1974 		case ir_relation_less:               return ia32_cc_float_below;
1975 		case ir_relation_less_equal:         return ia32_cc_float_below_equal;
1976 		case ir_relation_greater:            return ia32_cc_float_above;
1977 		case ir_relation_greater_equal:      return ia32_cc_float_above_equal;
1978 		case ir_relation_less_greater:       return ia32_cc_not_equal;
1979 		case ir_relation_less_equal_greater: return ia32_cc_not_parity;
1980 		case ir_relation_unordered:          return ia32_cc_parity;
1981 		case ir_relation_unordered_equal:    return ia32_cc_equal;
1982 		case ir_relation_unordered_less:     return ia32_cc_float_unordered_below;
1983 		case ir_relation_unordered_less_equal:
1984 		                                     return ia32_cc_float_unordered_below_equal;
1985 		case ir_relation_unordered_greater:
1986 		                                     return ia32_cc_float_unordered_above;
1987 		case ir_relation_unordered_greater_equal:
1988 		                                     return ia32_cc_float_unordered_above_equal;
1989 		case ir_relation_unordered_less_greater:
1990 		                                     return ia32_cc_float_not_equal;
1991 		case ir_relation_false:
1992 		case ir_relation_true:
1993 			/* should we introduce a jump always/jump never? */
1996 		panic("Unexpected float pnc");
1997 	} else if (mode_is_signed(mode)) {
		/* For integers "unordered" cannot occur, so the unordered
		 * variants map to the same code as their ordered counterpart. */
1999 		case ir_relation_unordered_equal:
2000 		case ir_relation_equal:              return ia32_cc_equal;
2001 		case ir_relation_unordered_less:
2002 		case ir_relation_less:               return ia32_cc_less;
2003 		case ir_relation_unordered_less_equal:
2004 		case ir_relation_less_equal:         return ia32_cc_less_equal;
2005 		case ir_relation_unordered_greater:
2006 		case ir_relation_greater:            return ia32_cc_greater;
2007 		case ir_relation_unordered_greater_equal:
2008 		case ir_relation_greater_equal:      return ia32_cc_greater_equal;
2009 		case ir_relation_unordered_less_greater:
2010 		case ir_relation_less_greater:       return ia32_cc_not_equal;
2011 		case ir_relation_less_equal_greater:
2012 		case ir_relation_unordered:
2013 		case ir_relation_false:
2014 		case ir_relation_true:
2015 			/* introduce jump always/jump never? */
2018 		panic("Unexpected pnc");
		/* Unsigned compares use the below/above condition codes. */
2021 		case ir_relation_unordered_equal:
2022 		case ir_relation_equal:              return ia32_cc_equal;
2023 		case ir_relation_unordered_less:
2024 		case ir_relation_less:               return ia32_cc_below;
2025 		case ir_relation_unordered_less_equal:
2026 		case ir_relation_less_equal:         return ia32_cc_below_equal;
2027 		case ir_relation_unordered_greater:
2028 		case ir_relation_greater:            return ia32_cc_above;
2029 		case ir_relation_unordered_greater_equal:
2030 		case ir_relation_greater_equal:      return ia32_cc_above_equal;
2031 		case ir_relation_unordered_less_greater:
2032 		case ir_relation_less_greater:       return ia32_cc_not_equal;
2033 		case ir_relation_less_equal_greater:
2034 		case ir_relation_unordered:
2035 		case ir_relation_false:
2036 		case ir_relation_true:
2037 			/* introduce jump always/jump never? */
2040 		panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and report the condition code
 * (cc_out) under which the comparison holds. Tries to emit a Bt for
 * bit-test patterns before falling back to a regular compare. */
2044 static ir_node *get_flags_node_cmp(ir_node *cmp, ia32_condition_code_t *cc_out)
2046 	/* must have a Cmp as input */
2047 	ir_relation relation = get_Cmp_relation(cmp);
2048 	ir_relation possible;
2049 	ir_node    *l        = get_Cmp_left(cmp);
2050 	ir_node    *r        = get_Cmp_right(cmp);
2051 	ir_mode    *mode     = get_irn_mode(l);
2054 	/* check for bit-test */
2055 	if (ia32_cg_config.use_bt && (relation == ir_relation_equal
2056 		        || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2057 		        || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2059 		ir_node *la = get_And_left(l);
2060 		ir_node *ra = get_And_right(l);
		/* Pattern: Cmp(And(Shl(1, n), x), 0) -> bt x, n. */
2067 			ir_node *c = get_Shl_left(la);
2068 			if (is_Const_1(c) && is_Const_0(r)) {
2069 				/* (1 << n) & ra) */
2070 				ir_node *n     = get_Shl_right(la);
2071 				flags = gen_bt(cmp, ra, n);
2072 				/* the bit is copied into the CF flag */
2073 				if (relation & ir_relation_equal)
2074 					*cc_out = ia32_cc_above_equal; /* test for CF=0 */
2076 					*cc_out = ia32_cc_below;       /* test for CF=1 */
2082 	/* the middle-end tries to eliminate impossible relations, so a ptr != 0
2083 	 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2084 	 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2085 	 * a predecessor node). So add the < bit */
2086 	possible = ir_get_possible_cmp_relations(l, r);
2087 	if (((relation & ir_relation_less) && !(possible & ir_relation_greater))
2088 	    || ((relation & ir_relation_greater) && !(possible & ir_relation_less)))
2089 		relation |= ir_relation_less_greater;
2091 	/* just do a normal transformation of the Cmp */
2092 	*cc_out = relation_to_condition_code(relation, mode);
2093 	flags   = be_transform_node(cmp);
2098 * Transform a node returning a "flag" result.
2100 * @param node    the node to transform
2101 * @param cc_out  the compare mode to use
2103 static ir_node *get_flags_node(ir_node *node, ia32_condition_code_t *cc_out)
	/* Only Cmp produces flags at this point; delegate to the Cmp variant. */
2105 	assert(is_Cmp(node));
2106 	return get_flags_node_cmp(node, cc_out);
2110 * Transforms a Load.
2112 * @return the created ia32 Load node
2114 static ir_node *gen_Load(ir_node *node)
2116 	ir_node  *old_block = get_nodes_block(node);
2117 	ir_node  *block     = be_transform_node(old_block);
2118 	ir_node  *ptr       = get_Load_ptr(node);
2119 	ir_node  *mem       = get_Load_mem(node);
2120 	ir_node  *new_mem   = be_transform_node(mem);
2121 	dbg_info *dbgi      = get_irn_dbg_info(node);
2122 	ir_mode  *mode      = get_Load_mode(node);
2123 	int       throws_exception = ir_throws_exception(node);
2127 	ia32_address_t addr;
2129 	/* construct load address */
2130 	memset(&addr, 0, sizeof(addr));
2131 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2138 		base = be_transform_node(base);
2144 		idx = be_transform_node(idx);
	/* Pick the load flavor: SSE2 xLoad / x87 vfld for floats, a Conv with
	 * address mode (movzx/movsx) for sub-32bit ints, plain Load otherwise. */
2147 	if (mode_is_float(mode)) {
2148 		if (ia32_cg_config.use_sse2) {
2149 			new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2152 			new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2156 		assert(mode != mode_b);
2158 		/* create a conv node with address mode for smaller modes */
2159 		if (get_mode_size_bits(mode) < 32) {
2160 			new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2161 			                                new_mem, noreg_GP, mode);
2163 			new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2166 	ir_set_throws_exception(new_node, throws_exception);
2168 	set_irn_pinned(new_node, get_irn_pinned(node));
2169 	set_ia32_op_type(new_node, ia32_AddrModeS);
2170 	set_ia32_ls_mode(new_node, mode);
2171 	set_address(new_node, &addr);
	/* Floating (unpinned) loads may be rematerialized by the spiller. */
2173 	if (get_irn_pinned(node) == op_pin_state_floats) {
2174 		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2175 				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2176 				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
2177 		arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2180 	SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a value (a Load result Proj) can be folded into a
 * destination-address-mode operation that stores back through ptr.
 * Returns non-zero when folding is safe. */
2185 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2186                        ir_node *ptr, ir_node *other)
2193 	/* we only use address mode if we're the only user of the load */
2194 	if (get_irn_n_edges(node) > 1)
2197 	load = get_Proj_pred(node);
	/* Load and store must sit in the same block. */
2200 	if (get_nodes_block(load) != block)
2203 	/* store should have the same pointer as the load */
2204 	if (get_Load_ptr(load) != ptr)
2207 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
2208 	if (other != NULL                   &&
2209 	    get_nodes_block(other) == block &&
2210 	    heights_reachable_in_block(ia32_heights, other, load)) {
2214 	if (ia32_prevents_AM(block, load, mem))
2216 	/* Store should be attached to the load via mem */
2217 	assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binop (e.g. "add [mem], reg/imm") for a
 * Store(binop(Load(ptr), x)) pattern. Returns the new node or (outside this
 * view) NULL when neither operand side can use dest-AM. */
2222 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2223                               ir_node *mem, ir_node *ptr, ir_mode *mode,
2224                               construct_binop_dest_func *func,
2225                               construct_binop_dest_func *func8bit,
2226                               match_flags_t flags)
2228 	ir_node  *src_block = get_nodes_block(node);
2236 	ia32_address_mode_t  am;
2237 	ia32_address_t      *addr = &am.addr;
2238 	memset(&am, 0, sizeof(am));
2240 	assert(flags & match_immediate); /* there is no destam node without... */
2241 	commutative = (flags & match_commutative) != 0;
	/* Try Load on the left; for commutative ops also try the right. */
2243 	if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2244 		build_address(&am, op1, ia32_create_am_double_use);
2245 		new_op = create_immediate_or_transform(op2, 0);
2246 	} else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2247 		build_address(&am, op2, ia32_create_am_double_use);
2248 		new_op = create_immediate_or_transform(op1, 0);
	/* Fill unused address parts with the GP NoReg placeholder. */
2253 	if (addr->base == NULL)
2254 		addr->base = noreg_GP;
2255 	if (addr->index == NULL)
2256 		addr->index = noreg_GP;
2257 	if (addr->mem == NULL)
2260 	dbgi    = get_irn_dbg_info(node);
2261 	block   = be_transform_node(src_block);
2262 	new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2264 	if (get_mode_size_bits(mode) == 8) {
2265 		new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2267 		new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2269 	set_address(new_node, addr);
2270 	set_ia32_op_type(new_node, ia32_AddrModeD);
2271 	set_ia32_ls_mode(new_node, mode);
2272 	SET_IA32_ORIG_NODE(new_node, node);
	/* Reroute the consumed load's memory Proj(s) to the new node. */
2274 	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2275 	mem_proj = be_transform_node(am.mem_proj);
2276 	be_set_transformed_node(am.mem_proj, new_node);
2277 	be_set_transformed_node(mem_proj, new_node);
/* Build a destination-address-mode unop (e.g. "not [mem]", "inc [mem]") for
 * a Store(unop(Load(ptr))) pattern; NULL-equivalent fallthrough when the
 * load cannot be consumed. */
2282 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2283                              ir_node *ptr, ir_mode *mode,
2284                              construct_unop_dest_func *func)
2286 	ir_node  *src_block = get_nodes_block(node);
2292 	ia32_address_mode_t  am;
2293 	ia32_address_t *addr = &am.addr;
2295 	if (!use_dest_am(src_block, op, mem, ptr, NULL))
2298 	memset(&am, 0, sizeof(am));
2299 	build_address(&am, op, ia32_create_am_double_use);
2301 	dbgi     = get_irn_dbg_info(node);
2302 	block    = be_transform_node(src_block);
2303 	new_mem  = transform_AM_mem(block, am.am_node, mem, addr->mem);
2304 	new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2305 	set_address(new_node, addr);
2306 	set_ia32_op_type(new_node, ia32_AddrModeD);
2307 	set_ia32_ls_mode(new_node, mode);
2308 	SET_IA32_ORIG_NODE(new_node, node);
	/* Reroute the consumed load's memory Proj(s) to the new node. */
2310 	be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2311 	mem_proj = be_transform_node(am.mem_proj);
2312 	be_set_transformed_node(am.mem_proj, new_node);
2313 	be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(flags, 0, 1)) into a SetccMem ("setcc [mem]").
 * Only possible for 8bit stores of a 0/1 Mux; returns the new node or
 * (outside this view) NULL. */
2318 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2320 	ir_mode  *mode      = get_irn_mode(node);
2321 	ir_node  *mux_true  = get_Mux_true(node);
2322 	ir_node  *mux_false = get_Mux_false(node);
2330 	ia32_condition_code_t cc;
2331 	ia32_address_t        addr;
	/* setcc only writes a single byte. */
2333 	if (get_mode_size_bits(mode) != 8)
2336 	if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
	/* Mux(sel, 1, 0): condition is inverted below via negate. */
2338 	} else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2344 	cond  = get_Mux_sel(node);
2345 	flags = get_flags_node(cond, &cc);
2346 	/* we can't handle the float special cases with SetM */
2347 	if (cc & ia32_cc_additional_float_cases)
2350 		cc = ia32_negate_condition_code(cc);
2352 	build_address_ptr(&addr, ptr, mem);
2354 	dbgi      = get_irn_dbg_info(node);
2355 	block     = get_nodes_block(node);
2356 	new_block = be_transform_node(block);
2357 	new_node  = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2358 	                                 addr.index, addr.mem, flags, cc);
2359 	set_address(new_node, &addr);
2360 	set_ia32_op_type(new_node, ia32_AddrModeD);
2361 	set_ia32_ls_mode(new_node, mode);
2362 	SET_IA32_ORIG_NODE(new_node, node);
/* Dispatch on the stored value's opcode and try to build a destination-
 * address-mode instruction for Store(op(Load(ptr), ...), ptr). Returns the
 * new node or (outside this view) NULL if no dest-AM form applies. */
2367 static ir_node *try_create_dest_am(ir_node *node)
2369 	ir_node  *val  = get_Store_value(node);
2370 	ir_node  *mem  = get_Store_mem(node);
2371 	ir_node  *ptr  = get_Store_ptr(node);
2372 	ir_mode  *mode = get_irn_mode(val);
2373 	unsigned  bits = get_mode_size_bits(mode);
2378 	/* handle only GP modes for now... */
2379 	if (!ia32_mode_needs_gp_reg(mode))
2383 	/* store must be the only user of the val node */
2384 	if (get_irn_n_edges(val) > 1)
2386 	/* skip pointless convs */
	/* A downconv before the store is irrelevant: the store width decides. */
2388 		ir_node *conv_op   = get_Conv_op(val);
2389 		ir_mode *pred_mode = get_irn_mode(conv_op);
2390 		if (!ia32_mode_needs_gp_reg(pred_mode))
2392 		if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2400 	/* value must be in the same block */
2401 	if (get_nodes_block(node) != get_nodes_block(val))
2404 	switch (get_irn_opcode(val)) {
2406 		op1 = get_Add_left(val);
2407 		op2 = get_Add_right(val);
		/* Add +-1 becomes inc/dec when the CPU config prefers them. */
2408 		if (ia32_cg_config.use_incdec) {
2409 			if (is_Const_1(op2)) {
2410 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2412 			} else if (is_Const_Minus_1(op2)) {
2413 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2417 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2418 		                         new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2419 		                         match_commutative | match_immediate);
2422 		op1 = get_Sub_left(val);
2423 		op2 = get_Sub_right(val);
2424 		if (is_Const(op2)) {
2425 			ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2427 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2428 		                         new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2432 		op1 = get_And_left(val);
2433 		op2 = get_And_right(val);
2434 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2435 		                         new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2436 		                         match_commutative | match_immediate);
2439 		op1 = get_Or_left(val);
2440 		op2 = get_Or_right(val);
2441 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2442 		                         new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2443 		                         match_commutative | match_immediate);
2446 		op1 = get_Eor_left(val);
2447 		op2 = get_Eor_right(val);
2448 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2449 		                         new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2450 		                         match_commutative | match_immediate);
		/* Shifts have no separate 8bit constructor: same func twice. */
2453 		op1 = get_Shl_left(val);
2454 		op2 = get_Shl_right(val);
2455 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2456 		                         new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2460 		op1 = get_Shr_left(val);
2461 		op2 = get_Shr_right(val);
2462 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2463 		                         new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2467 		op1 = get_Shrs_left(val);
2468 		op2 = get_Shrs_right(val);
2469 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2470 		                         new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2474 		op1 = get_Rotl_left(val);
2475 		op2 = get_Rotl_right(val);
2476 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2477 		                         new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2480 		/* TODO: match ROR patterns... */
2482 		new_node = try_create_SetMem(val, ptr, mem);
2486 		op1      = get_Minus_op(val);
2487 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2490 		/* should be lowered already */
2491 		assert(mode != mode_b);
2492 		op1      = get_Not_op(val);
2493 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
	/* Propagate the Store's pinned state to the new combined node. */
2499 	if (new_node != NULL) {
2500 		if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2501 		    get_irn_pinned(node) == op_pin_state_pinned) {
2502 			set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Checks whether an integer mode can be used directly as the integer side of
 * an x87 int<->float conversion (fild/fist only handle signed 16/32 bit
 * memory operands; 64 bit is handled separately by the callers).
 * NOTE(review): only the signedness and size tests are visible in this view;
 * the surrounding returns presumably yield false/true accordingly — confirm.
 */
2509 static bool possible_int_mode_for_fp(ir_mode *mode)
2513 	if (!mode_is_signed(mode))
2515 	size = get_mode_size_bits(mode);
2516 	if (size != 16 && size != 32)
/**
 * Returns non-zero if @p node is a Conv from a float mode into an integer
 * mode that fist can store directly (see possible_int_mode_for_fp()).
 * Used by gen_general_Store() to fuse float->int conversion into a vfist.
 */
2521 static int is_float_to_int_conv(const ir_node *node)
2523 	ir_mode *mode = get_irn_mode(node);
     /* target mode must be a fist-compatible integer mode */
2527 	if (!possible_int_mode_for_fp(mode))
2532 	conv_op = get_Conv_op(node);
2533 	conv_mode = get_irn_mode(conv_op);
     /* the converted operand must actually be a float */
2535 	if (!mode_is_float(conv_mode))
/**
2542  * Transform a Store(floatConst) into a sequence of
 * integer stores (the constant's bit pattern is emitted as one or more
 * immediate GP stores of 4/2/1 bytes, avoiding an FPU/SSE register load).
 *
2545  * @return the created ia32 Store node
 * (or a Sync of the partial stores' memory Projs when several stores
 * were needed; see the tail of the function).
 */
2547 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2549 	ir_mode *mode = get_irn_mode(cns);
2550 	unsigned size = get_mode_size_bytes(mode);
2551 	ir_tarval *tv = get_Const_tarval(cns);
2552 	ir_node *block = get_nodes_block(node);
2553 	ir_node *new_block = be_transform_node(block);
2554 	ir_node *ptr = get_Store_ptr(node);
2555 	ir_node *mem = get_Store_mem(node);
2556 	dbg_info *dbgi = get_irn_dbg_info(node);
2559 	int throws_exception = ir_throws_exception(node);
2561 	ia32_address_t addr;
2563 	build_address_ptr(&addr, ptr, mem);
     /* assemble a 32-bit little-endian chunk of the constant's bit image */
2570 		val= get_tarval_sub_bits(tv, ofs) |
2571 			(get_tarval_sub_bits(tv, ofs + 1) << 8) |
2572 			(get_tarval_sub_bits(tv, ofs + 2) << 16) |
2573 			(get_tarval_sub_bits(tv, ofs + 3) << 24);
2576 	} else if (size >= 2) {
     /* remaining 16-bit chunk */
2577 		val= get_tarval_sub_bits(tv, ofs) |
2578 			(get_tarval_sub_bits(tv, ofs + 1) << 8);
2582 		panic("invalid size of Store float to mem (%+F)", node);
2584 	ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2586 	ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2587 		addr.index, addr.mem, imm);
2588 	ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2590 	ir_set_throws_exception(new_node, throws_exception);
2591 	set_irn_pinned(new_node, get_irn_pinned(node));
2592 	set_ia32_op_type(new_node, ia32_AddrModeD);
2593 	set_ia32_ls_mode(new_node, mode);
2594 	set_address(new_node, &addr);
2595 	SET_IA32_ORIG_NODE(new_node, node);
     /* advance to the next chunk of the constant */
2602 		addr.offset += delta;
2603 	} while (size != 0);
     /* several partial stores: join their memory effects with a Sync */
2606 		return new_rd_Sync(dbgi, new_block, i, ins);
     /* single store: hand back the store node itself (pred of its M Proj) */
2608 	return get_Proj_pred(ins[0]);
/**
2613  * Generate a vfist or vfisttp instruction.
 * fisttp (SSE3) truncates unconditionally and pops; plain fist needs the
 * FPU control word temporarily switched to truncation rounding.
 */
2615 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2616 		ir_node *index, ir_node *mem, ir_node *val)
2618 	if (ia32_cg_config.use_fisttp) {
2619 		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2620 		if other users exists */
2621 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2622 		ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
     /* Keep the res Proj alive so the register allocator sees the pop */
2623 		be_new_Keep(block, 1, &value);
     /* no fisttp available: switch fpcw to truncation for a plain fist */
2627 		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2630 		ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/**
2636  * Transforms a general (no special case) Store.
 * Handles float stores (SSE xStore / x87 vfst), fused float->int conversion
 * stores (vfist), and plain integer stores (8 bit vs. wider variants).
 *
2638  * @return the created ia32 Store node
 */
2640 static ir_node *gen_general_Store(ir_node *node)
2642 	ir_node *val = get_Store_value(node);
2643 	ir_mode *mode = get_irn_mode(val);
2644 	ir_node *block = get_nodes_block(node);
2645 	ir_node *new_block = be_transform_node(block);
2646 	ir_node *ptr = get_Store_ptr(node);
2647 	ir_node *mem = get_Store_mem(node);
2648 	dbg_info *dbgi = get_irn_dbg_info(node);
2649 	int throws_exception = ir_throws_exception(node);
2652 	ia32_address_t addr;
2654 	/* check for destination address mode */
2655 	new_node = try_create_dest_am(node);
2656 	if (new_node != NULL)
2659 	/* construct store address */
2660 	memset(&addr, 0, sizeof(addr));
2661 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2663 	if (addr.base == NULL) {
2664 		addr.base = noreg_GP;
2666 		addr.base = be_transform_node(addr.base);
2669 	if (addr.index == NULL) {
2670 		addr.index = noreg_GP;
2672 		addr.index = be_transform_node(addr.index);
2674 	addr.mem = be_transform_node(mem);
2676 	if (mode_is_float(mode)) {
2677 		/* Convs (and strict-Convs) before stores are unnecessary if the mode
2679 		while (is_Conv(val) && mode == get_irn_mode(val)) {
2680 			ir_node *op = get_Conv_op(val);
2681 			if (!mode_is_float(get_irn_mode(op)))
2685 		new_val = be_transform_node(val);
2686 		if (ia32_cg_config.use_sse2) {
2687 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2688 				addr.index, addr.mem, new_val);
2690 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2691 				addr.index, addr.mem, new_val, mode);
     /* x87 only: fuse Store(Conv(float->int)) into one fist */
2693 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2694 		val = get_Conv_op(val);
2696 		/* TODO: is this optimisation still necessary at all (middleend)? */
2697 		/* We can skip ALL float->float up-Convs (and strict-up-Convs) before
2699 		while (is_Conv(val)) {
2700 			ir_node *op = get_Conv_op(val);
2701 			if (!mode_is_float(get_irn_mode(op)))
2703 			if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2707 		new_val = be_transform_node(val);
2708 		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
     /* plain integer store; the value may become an immediate */
2710 		new_val = create_immediate_or_transform(val, 0);
2711 		assert(mode != mode_b);
2713 		if (get_mode_size_bits(mode) == 8) {
2714 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2715 				addr.index, addr.mem, new_val);
2717 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2718 				addr.index, addr.mem, new_val);
2721 	ir_set_throws_exception(new_node, throws_exception);
2723 	set_irn_pinned(new_node, get_irn_pinned(node));
2724 	set_ia32_op_type(new_node, ia32_AddrModeD);
2725 	set_ia32_ls_mode(new_node, mode);
2727 	set_address(new_node, &addr);
2728 	SET_IA32_ORIG_NODE(new_node, node);
/**
2734  * Transforms a Store.
 * Dispatches to gen_float_const_Store() for float-constant values,
 * otherwise to gen_general_Store().
 *
2736  * @return the created ia32 Store node
 */
2738 static ir_node *gen_Store(ir_node *node)
2740 	ir_node *val = get_Store_value(node);
2741 	ir_mode *mode = get_irn_mode(val);
2743 	if (mode_is_float(mode) && is_Const(val)) {
2744 		/* We can transform every floating const store
2745 		   into a sequence of integer stores.
2746 		   If the constant is already in a register,
2747 		   it would be better to use it, but we don't
2748 		   have this information here. */
2749 		return gen_float_const_Store(node, val);
2751 	return gen_general_Store(node);
/**
2755  * Transforms a Switch.
 * Builds a jump table in a private constant entity and emits a SwitchJmp
 * that indexes it (scale 2 => entries of 4 bytes).
 *
2757  * @return the created ia32 SwitchJmp node
 */
2759 static ir_node *gen_Switch(ir_node *node)
2761 	dbg_info *dbgi = get_irn_dbg_info(node);
2762 	ir_graph *irg = get_irn_irg(node);
2763 	ir_node *block = be_transform_node(get_nodes_block(node));
2764 	ir_node *sel = get_Switch_selector(node);
2765 	ir_node *new_sel = be_transform_node(sel);
2766 	ir_mode *sel_mode = get_irn_mode(sel);
2767 	const ir_switch_table *table = get_Switch_table(node);
2768 	unsigned n_outs = get_Switch_n_outs(node);
     /* selector must fit a GP register; widen smaller selectors to 32 bit */
2772 	assert(get_mode_size_bits(get_irn_mode(sel)) <= 32);
2773 	if (get_mode_size_bits(sel_mode) != 32)
2774 		new_sel = create_upconv(new_sel, sel);
     /* private, constant jump-table entity */
2776 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2777 	set_entity_visibility(entity, ir_visibility_private);
2778 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
     /* duplicate: the table's lifetime must outlast the original Switch */
2780 	table = ir_switch_table_duplicate(irg, table);
2782 	new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2783 	set_ia32_am_scale(new_node, 2);
2784 	set_ia32_am_sc(new_node, entity);
2785 	set_ia32_op_type(new_node, ia32_AddrModeS);
2786 	set_ia32_ls_mode(new_node, mode_Iu);
2787 	SET_IA32_ORIG_NODE(new_node, node);
2788 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2789 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
/**
2795  * Transform a Cond node.
 * The selector's flags node and condition code are produced by
 * get_flags_node(); the Cond becomes a conditional jump (Jcc).
 */
2797 static ir_node *gen_Cond(ir_node *node)
2799 	ir_node *block = get_nodes_block(node);
2800 	ir_node *new_block = be_transform_node(block);
2801 	dbg_info *dbgi = get_irn_dbg_info(node);
2802 	ir_node *sel = get_Cond_selector(node);
2803 	ir_node *flags = NULL;
2805 	ia32_condition_code_t cc;
2807 	/* we get flags from a Cmp */
2808 	flags = get_flags_node(sel, &cc);
2810 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2811 	SET_IA32_ORIG_NODE(new_node, node);
/**
2817  * Transform a be_Copy.
 * Duplicates the node; GP-register values are normalized to mode_Iu so the
 * backend sees a uniform integer mode.
 */
2819 static ir_node *gen_be_Copy(ir_node *node)
2821 	ir_node *new_node = be_duplicate_node(node);
2822 	ir_mode *mode = get_irn_mode(new_node);
2824 	if (ia32_mode_needs_gp_reg(mode)) {
2825 		set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 float compare for a Cmp node.
 * Prefers fucomi (flags directly in EFLAGS); otherwise uses
 * ftst (compare against 0) or fucom + fnstsw, followed by sahf to move the
 * FPU status word into EFLAGS.
 */
2831 static ir_node *create_Fucom(ir_node *node)
2833 	dbg_info *dbgi = get_irn_dbg_info(node);
2834 	ir_node *block = get_nodes_block(node);
2835 	ir_node *new_block = be_transform_node(block);
2836 	ir_node *left = get_Cmp_left(node);
2837 	ir_node *new_left = be_transform_node(left);
2838 	ir_node *right = get_Cmp_right(node);
2842 	if (ia32_cg_config.use_fucomi) {
2843 		new_right = be_transform_node(right);
2844 		new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2846 		set_ia32_commutative(new_node);
2847 		SET_IA32_ORIG_NODE(new_node, node);
     /* no fucomi: ftst for compares against 0.0, else fucom + fnstsw */
2849 		if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2850 			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2852 			new_right = be_transform_node(right);
2853 			new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2856 			set_ia32_commutative(new_node);
2858 		SET_IA32_ORIG_NODE(new_node, node);
     /* transfer AH (fnstsw result) into EFLAGS */
2860 		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2861 		SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi (unordered compare, flags result) for a Cmp node.
 * Uses the address-mode matcher so one operand may come from memory.
 */
2867 static ir_node *create_Ucomi(ir_node *node)
2869 	dbg_info *dbgi = get_irn_dbg_info(node);
2870 	ir_node *src_block = get_nodes_block(node);
2871 	ir_node *new_block = be_transform_node(src_block);
2872 	ir_node *left = get_Cmp_left(node);
2873 	ir_node *right = get_Cmp_right(node);
2875 	ia32_address_mode_t am;
2876 	ia32_address_t *addr = &am.addr;
2878 	match_arguments(&am, src_block, left, right, NULL,
2879 	                match_commutative | match_am);
2881 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2882 	                             addr->mem, am.new_op1, am.new_op2,
2884 	set_am_attributes(new_node, &am);
2886 	SET_IA32_ORIG_NODE(new_node, node);
     /* reroute memory users if a Load was folded into the compare */
2888 	new_node = fix_mem_proj(new_node, &am);
/**
2894  * returns true if it is assured, that the upper bits of a node are "clean"
2895  * which means for a 16 or 8 bit value, that the upper bits in the register
2896  * are 0 for unsigned and a copy of the last significant bit for signed
 * (i.e. the value is already zero-/sign-extended to 32 bit).  Recurses
 * through Projs and analyses the producing ia32 opcode.
 */
2899 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2901 	assert(ia32_mode_needs_gp_reg(mode));
     /* full-width values have no "upper" bits to worry about */
2902 	if (get_mode_size_bits(mode) >= 32)
2905 	if (is_Proj(transformed_node))
2906 		return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2908 	switch (get_ia32_irn_opcode(transformed_node)) {
2909 	case iro_ia32_Conv_I2I:
2910 	case iro_ia32_Conv_I2I8Bit: {
     /* a Conv extends cleanly iff signedness matches and it converts
      * from a mode no wider than the queried one */
2911 		ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2912 		if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2914 		if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
     /* Shr: a big enough constant shift guarantees zeroed upper bits */
2921 		if (mode_is_signed(mode)) {
2922 			return false; /* TODO handle signed modes */
2924 			ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2925 			if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2926 				const ia32_immediate_attr_t *attr
2927 					= get_ia32_immediate_attr_const(right);
2928 				if (attr->symconst == 0 &&
2929 				    (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2933 			return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2937 		/* TODO too conservative if shift amount is constant */
2938 		return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
     /* And: for unsigned, one clean input suffices (AND can only clear bits) */
2941 		if (!mode_is_signed(mode)) {
2943 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2944 				upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2946 		/* TODO if one is known to be zero extended, then || is sufficient */
     /* generic binary ops: both inputs must be clean */
2951 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2952 			upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2954 	case iro_ia32_Const:
2955 	case iro_ia32_Immediate: {
2956 		const ia32_immediate_attr_t *attr =
2957 			get_ia32_immediate_attr_const(transformed_node);
2958 		if (mode_is_signed(mode)) {
     /* signed: all bits above the sign bit must equal the sign bit */
2959 			long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2960 			return shifted == 0 || shifted == -1;
     /* unsigned: all bits above the mode width must be zero */
2962 			unsigned long shifted = (unsigned long)attr->offset;
2963 			shifted >>= get_mode_size_bits(mode)-1;
2965 			return shifted == 0;
/**
2975  * Generate code for a Cmp.
 * Float compares are delegated to Ucomi (SSE) / Fucom (x87).  Integer
 * compares prefer Test for (x & y) ==/!= 0 patterns, and widen to 32-bit
 * compare mode whenever both operands' upper bits are known clean (shorter
 * opcode than the 8/16-bit forms).
 */
2977 static ir_node *gen_Cmp(ir_node *node)
2979 	dbg_info *dbgi = get_irn_dbg_info(node);
2980 	ir_node *block = get_nodes_block(node);
2981 	ir_node *new_block = be_transform_node(block);
2982 	ir_node *left = get_Cmp_left(node);
2983 	ir_node *right = get_Cmp_right(node);
2984 	ir_mode *cmp_mode = get_irn_mode(left);
2986 	ia32_address_mode_t am;
2987 	ia32_address_t *addr = &am.addr;
2989 	if (mode_is_float(cmp_mode)) {
2990 		if (ia32_cg_config.use_sse2) {
2991 			return create_Ucomi(node);
2993 			return create_Fucom(node);
2997 	assert(ia32_mode_needs_gp_reg(cmp_mode));
2999 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3000 	if (is_Const_0(right) &&
3002 	    get_irn_n_edges(left) == 1) {
3003 		/* Test(and_left, and_right) */
3004 		ir_node *and_left = get_And_left(left);
3005 		ir_node *and_right = get_And_right(left);
3007 		/* matze: code here used mode instead of cmd_mode, I think it is always
3008 		 * the same as cmp_mode, but I leave this here to see if this is really
3011 		assert(get_irn_mode(and_left) == cmp_mode);
3013 		match_arguments(&am, block, and_left, and_right, NULL,
3015 		                match_am | match_8bit_am | match_16bit_am |
3016 		                match_am_and_immediates | match_immediate);
3018 		/* use 32bit compare mode if possible since the opcode is smaller */
3019 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3020 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3021 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3024 		if (get_mode_size_bits(cmp_mode) == 8) {
3025 			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3026 					addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3028 			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
3029 					addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3032 		/* Cmp(left, right) */
3033 		match_arguments(&am, block, left, right, NULL,
3034 		                match_commutative | match_am | match_8bit_am |
3035 		                match_16bit_am | match_am_and_immediates |
3037 		/* use 32bit compare mode if possible since the opcode is smaller */
3038 		if (upper_bits_clean(am.new_op1, cmp_mode) &&
3039 		    upper_bits_clean(am.new_op2, cmp_mode)) {
3040 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3043 		if (get_mode_size_bits(cmp_mode) == 8) {
3044 			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3045 			                               addr->index, addr->mem, am.new_op1,
3046 			                               am.new_op2, am.ins_permuted);
3048 			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3049 			                           addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3052 	set_am_attributes(new_node, &am);
3053 	set_ia32_ls_mode(new_node, cmp_mode);
3055 	SET_IA32_ORIG_NODE(new_node, node);
     /* reroute memory users if a Load was folded */
3057 	new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a CMovcc for a Mux on GP values.
 *
 * @param node       the original Mux node
 * @param flags      the untransformed flags-producing node
 * @param new_flags  the transformed flags node
 * @param cc         condition code selecting the true value
 */
3062 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3063                             ia32_condition_code_t cc)
3065 	dbg_info *dbgi = get_irn_dbg_info(node);
3066 	ir_node *block = get_nodes_block(node);
3067 	ir_node *new_block = be_transform_node(block);
3068 	ir_node *val_true = get_Mux_true(node);
3069 	ir_node *val_false = get_Mux_false(node);
3071 	ia32_address_mode_t am;
3072 	ia32_address_t *addr;
3074 	assert(ia32_cg_config.use_cmov);
3075 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3079 	match_arguments(&am, block, val_false, val_true, flags,
3080 			match_commutative | match_am | match_16bit_am | match_mode_neutral);
     /* operands swapped by the matcher => invert the condition */
3082 	if (am.ins_permuted)
3083 		cc = ia32_negate_condition_code(cc);
3085 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3086 	                              addr->mem, am.new_op1, am.new_op2, new_flags,
3088 	set_am_attributes(new_node, &am);
3090 	SET_IA32_ORIG_NODE(new_node, node);
3092 	new_node = fix_mem_proj(new_node, &am);
/**
3098  * Creates a ia32 Setcc instruction.
 * Setcc writes only an 8-bit register, so the result is zero-extended
 * (Conv_I2I8Bit with mode_Bu) when the target mode is wider than 8 bit.
 */
3100 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3101                                  ir_node *flags, ia32_condition_code_t cc,
3104 	ir_mode *mode = get_irn_mode(orig_node);
3107 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3108 	SET_IA32_ORIG_NODE(new_node, orig_node);
3110 	/* we might need to conv the result up */
3111 	if (get_mode_size_bits(mode) > 8) {
3112 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3113 		                                    nomem, new_node, mode_Bu);
3114 		SET_IA32_ORIG_NODE(new_node, orig_node);
/**
3121  * Create instruction for an unsigned Difference or Zero.
 * Computes max(a - b, 0) branch-free as (a - b) & ~borrow-mask:
 * sub sets the carry flag on borrow, Sbb0 materializes 0/-1 from it,
 * Not inverts it to a mask, And applies the mask to the difference.
 */
3123 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3125 	ir_mode *mode = get_irn_mode(psi);
3135 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3136 			match_mode_neutral | match_am | match_immediate | match_two_users);
3138 	block = get_nodes_block(new_node);
3140 	if (is_Proj(new_node)) {
3141 		sub = get_Proj_pred(new_node);
     /* make the Sub multi-out so we can also take its flags Proj */
3144 	set_irn_mode(sub, mode_T);
3145 	new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3147 	assert(is_ia32_Sub(sub));
3148 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3150 	dbgi = get_irn_dbg_info(psi);
3151 	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3152 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3154 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3155 	set_ia32_commutative(new_node);
/**
3160  * Create an const array of two float consts.
 *
3162  * @param c0        the first constant
3163  * @param c1        the second constant
3164  * @param new_mode  IN/OUT for the mode of the constants, if NULL
3165  *                  smallest possible mode will be used
 *
 * @return a private, constant global entity holding both values
 */
3167 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3170 	ir_mode *mode = *new_mode;
3172 	ir_initializer_t *initializer;
3173 	ir_tarval *tv0 = get_Const_tarval(c0);
3174 	ir_tarval *tv1 = get_Const_tarval(c1);
3177 	/* detect the best mode for the constants */
3178 	mode = get_tarval_mode(tv0);
     /* try to shrink to single precision when both convert losslessly */
3180 	if (mode != mode_F) {
3181 		if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3182 		    tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3184 			tv0 = tarval_convert_to(tv0, mode);
3185 			tv1 = tarval_convert_to(tv1, mode);
     /* otherwise try double precision */
3186 		} else if (mode != mode_D) {
3187 			if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3188 			    tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3190 				tv0 = tarval_convert_to(tv0, mode);
3191 				tv1 = tarval_convert_to(tv1, mode);
3198 	tp = ia32_get_prim_type(mode);
3199 	tp = ia32_create_float_array(tp);
3201 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3203 	set_entity_ld_ident(ent, get_entity_ident(ent));
3204 	set_entity_visibility(ent, ir_visibility_private);
3205 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3207 	initializer = create_initializer_compound(2);
3209 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3210 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3212 	set_entity_initializer(ent, initializer);
/**
3219  * Possible transformations for creating a Setcc.
 * Each step of a setcc_transform_t sequence is one of these instructions.
 */
3221 enum setcc_transform_insn {
/**
 * A recipe for materializing two constants from a condition: the condition
 * code to test plus a short sequence of transform steps applied to the
 * 0/1 Setcc result.
 */
3234 typedef struct setcc_transform {
3236 	ia32_condition_code_t cc;
3238 	enum setcc_transform_insn transform;
3242 } setcc_transform_t;
/**
3245  * Setcc can only handle 0 and 1 result.
3246  * Find a transformation that creates 0 and 1 from
 * the requested true/false constants: normalize so t > f, reduce to a
 * (0, t-f) pair via an ADD step, then express t-f with SET / NEG / SHL /
 * LEA(+scale) / AND steps.
 *
 * @param cc   condition code for the true value (may be negated internally)
 * @param t    tarval produced when the condition holds
 * @param f    tarval produced otherwise
 * @param res  out: the computed step sequence
 */
3249 static void find_const_transform(ia32_condition_code_t cc,
3250                                  ir_tarval *t, ir_tarval *f,
3251                                  setcc_transform_t *res)
     /* normalize: make t the non-zero / bigger constant, adjusting cc */
3257 	if (tarval_is_null(t)) {
3261 		cc = ia32_negate_condition_code(cc);
3262 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3263 		// now, t is the bigger one
3267 		cc = ia32_negate_condition_code(cc);
     /* reduce (t, f) to (t-f, 0) and remember the ADD-back step */
3271 	if (! tarval_is_null(f)) {
3272 		ir_tarval *t_sub = tarval_sub(t, f, NULL);
3275 		res->steps[step].transform = SETCC_TR_ADD;
3277 		if (t == tarval_bad)
3278 			panic("constant subtract failed");
3279 		if (! tarval_is_long(f))
3280 			panic("tarval is not long");
3282 		res->steps[step].val = get_tarval_long(f);
3284 		f = tarval_sub(f, f, NULL);
3285 		assert(tarval_is_null(f));
     /* t == 1: plain Setcc suffices */
3288 	if (tarval_is_one(t)) {
3289 		res->steps[step].transform = SETCC_TR_SET;
3290 		res->num_steps = ++step;
     /* t == -1: negate the 0/1 Setcc result */
3294 	if (tarval_is_minus_one(t)) {
3295 		res->steps[step].transform = SETCC_TR_NEG;
3297 		res->steps[step].transform = SETCC_TR_SET;
3298 		res->num_steps = ++step;
3301 	if (tarval_is_long(t)) {
3302 		long v = get_tarval_long(t);
3304 		res->steps[step].val = 0;
     /* small multipliers map to LEA with scale (and optional offset) */
3307 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3309 			res->steps[step].transform = SETCC_TR_LEAxx;
3310 			res->steps[step].scale     = 3; /* (a << 3) + a */
3313 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3315 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3316 			res->steps[step].scale     = 3; /* (a << 3) */
3319 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3321 			res->steps[step].transform = SETCC_TR_LEAxx;
3322 			res->steps[step].scale     = 2; /* (a << 2) + a */
3325 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3327 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3328 			res->steps[step].scale     = 2; /* (a << 2) */
3331 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3333 			res->steps[step].transform = SETCC_TR_LEAxx;
3334 			res->steps[step].scale     = 1; /* (a << 1) + a */
3337 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3339 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3340 			res->steps[step].scale     = 1; /* (a << 1) */
3343 			res->num_steps = step;
     /* general constant: NEG to get a 0/-1 mask, AND with v (single-bit
      * values instead use a SHL by the bit position) */
3346 			if (! tarval_is_single_bit(t)) {
3347 				res->steps[step].transform = SETCC_TR_AND;
3348 				res->steps[step].val       = v;
3350 				res->steps[step].transform = SETCC_TR_NEG;
3352 				int val = get_tarval_lowest_bit(t);
3355 				res->steps[step].transform = SETCC_TR_SHL;
3356 				res->steps[step].scale     = val;
3360 		res->steps[step].transform = SETCC_TR_SET;
3361 		res->num_steps = ++step;
3364 	panic("tarval is not long");
/**
3368  * Transforms a Mux node into some code sequence.
 * Special cases, in order: float abs, SSE min/max patterns, float Mux of two
 * constants (loaded from a 2-element constant array indexed by the Setcc
 * result), unsigned "difference or zero" (create_doz), integer Mux of two
 * constants (Setcc + find_const_transform step sequence), and finally a
 * generic CMov.
 *
3370  * @return The transformed node.
 */
3372 static ir_node *gen_Mux(ir_node *node)
3374 	dbg_info *dbgi = get_irn_dbg_info(node);
3375 	ir_node *block = get_nodes_block(node);
3376 	ir_node *new_block = be_transform_node(block);
3377 	ir_node *mux_true = get_Mux_true(node);
3378 	ir_node *mux_false = get_Mux_false(node);
3379 	ir_node *sel = get_Mux_sel(node);
3380 	ir_mode *mode = get_irn_mode(node);
3384 	ia32_condition_code_t cc;
3386 	assert(get_irn_mode(sel) == mode_b);
     /* abs(x) pattern: handled directly for floats, warned about for ints */
3388 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3390 		if (ia32_mode_needs_gp_reg(mode)) {
3391 			ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3394 			ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3395 			return create_float_abs(dbgi, block, op, is_abs < 0, node);
3399 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3400 	if (mode_is_float(mode)) {
3401 		ir_node *cmp_left = get_Cmp_left(sel);
3402 		ir_node *cmp_right = get_Cmp_right(sel);
3403 		ir_relation relation = get_Cmp_relation(sel);
3405 		if (ia32_cg_config.use_sse2) {
3406 			if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3407 				if (cmp_left == mux_true && cmp_right == mux_false) {
3408 					/* Mux(a <= b, a, b) => MIN */
3409 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3410 			                 match_commutative | match_am | match_two_users);
3411 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3412 					/* Mux(a <= b, b, a) => MAX */
3413 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3414 			                 match_commutative | match_am | match_two_users);
3416 			} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3417 				if (cmp_left == mux_true && cmp_right == mux_false) {
3418 					/* Mux(a >= b, a, b) => MAX */
3419 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3420 			                 match_commutative | match_am | match_two_users);
3421 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3422 					/* Mux(a >= b, b, a) => MIN */
3423 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3424 			                 match_commutative | match_am | match_two_users);
     /* float Mux of two constants: build a 2-entry constant array and load
      * entry [Setcc-result], scaled by the entry size */
3429 		if (is_Const(mux_true) && is_Const(mux_false)) {
3430 			ia32_address_mode_t am;
3435 			flags    = get_flags_node(sel, &cc);
3436 			new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3438 			if (ia32_cg_config.use_sse2) {
3439 				/* cannot load from different mode on SSE */
3442 				/* x87 can load any mode */
3446 			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3448 			if (new_mode == mode_F) {
3450 			} else if (new_mode == mode_D) {
3452 			} else if (new_mode == ia32_mode_E) {
3453 				/* arg, shift 16 NOT supported */
     /* scale caps at 8; reach entry size 16 via Lea (index+index) */
3455 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3457 				panic("Unsupported constant size");
3460 			am.ls_mode             = new_mode;
3461 			am.addr.base           = get_symconst_base();
3462 			am.addr.index          = new_node;
3463 			am.addr.mem            = nomem;
3465 			am.addr.scale          = scale;
3466 			am.addr.use_frame      = 0;
3467 			am.addr.tls_segment    = false;
3468 			am.addr.frame_entity   = NULL;
3469 			am.addr.symconst_sign  = 0;
3470 			am.mem_proj            = am.addr.mem;
3471 			am.op_type             = ia32_AddrModeS;
3474 			am.pinned              = op_pin_state_floats;
3476 			am.ins_permuted        = false;
3478 			if (ia32_cg_config.use_sse2)
3479 				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3481 				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3482 			set_am_attributes(load, &am);
3484 			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3486 		panic("cannot transform floating point Mux");
3489 		assert(ia32_mode_needs_gp_reg(mode));
3492 			ir_node *cmp_left = get_Cmp_left(sel);
3493 			ir_node *cmp_right = get_Cmp_right(sel);
3494 			ir_relation relation = get_Cmp_relation(sel);
3495 			ir_node *val_true = mux_true;
3496 			ir_node *val_false = mux_false;
     /* canonicalize so the zero constant is the false value */
3498 			if (is_Const(val_true) && is_Const_null(val_true)) {
3499 				ir_node *tmp = val_false;
3500 				val_false = val_true;
3502 				relation = get_negated_relation(relation);
     /* Mux(a >/< b, a - b, 0) => unsigned difference-or-zero */
3504 			if (is_Const_0(val_false) && is_Sub(val_true)) {
3505 				if ((relation & ir_relation_greater)
3506 						&& get_Sub_left(val_true) == cmp_left
3507 						&& get_Sub_right(val_true) == cmp_right) {
3508 					return create_doz(node, cmp_left, cmp_right);
3510 				if ((relation & ir_relation_less)
3511 						&& get_Sub_left(val_true) == cmp_right
3512 						&& get_Sub_right(val_true) == cmp_left) {
3513 					return create_doz(node, cmp_right, cmp_left);
3518 		flags = get_flags_node(sel, &cc);
3520 		if (is_Const(mux_true) && is_Const(mux_false)) {
3521 			/* both are const, good */
3522 			ir_tarval *tv_true = get_Const_tarval(mux_true);
3523 			ir_tarval *tv_false = get_Const_tarval(mux_false);
3524 			setcc_transform_t res;
3527 			find_const_transform(cc, tv_true, tv_false, &res);
     /* emit the transform steps back-to-front */
3529 			for (step = (int)res.num_steps - 1; step >= 0; --step) {
3532 				switch (res.steps[step].transform) {
3534 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3535 					add_ia32_am_offs_int(new_node, res.steps[step].val);
3537 				case SETCC_TR_ADDxx:
3538 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3541 					new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3542 					set_ia32_am_scale(new_node, res.steps[step].scale);
3543 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3545 				case SETCC_TR_LEAxx:
3546 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3547 					set_ia32_am_scale(new_node, res.steps[step].scale);
3548 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3551 					imm = ia32_immediate_from_long(res.steps[step].scale);
3552 					new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3555 					new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3558 					new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3561 					imm = ia32_immediate_from_long(res.steps[step].val);
3562 					new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3565 					new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3568 					new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3571 					panic("unknown setcc transform");
     /* general integer Mux: conditional move */
3575 			new_node = create_CMov(node, sel, flags, cc);
/**
3582  * Create a conversion from x87 state register to general purpose.
 * Spills the value with fist(tp) to the frame and reloads it as an integer.
 * Unsigned 32-bit values are stored as 64-bit signed and only the low
 * 32 bits reloaded.
 */
3584 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3586 	ir_node *block = be_transform_node(get_nodes_block(node));
3587 	ir_node *op = get_Conv_op(node);
3588 	ir_node *new_op = be_transform_node(op);
3589 	ir_graph *irg = current_ir_graph;
3590 	dbg_info *dbgi = get_irn_dbg_info(node);
3591 	ir_mode *mode = get_irn_mode(node);
3592 	ir_node *frame = get_irg_frame(irg);
3593 	ir_node *fist, *load, *mem;
3595 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3596 	set_irn_pinned(fist, op_pin_state_floats);
3597 	set_ia32_use_frame(fist);
3598 	set_ia32_op_type(fist, ia32_AddrModeD);
     /* fist and fisttp share the memory Proj number */
3600 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3601 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3603 	assert(get_mode_size_bits(mode) <= 32);
3604 	/* exception we can only store signed 32 bit integers, so for unsigned
3605 	   we store a 64bit (signed) integer and load the lower bits */
3606 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3607 		set_ia32_ls_mode(fist, mode_Ls);
3609 		set_ia32_ls_mode(fist, mode_Is);
3611 	SET_IA32_ORIG_NODE(fist, node);
     /* reload the (low 32 bits of the) spilled integer */
3614 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3616 	set_irn_pinned(load, op_pin_state_floats);
3617 	set_ia32_use_frame(load);
3618 	set_ia32_op_type(load, ia32_AddrModeS);
3619 	set_ia32_ls_mode(load, mode_Is);
     /* request a frame slot large enough for what the fist wrote */
3620 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3621 		ia32_attr_t *attr = get_ia32_attr(load);
3622 		attr->data.need_64bit_stackent = 1;
3624 		ia32_attr_t *attr = get_ia32_attr(load);
3625 		attr->data.need_32bit_stackent = 1;
3627 	SET_IA32_ORIG_NODE(load, node);
3629 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
3633  * Creates a x87 strict Conv by placing a Store and a Load
 * (round-trip through memory in @p tgt_mode forces the value to be rounded
 * to the target precision, which x87 registers alone would not do).
 */
3635 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3637 	ir_node *block = get_nodes_block(node);
3638 	ir_graph *irg = get_Block_irg(block);
3639 	dbg_info *dbgi = get_irn_dbg_info(node);
3640 	ir_node *frame = get_irg_frame(irg);
3642 	ir_node *store, *load;
3645 	store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3646 	set_ia32_use_frame(store);
3647 	set_ia32_op_type(store, ia32_AddrModeD);
3648 	SET_IA32_ORIG_NODE(store, node);
3650 	store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3652 	load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3653 	set_ia32_use_frame(load);
3654 	set_ia32_op_type(load, ia32_AddrModeS);
3655 	SET_IA32_ORIG_NODE(load, node);
3657 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/**
 * Creates an integer-to-integer conversion, choosing the 8-bit construction
 * variant when the target mode is 8 bits wide.
 */
3661 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3662 		ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3664 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3666 	func = get_mode_size_bits(mode) == 8 ?
3667 		new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3668 	return func(dbgi, block, base, index, mem, val, mode);
3672 * Create a conversion from general purpose to x87 register
3674 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3676 ir_node *src_block = get_nodes_block(node);
3677 ir_node *block = be_transform_node(src_block);
3678 ir_graph *irg = get_Block_irg(block);
3679 dbg_info *dbgi = get_irn_dbg_info(node);
3680 ir_node *op = get_Conv_op(node);
3681 ir_node *new_op = NULL;
3683 ir_mode *store_mode;
3689 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3690 if (possible_int_mode_for_fp(src_mode)) {
3691 ia32_address_mode_t am;
3693 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3694 if (am.op_type == ia32_AddrModeS) {
3695 ia32_address_t *addr = &am.addr;
3697 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3698 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3700 set_am_attributes(fild, &am);
3701 SET_IA32_ORIG_NODE(fild, node);
3703 fix_mem_proj(fild, &am);
3708 if (new_op == NULL) {
3709 new_op = be_transform_node(op);
3712 mode = get_irn_mode(op);
3714 /* first convert to 32 bit signed if necessary */
3715 if (get_mode_size_bits(src_mode) < 32) {
3716 if (!upper_bits_clean(new_op, src_mode)) {
3717 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3718 SET_IA32_ORIG_NODE(new_op, node);
3723 assert(get_mode_size_bits(mode) == 32);
3726 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3728 set_ia32_use_frame(store);
3729 set_ia32_op_type(store, ia32_AddrModeD);
3730 set_ia32_ls_mode(store, mode_Iu);
3732 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3734 /* exception for 32bit unsigned, do a 64bit spill+load */
3735 if (!mode_is_signed(mode)) {
3738 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3740 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3741 noreg_GP, nomem, zero_const);
3742 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3744 set_ia32_use_frame(zero_store);
3745 set_ia32_op_type(zero_store, ia32_AddrModeD);
3746 add_ia32_am_offs_int(zero_store, 4);
3747 set_ia32_ls_mode(zero_store, mode_Iu);
3749 in[0] = zero_store_mem;
3752 store_mem = new_rd_Sync(dbgi, block, 2, in);
3753 store_mode = mode_Ls;
3755 store_mode = mode_Is;
3759 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3761 set_ia32_use_frame(fild);
3762 set_ia32_op_type(fild, ia32_AddrModeS);
3763 set_ia32_ls_mode(fild, store_mode);
3765 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3771 * Create a conversion from one integer mode into another one
3773 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3774 dbg_info *dbgi, ir_node *block, ir_node *op,
3777 ir_node *new_block = be_transform_node(block);
3779 ir_mode *smaller_mode;
3780 ia32_address_mode_t am;
3781 ia32_address_t *addr = &am.addr;
3784 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3785 smaller_mode = src_mode;
3787 smaller_mode = tgt_mode;
3790 #ifdef DEBUG_libfirm
3792 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3797 match_arguments(&am, block, NULL, op, NULL,
3798 match_am | match_8bit_am | match_16bit_am);
3800 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3801 /* unnecessary conv. in theory it shouldn't have been AM */
3802 assert(is_ia32_NoReg_GP(addr->base));
3803 assert(is_ia32_NoReg_GP(addr->index));
3804 assert(is_NoMem(addr->mem));
3805 assert(am.addr.offset == 0);
3806 assert(am.addr.symconst_ent == NULL);
3810 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3811 addr->mem, am.new_op2, smaller_mode);
3812 set_am_attributes(new_node, &am);
3813 /* match_arguments assume that out-mode = in-mode, this isn't true here
3815 set_ia32_ls_mode(new_node, smaller_mode);
3816 SET_IA32_ORIG_NODE(new_node, node);
3817 new_node = fix_mem_proj(new_node, &am);
3822 * Transforms a Conv node.
3824 * @return The created ia32 Conv node
3826 static ir_node *gen_Conv(ir_node *node)
3828 ir_node *block = get_nodes_block(node);
3829 ir_node *new_block = be_transform_node(block);
3830 ir_node *op = get_Conv_op(node);
3831 ir_node *new_op = NULL;
3832 dbg_info *dbgi = get_irn_dbg_info(node);
3833 ir_mode *src_mode = get_irn_mode(op);
3834 ir_mode *tgt_mode = get_irn_mode(node);
3835 int src_bits = get_mode_size_bits(src_mode);
3836 int tgt_bits = get_mode_size_bits(tgt_mode);
3837 ir_node *res = NULL;
3839 assert(!mode_is_int(src_mode) || src_bits <= 32);
3840 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3842 /* modeB -> X should already be lowered by the lower_mode_b pass */
3843 if (src_mode == mode_b) {
3844 panic("ConvB not lowered %+F", node);
3847 if (src_mode == tgt_mode) {
3848 if (get_Conv_strict(node)) {
3849 if (ia32_cg_config.use_sse2) {
3850 /* when we are in SSE mode, we can kill all strict no-op conversion */
3851 return be_transform_node(op);
3854 /* this should be optimized already, but who knows... */
3855 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3856 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3857 return be_transform_node(op);
3861 if (mode_is_float(src_mode)) {
3862 new_op = be_transform_node(op);
3863 /* we convert from float ... */
3864 if (mode_is_float(tgt_mode)) {
3866 if (ia32_cg_config.use_sse2) {
3867 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3868 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3870 set_ia32_ls_mode(res, tgt_mode);
3872 if (get_Conv_strict(node)) {
3873 /* if fp_no_float_fold is not set then we assume that we
3874 * don't have any float operations in a non
3875 * mode_float_arithmetic mode and can skip strict upconvs */
3876 if (src_bits < tgt_bits) {
3877 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3880 res = gen_x87_strict_conv(tgt_mode, new_op);
3881 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3885 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3890 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3891 if (ia32_cg_config.use_sse2) {
3892 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3894 set_ia32_ls_mode(res, src_mode);
3896 return gen_x87_fp_to_gp(node);
3900 /* we convert from int ... */
3901 if (mode_is_float(tgt_mode)) {
3903 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3904 if (ia32_cg_config.use_sse2) {
3905 new_op = be_transform_node(op);
3906 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3908 set_ia32_ls_mode(res, tgt_mode);
3910 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3911 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3912 res = gen_x87_gp_to_fp(node, src_mode);
3914 /* we need a strict-Conv, if the int mode has more bits than the
3916 if (float_mantissa < int_mantissa) {
3917 res = gen_x87_strict_conv(tgt_mode, res);
3918 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3922 } else if (tgt_mode == mode_b) {
3923 /* mode_b lowering already took care that we only have 0/1 values */
3924 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3925 src_mode, tgt_mode));
3926 return be_transform_node(op);
3929 if (src_bits == tgt_bits) {
3930 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3931 src_mode, tgt_mode));
3932 return be_transform_node(op);
3935 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3943 static ir_node *create_immediate_or_transform(ir_node *node,
3944 char immediate_constraint_type)
3946 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3947 if (new_node == NULL) {
3948 new_node = be_transform_node(node);
3954 * Transforms a FrameAddr into an ia32 Add.
3956 static ir_node *gen_be_FrameAddr(ir_node *node)
3958 ir_node *block = be_transform_node(get_nodes_block(node));
3959 ir_node *op = be_get_FrameAddr_frame(node);
3960 ir_node *new_op = be_transform_node(op);
3961 dbg_info *dbgi = get_irn_dbg_info(node);
3964 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3965 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3966 set_ia32_use_frame(new_node);
3968 SET_IA32_ORIG_NODE(new_node, node);
3974 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3976 static ir_node *gen_be_Return(ir_node *node)
3978 ir_graph *irg = current_ir_graph;
3979 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3980 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3981 ir_node *new_ret_val = be_transform_node(ret_val);
3982 ir_node *new_ret_mem = be_transform_node(ret_mem);
3983 ir_entity *ent = get_irg_entity(irg);
3984 ir_type *tp = get_entity_type(ent);
3985 dbg_info *dbgi = get_irn_dbg_info(node);
3986 ir_node *block = be_transform_node(get_nodes_block(node));
4000 assert(ret_val != NULL);
4001 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
4002 return be_duplicate_node(node);
4005 res_type = get_method_res_type(tp, 0);
4007 if (! is_Primitive_type(res_type)) {
4008 return be_duplicate_node(node);
4011 mode = get_type_mode(res_type);
4012 if (! mode_is_float(mode)) {
4013 return be_duplicate_node(node);
4016 assert(get_method_n_ress(tp) == 1);
4018 frame = get_irg_frame(irg);
4020 /* store xmm0 onto stack */
4021 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4022 new_ret_mem, new_ret_val);
4023 set_ia32_ls_mode(sse_store, mode);
4024 set_ia32_op_type(sse_store, ia32_AddrModeD);
4025 set_ia32_use_frame(sse_store);
4026 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4028 /* load into x87 register */
4029 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4030 set_ia32_op_type(fld, ia32_AddrModeS);
4031 set_ia32_use_frame(fld);
4033 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4034 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4036 /* create a new return */
4037 arity = get_irn_arity(node);
4038 in = ALLOCAN(ir_node*, arity);
4039 pop = be_Return_get_pop(node);
4040 for (i = 0; i < arity; ++i) {
4041 ir_node *op = get_irn_n(node, i);
4042 if (op == ret_val) {
4044 } else if (op == ret_mem) {
4047 in[i] = be_transform_node(op);
4050 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4051 copy_node_attr(irg, node, new_node);
4057 * Transform a be_AddSP into an ia32_SubSP.
4059 static ir_node *gen_be_AddSP(ir_node *node)
4061 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4062 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4064 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4065 match_am | match_immediate);
4066 assert(is_ia32_SubSP(new_node));
4067 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4068 &ia32_registers[REG_ESP]);
4073 * Transform a be_SubSP into an ia32_AddSP
4075 static ir_node *gen_be_SubSP(ir_node *node)
4077 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4078 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4080 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4081 match_am | match_immediate);
4082 assert(is_ia32_AddSP(new_node));
4083 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4084 &ia32_registers[REG_ESP]);
4089 * Change some phi modes
4091 static ir_node *gen_Phi(ir_node *node)
4093 const arch_register_req_t *req;
4094 ir_node *block = be_transform_node(get_nodes_block(node));
4095 ir_graph *irg = current_ir_graph;
4096 dbg_info *dbgi = get_irn_dbg_info(node);
4097 ir_mode *mode = get_irn_mode(node);
4100 if (ia32_mode_needs_gp_reg(mode)) {
4101 /* we shouldn't have any 64bit stuff around anymore */
4102 assert(get_mode_size_bits(mode) <= 32);
4103 /* all integer operations are on 32bit registers now */
4105 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4106 } else if (mode_is_float(mode)) {
4107 if (ia32_cg_config.use_sse2) {
4109 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4112 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4115 req = arch_no_register_req;
4118 /* phi nodes allow loops, so we use the old arguments for now
4119 * and fix this later */
4120 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4121 get_irn_in(node) + 1);
4122 copy_node_attr(irg, node, phi);
4123 be_duplicate_deps(node, phi);
4125 arch_set_irn_register_req_out(phi, 0, req);
4127 be_enqueue_preds(node);
4132 static ir_node *gen_Jmp(ir_node *node)
4134 ir_node *block = get_nodes_block(node);
4135 ir_node *new_block = be_transform_node(block);
4136 dbg_info *dbgi = get_irn_dbg_info(node);
4139 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4140 SET_IA32_ORIG_NODE(new_node, node);
4148 static ir_node *gen_IJmp(ir_node *node)
4150 ir_node *block = get_nodes_block(node);
4151 ir_node *new_block = be_transform_node(block);
4152 dbg_info *dbgi = get_irn_dbg_info(node);
4153 ir_node *op = get_IJmp_target(node);
4155 ia32_address_mode_t am;
4156 ia32_address_t *addr = &am.addr;
4158 assert(get_irn_mode(op) == mode_P);
4160 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4162 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4163 addr->mem, am.new_op2);
4164 set_am_attributes(new_node, &am);
4165 SET_IA32_ORIG_NODE(new_node, node);
4167 new_node = fix_mem_proj(new_node, &am);
4172 static ir_node *gen_ia32_l_Add(ir_node *node)
4174 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4175 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4176 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4177 match_commutative | match_am | match_immediate |
4178 match_mode_neutral);
4180 if (is_Proj(lowered)) {
4181 lowered = get_Proj_pred(lowered);
4183 assert(is_ia32_Add(lowered));
4184 set_irn_mode(lowered, mode_T);
4190 static ir_node *gen_ia32_l_Adc(ir_node *node)
4192 return gen_binop_flags(node, new_bd_ia32_Adc,
4193 match_commutative | match_am | match_immediate |
4194 match_mode_neutral);
4198 * Transforms a l_MulS into a "real" MulS node.
4200 * @return the created ia32 Mul node
4202 static ir_node *gen_ia32_l_Mul(ir_node *node)
4204 ir_node *left = get_binop_left(node);
4205 ir_node *right = get_binop_right(node);
4207 return gen_binop(node, left, right, new_bd_ia32_Mul,
4208 match_commutative | match_am | match_mode_neutral);
4212 * Transforms a l_IMulS into a "real" IMul1OPS node.
4214 * @return the created ia32 IMul1OP node
4216 static ir_node *gen_ia32_l_IMul(ir_node *node)
4218 ir_node *left = get_binop_left(node);
4219 ir_node *right = get_binop_right(node);
4221 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4222 match_commutative | match_am | match_mode_neutral);
4225 static ir_node *gen_ia32_l_Sub(ir_node *node)
4227 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4228 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4229 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4230 match_am | match_immediate | match_mode_neutral);
4232 if (is_Proj(lowered)) {
4233 lowered = get_Proj_pred(lowered);
4235 assert(is_ia32_Sub(lowered));
4236 set_irn_mode(lowered, mode_T);
4242 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4244 return gen_binop_flags(node, new_bd_ia32_Sbb,
4245 match_am | match_immediate | match_mode_neutral);
4248 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4250 ir_node *src_block = get_nodes_block(node);
4251 ir_node *block = be_transform_node(src_block);
4252 ir_graph *irg = current_ir_graph;
4253 dbg_info *dbgi = get_irn_dbg_info(node);
4254 ir_node *frame = get_irg_frame(irg);
4255 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4256 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4257 ir_node *new_val_low = be_transform_node(val_low);
4258 ir_node *new_val_high = be_transform_node(val_high);
4260 ir_node *sync, *fild, *res;
4262 ir_node *store_high;
4266 if (ia32_cg_config.use_sse2) {
4267 panic("ia32_l_LLtoFloat not implemented for SSE2");
4271 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4273 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4275 SET_IA32_ORIG_NODE(store_low, node);
4276 SET_IA32_ORIG_NODE(store_high, node);
4278 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4279 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4281 set_ia32_use_frame(store_low);
4282 set_ia32_use_frame(store_high);
4283 set_ia32_op_type(store_low, ia32_AddrModeD);
4284 set_ia32_op_type(store_high, ia32_AddrModeD);
4285 set_ia32_ls_mode(store_low, mode_Iu);
4286 set_ia32_ls_mode(store_high, mode_Is);
4287 add_ia32_am_offs_int(store_high, 4);
4291 sync = new_rd_Sync(dbgi, block, 2, in);
4294 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4296 set_ia32_use_frame(fild);
4297 set_ia32_op_type(fild, ia32_AddrModeS);
4298 set_ia32_ls_mode(fild, mode_Ls);
4300 SET_IA32_ORIG_NODE(fild, node);
4302 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4304 if (! mode_is_signed(get_irn_mode(val_high))) {
4305 ia32_address_mode_t am;
4307 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4310 am.addr.base = get_symconst_base();
4311 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4312 am.addr.mem = nomem;
4315 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4316 am.addr.tls_segment = false;
4317 am.addr.use_frame = 0;
4318 am.addr.frame_entity = NULL;
4319 am.addr.symconst_sign = 0;
4320 am.ls_mode = mode_F;
4321 am.mem_proj = nomem;
4322 am.op_type = ia32_AddrModeS;
4324 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4325 am.pinned = op_pin_state_floats;
4327 am.ins_permuted = false;
4329 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4330 am.new_op1, am.new_op2, get_fpcw());
4331 set_am_attributes(fadd, &am);
4333 set_irn_mode(fadd, mode_T);
4334 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4339 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4341 ir_node *src_block = get_nodes_block(node);
4342 ir_node *block = be_transform_node(src_block);
4343 ir_graph *irg = get_Block_irg(block);
4344 dbg_info *dbgi = get_irn_dbg_info(node);
4345 ir_node *frame = get_irg_frame(irg);
4346 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4347 ir_node *new_val = be_transform_node(val);
4350 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4351 SET_IA32_ORIG_NODE(fist, node);
4352 set_ia32_use_frame(fist);
4353 set_ia32_op_type(fist, ia32_AddrModeD);
4354 set_ia32_ls_mode(fist, mode_Ls);
4356 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4357 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
4360 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4362 ir_node *block = be_transform_node(get_nodes_block(node));
4363 ir_graph *irg = get_Block_irg(block);
4364 ir_node *pred = get_Proj_pred(node);
4365 ir_node *new_pred = be_transform_node(pred);
4366 ir_node *frame = get_irg_frame(irg);
4367 dbg_info *dbgi = get_irn_dbg_info(node);
4368 long pn = get_Proj_proj(node);
4373 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4374 SET_IA32_ORIG_NODE(load, node);
4375 set_ia32_use_frame(load);
4376 set_ia32_op_type(load, ia32_AddrModeS);
4377 set_ia32_ls_mode(load, mode_Iu);
4378 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4379 * 32 bit from it with this particular load */
4380 attr = get_ia32_attr(load);
4381 attr->data.need_64bit_stackent = 1;
4383 if (pn == pn_ia32_l_FloattoLL_res_high) {
4384 add_ia32_am_offs_int(load, 4);
4386 assert(pn == pn_ia32_l_FloattoLL_res_low);
4389 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4395 * Transform the Projs of an AddSP.
4397 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4399 ir_node *pred = get_Proj_pred(node);
4400 ir_node *new_pred = be_transform_node(pred);
4401 dbg_info *dbgi = get_irn_dbg_info(node);
4402 long proj = get_Proj_proj(node);
4404 if (proj == pn_be_AddSP_sp) {
4405 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4406 pn_ia32_SubSP_stack);
4407 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4409 } else if (proj == pn_be_AddSP_res) {
4410 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4411 pn_ia32_SubSP_addr);
4412 } else if (proj == pn_be_AddSP_M) {
4413 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4416 panic("No idea how to transform proj->AddSP");
4420 * Transform the Projs of a SubSP.
4422 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4424 ir_node *pred = get_Proj_pred(node);
4425 ir_node *new_pred = be_transform_node(pred);
4426 dbg_info *dbgi = get_irn_dbg_info(node);
4427 long proj = get_Proj_proj(node);
4429 if (proj == pn_be_SubSP_sp) {
4430 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4431 pn_ia32_AddSP_stack);
4432 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4434 } else if (proj == pn_be_SubSP_M) {
4435 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4438 panic("No idea how to transform proj->SubSP");
4442 * Transform and renumber the Projs from a Load.
4444 static ir_node *gen_Proj_Load(ir_node *node)
4447 ir_node *pred = get_Proj_pred(node);
4448 dbg_info *dbgi = get_irn_dbg_info(node);
4449 long proj = get_Proj_proj(node);
4451 /* loads might be part of source address mode matches, so we don't
4452 * transform the ProjMs yet (with the exception of loads whose result is
4455 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4458 /* this is needed, because sometimes we have loops that are only
4459 reachable through the ProjM */
4460 be_enqueue_preds(node);
4461 /* do it in 2 steps, to silence firm verifier */
4462 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4463 set_Proj_proj(res, pn_ia32_mem);
4467 /* renumber the proj */
4468 new_pred = be_transform_node(pred);
4469 if (is_ia32_Load(new_pred)) {
4470 switch ((pn_Load)proj) {
4472 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4474 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4475 case pn_Load_X_except:
4476 /* This Load might raise an exception. Mark it. */
4477 set_ia32_exc_label(new_pred, 1);
4478 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4479 case pn_Load_X_regular:
4480 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4482 } else if (is_ia32_Conv_I2I(new_pred) ||
4483 is_ia32_Conv_I2I8Bit(new_pred)) {
4484 set_irn_mode(new_pred, mode_T);
4485 switch ((pn_Load)proj) {
4487 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4489 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4490 case pn_Load_X_except:
4491 /* This Load might raise an exception. Mark it. */
4492 set_ia32_exc_label(new_pred, 1);
4493 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4494 case pn_Load_X_regular:
4495 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4497 } else if (is_ia32_xLoad(new_pred)) {
4498 switch ((pn_Load)proj) {
4500 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4502 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4503 case pn_Load_X_except:
4504 /* This Load might raise an exception. Mark it. */
4505 set_ia32_exc_label(new_pred, 1);
4506 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4507 case pn_Load_X_regular:
4508 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4510 } else if (is_ia32_vfld(new_pred)) {
4511 switch ((pn_Load)proj) {
4513 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4515 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4516 case pn_Load_X_except:
4517 /* This Load might raise an exception. Mark it. */
4518 set_ia32_exc_label(new_pred, 1);
4519 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4520 case pn_Load_X_regular:
4521 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4524 /* can happen for ProJMs when source address mode happened for the
4527 /* however it should not be the result proj, as that would mean the
4528 load had multiple users and should not have been used for
4530 if (proj != pn_Load_M) {
4531 panic("internal error: transformed node not a Load");
4533 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4536 panic("No idea how to transform Proj(Load) %+F", node);
4539 static ir_node *gen_Proj_Store(ir_node *node)
4541 ir_node *pred = get_Proj_pred(node);
4542 ir_node *new_pred = be_transform_node(pred);
4543 dbg_info *dbgi = get_irn_dbg_info(node);
4544 long pn = get_Proj_proj(node);
4546 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4547 switch ((pn_Store)pn) {
4549 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4550 case pn_Store_X_except:
4551 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4552 case pn_Store_X_regular:
4553 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4555 } else if (is_ia32_vfist(new_pred)) {
4556 switch ((pn_Store)pn) {
4558 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4559 case pn_Store_X_except:
4560 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4561 case pn_Store_X_regular:
4562 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4564 } else if (is_ia32_vfisttp(new_pred)) {
4565 switch ((pn_Store)pn) {
4567 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4568 case pn_Store_X_except:
4569 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4570 case pn_Store_X_regular:
4571 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4573 } else if (is_ia32_vfst(new_pred)) {
4574 switch ((pn_Store)pn) {
4576 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4577 case pn_Store_X_except:
4578 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4579 case pn_Store_X_regular:
4580 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4582 } else if (is_ia32_xStore(new_pred)) {
4583 switch ((pn_Store)pn) {
4585 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4586 case pn_Store_X_except:
4587 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4588 case pn_Store_X_regular:
4589 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4591 } else if (is_Sync(new_pred)) {
4592 /* hack for the case that gen_float_const_Store produced a Sync */
4593 if (pn == pn_Store_M) {
4596 panic("exception control flow for gen_float_const_Store not implemented yet");
4597 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4598 /* destination address mode */
4599 if (pn == pn_Store_M) {
4602 panic("exception control flow for destination AM not implemented yet");
4605 panic("No idea how to transform Proj(Store) %+F", node);
4609 * Transform and renumber the Projs from a Div or Mod instruction.
4611 static ir_node *gen_Proj_Div(ir_node *node)
4613 ir_node *pred = get_Proj_pred(node);
4614 ir_node *new_pred = be_transform_node(pred);
4615 dbg_info *dbgi = get_irn_dbg_info(node);
4616 long proj = get_Proj_proj(node);
4618 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4619 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4621 switch ((pn_Div)proj) {
4623 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4624 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4625 } else if (is_ia32_xDiv(new_pred)) {
4626 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4627 } else if (is_ia32_vfdiv(new_pred)) {
4628 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4630 panic("Div transformed to unexpected thing %+F", new_pred);
4633 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4634 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4635 } else if (is_ia32_xDiv(new_pred)) {
4636 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4637 } else if (is_ia32_vfdiv(new_pred)) {
4638 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4640 panic("Div transformed to unexpected thing %+F", new_pred);
4642 case pn_Div_X_except:
4643 set_ia32_exc_label(new_pred, 1);
4644 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4645 case pn_Div_X_regular:
4646 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4649 panic("No idea how to transform proj->Div");
4653 * Transform and renumber the Projs from a Div or Mod instruction.
4655 static ir_node *gen_Proj_Mod(ir_node *node)
4657 ir_node *pred = get_Proj_pred(node);
4658 ir_node *new_pred = be_transform_node(pred);
4659 dbg_info *dbgi = get_irn_dbg_info(node);
4660 long proj = get_Proj_proj(node);
4662 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4663 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4664 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4666 switch ((pn_Mod)proj) {
4668 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4670 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4671 case pn_Mod_X_except:
4672 set_ia32_exc_label(new_pred, 1);
4673 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4674 case pn_Mod_X_regular:
4675 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4677 panic("No idea how to transform proj->Mod");
4681 * Transform and renumber the Projs from a CopyB.
4683 static ir_node *gen_Proj_CopyB(ir_node *node)
4685 ir_node *pred = get_Proj_pred(node);
4686 ir_node *new_pred = be_transform_node(pred);
4687 dbg_info *dbgi = get_irn_dbg_info(node);
4688 long proj = get_Proj_proj(node);
4690 switch ((pn_CopyB)proj) {
4692 if (is_ia32_CopyB_i(new_pred)) {
4693 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4694 } else if (is_ia32_CopyB(new_pred)) {
4695 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4698 case pn_CopyB_X_regular:
4699 if (is_ia32_CopyB_i(new_pred)) {
4700 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4701 } else if (is_ia32_CopyB(new_pred)) {
4702 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4705 case pn_CopyB_X_except:
4706 if (is_ia32_CopyB_i(new_pred)) {
4707 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4708 } else if (is_ia32_CopyB(new_pred)) {
4709 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4714 panic("No idea how to transform proj->CopyB");
/**
 * Transform a be_Call node into an ia32 Call.
 *
 * The call target is matched as an address-mode/immediate operand, the
 * GP register parameters (limited to EAX/ECX/EDX) and the trailing fpcw
 * input are collected from the be_Call inputs, and SSE2 calls are
 * recorded in call_list/call_types for the float-result post-processing
 * pass (see postprocess_fp_call_results).
 *
 * NOTE(review): this listing has elided lines (e.g. the declarations of
 * i, fpcw, mem, call and the final return); comments cover visible code.
 */
4717 static ir_node *gen_be_Call(ir_node *node)
4719 	dbg_info *const dbgi = get_irn_dbg_info(node);
4720 	ir_node *const src_block = get_nodes_block(node);
4721 	ir_node *const block = be_transform_node(src_block);
4722 	ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4723 	ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4724 	ir_node *const sp = be_transform_node(src_sp);
4725 	ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4726 	ia32_address_mode_t am;
4727 	ia32_address_t *const addr = &am.addr;
	/* register parameters default to "not used" (noreg) */
4732 	ir_node * eax = noreg_GP;
4733 	ir_node * ecx = noreg_GP;
4734 	ir_node * edx = noreg_GP;
4735 	unsigned const pop = be_Call_get_pop(node);
4736 	ir_type *const call_tp = be_Call_get_type(node);
4737 	int old_no_pic_adjust;
4738 	int throws_exception = ir_throws_exception(node);
4740 	/* Run the x87 simulator if the call returns a float value */
4741 	if (get_method_n_ress(call_tp) > 0) {
4742 		ir_type *const res_type = get_method_res_type(call_tp, 0);
4743 		ir_mode *const res_mode = get_type_mode(res_type);
4745 		if (res_mode != NULL && mode_is_float(res_mode)) {
4746 			ir_graph *irg = current_ir_graph;
4747 			ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4748 			irg_data->do_x87_sim = 1;
4752 	/* We do not want be_Call direct calls */
4753 	assert(be_Call_get_entity(node) == NULL);
4755 	/* special case for PIC trampoline calls */
4756 	old_no_pic_adjust = ia32_no_pic_adjust;
4757 	ia32_no_pic_adjust = be_get_irg_options(current_ir_graph)->pic;
4759 	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4760 			match_am | match_immediate);
	/* restore the flag after matching the call address */
4762 	ia32_no_pic_adjust = old_no_pic_adjust;
	/* the last be_Call input is the fpcw value; the inputs before it
	 * (down to n_be_Call_first_arg) are the GP register parameters */
4764 	i = get_irn_arity(node) - 1;
4765 	fpcw = be_transform_node(get_irn_n(node, i--));
4766 	for (; i >= n_be_Call_first_arg; --i) {
4767 		arch_register_req_t const *const req
4768 			= arch_get_irn_register_req_in(node, i);
4769 		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4771 		assert(req->type == arch_register_req_type_limited);
4772 		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
		/* each register may be claimed at most once */
4774 		switch (*req->limited) {
4775 			case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4776 			case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4777 			case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4778 			default: panic("Invalid GP register for register parameter");
4782 	mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4783 	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4784 	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4785 	ir_set_throws_exception(call, throws_exception);
4786 	set_am_attributes(call, &am);
4787 	call = fix_mem_proj(call, &am);
4789 	if (get_irn_pinned(node) == op_pin_state_pinned)
4790 		set_irn_pinned(call, op_pin_state_pinned);
4792 	SET_IA32_ORIG_NODE(call, node);
4794 	if (ia32_cg_config.use_sse2) {
4795 		/* remember this call for post-processing */
4796 		ARR_APP1(ir_node *, call_list, call);
4797 		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4804  * Transform Builtin trap
4806 static ir_node *gen_trap(ir_node *node)
4808 	dbg_info *dbgi  = get_irn_dbg_info(node);
4809 	ir_node *block = be_transform_node(get_nodes_block(node));
4810 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
	/* trap maps directly to the x86 ud2 (guaranteed-invalid) instruction */
4812 	return new_bd_ia32_UD2(dbgi, block, mem);
4816  * Transform Builtin debugbreak
4818 static ir_node *gen_debugbreak(ir_node *node)
4820 	dbg_info *dbgi  = get_irn_dbg_info(node);
4821 	ir_node *block = be_transform_node(get_nodes_block(node));
4822 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
	/* Breakpoint node — presumably emitted as int3; confirm in the emitter */
4824 	return new_bd_ia32_Breakpoint(dbgi, block, mem);
4828  * Transform Builtin return_address
4830 static ir_node *gen_return_address(ir_node *node)
4832 	ir_node *param    = get_Builtin_param(node, 0);
4833 	ir_node *frame    = get_Builtin_param(node, 1);
4834 	dbg_info *dbgi    = get_irn_dbg_info(node);
	/* param 0 must be a Const: the number of frames to walk up */
4835 	ir_tarval *tv     = get_Const_tarval(param);
4836 	ir_graph  *irg    = get_irn_irg(node);
4837 	unsigned long value = get_tarval_long(tv);
4839 	ir_node *block  = be_transform_node(get_nodes_block(node));
4840 	ir_node *ptr    = be_transform_node(frame);
	/* climb up 'value' stack frames; ClimbFrame needs two scratch values
	 * (NOTE(review): the guard around this — likely value > 0 — is elided
	 * from this listing) */
4844 	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4845 	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4846 	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4849 	/* load the return address from this frame */
4850 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4852 	set_irn_pinned(load, get_irn_pinned(node));
4853 	set_ia32_op_type(load, ia32_AddrModeS);
4854 	set_ia32_ls_mode(load, mode_Iu);
	/* address the return-address slot via a frame entity */
4856 	set_ia32_am_offs_int(load, 0);
4857 	set_ia32_use_frame(load);
4858 	set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4860 	if (get_irn_pinned(node) == op_pin_state_floats) {
4861 		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4862 				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4863 				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
4864 		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4867 	SET_IA32_ORIG_NODE(load, node);
4868 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4872  * Transform Builtin frame_address
4874 static ir_node *gen_frame_address(ir_node *node)
4876 	ir_node *param    = get_Builtin_param(node, 0);
4877 	ir_node *frame    = get_Builtin_param(node, 1);
4878 	dbg_info *dbgi    = get_irn_dbg_info(node);
	/* param 0 must be a Const: the number of frames to walk up */
4879 	ir_tarval *tv     = get_Const_tarval(param);
4880 	ir_graph  *irg    = get_irn_irg(node);
4881 	unsigned long value = get_tarval_long(tv);
4883 	ir_node *block  = be_transform_node(get_nodes_block(node));
4884 	ir_node *ptr    = be_transform_node(frame);
	/* climb up 'value' stack frames (guard presumably elided from listing) */
4889 	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4890 	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4891 	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4894 	/* load the frame address from this frame */
4895 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4897 	set_irn_pinned(load, get_irn_pinned(node));
4898 	set_ia32_op_type(load, ia32_AddrModeS);
4899 	set_ia32_ls_mode(load, mode_Iu);
4901 	ent = ia32_get_frame_address_entity(irg);
4903 	set_ia32_am_offs_int(load, 0);
4904 	set_ia32_use_frame(load);
4905 	set_ia32_frame_ent(load, ent);
4907 	/* will fail anyway, but gcc does this: */
4908 	set_ia32_am_offs_int(load, 0);
4911 	if (get_irn_pinned(node) == op_pin_state_floats) {
4912 		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4913 				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4914 				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
4915 		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4918 	SET_IA32_ORIG_NODE(load, node);
4919 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4923  * Transform Builtin prefetch.
4925 static ir_node *gen_prefetch(ir_node *node)
4928 	ir_node *ptr, *block, *mem, *base, *idx;
4929 	ir_node *param, *new_node;
4932 	ia32_address_t addr;
4934 	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4935 		/* no prefetch at all, route memory */
4936 		return be_transform_node(get_Builtin_mem(node));
	/* param 1: read/write flag (1 = prefetch for write) */
4939 	param = get_Builtin_param(node, 1);
4940 	tv    = get_Const_tarval(param);
4941 	rw    = get_tarval_long(tv);
4943 	/* construct load address */
4944 	memset(&addr, 0, sizeof(addr));
4945 	ptr = get_Builtin_param(node, 0);
4946 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
	/* NOTE(review): the NULL-checks around base/idx are elided in this listing */
4953 	base = be_transform_node(base);
4959 	idx = be_transform_node(idx);
4962 	dbgi  = get_irn_dbg_info(node);
4963 	block = be_transform_node(get_nodes_block(node));
4964 	mem   = be_transform_node(get_Builtin_mem(node));
4966 	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4967 		/* we have 3DNow!, this was already checked above */
4968 		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4969 	} else if (ia32_cg_config.use_sse_prefetch) {
4970 		/* note: rw == 1 is IGNORED in that case */
4971 		param    = get_Builtin_param(node, 2);
4972 		tv       = get_Const_tarval(param);
4973 		locality = get_tarval_long(tv);
4975 		/* SSE style prefetch */
		/* locality selects the hint level: NTA, T2, T1, T0
		 * (switch labels are elided in this listing) */
4978 			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4981 			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4984 			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4987 			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4991 		assert(ia32_cg_config.use_3dnow_prefetch);
4992 		/* 3DNow! style prefetch */
4993 		new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
4996 	set_irn_pinned(new_node, get_irn_pinned(node));
4997 	set_ia32_op_type(new_node, ia32_AddrModeS);
4998 	set_ia32_ls_mode(new_node, mode_Bu);
4999 	set_address(new_node, &addr);
5001 	SET_IA32_ORIG_NODE(new_node, node);
5003 	return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5007  * Transform bsf like node
	/* Shared helper for Bsf/Bsr/Popcnt-style builtins: matches the single
	 * parameter as an address-mode operand and builds the node via 'func'. */
5009 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5011 	ir_node *param     = get_Builtin_param(node, 0);
5012 	dbg_info *dbgi     = get_irn_dbg_info(node);
5014 	ir_node *block     = get_nodes_block(node);
5015 	ir_node *new_block = be_transform_node(block);
5017 	ia32_address_mode_t am;
5018 	ia32_address_t *addr = &am.addr;
5021 	match_arguments(&am, block, NULL, param, NULL, match_am);
5023 	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5024 	set_am_attributes(cnt, &am);
5025 	set_ia32_ls_mode(cnt, get_irn_mode(param));
5027 	SET_IA32_ORIG_NODE(cnt, node);
5028 	return fix_mem_proj(cnt, &am);
5032  * Transform builtin ffs.
	/* ffs(x) = bsf(x) + 1, and 0 for x == 0.  Computed branch-free as
	 * (bsf(x) | -(x == 0)) + 1: the ZF-based setcc/conv/neg produces an
	 * all-ones mask when the input was zero, which forces the sum to 0. */
5034 static ir_node *gen_ffs(ir_node *node)
5036 	ir_node  *bsf   = gen_unop_AM(node, new_bd_ia32_Bsf);
5037 	ir_node  *real  = skip_Proj(bsf);
5038 	dbg_info *dbgi  = get_irn_dbg_info(real);
5039 	ir_node  *block = get_nodes_block(real);
5040 	ir_node  *flag, *set, *conv, *neg, *orn, *add;
	/* make the Bsf a mode_T node so its flags output can be projected */
5043 	if (get_irn_mode(real) != mode_T) {
5044 		set_irn_mode(real, mode_T);
5045 		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5048 	flag = new_r_Proj(real, mode_b, pn_ia32_flags);
	/* set = (input == 0), as an 8-bit value */
5051 	set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5052 	SET_IA32_ORIG_NODE(set, node);
5055 	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5056 	SET_IA32_ORIG_NODE(conv, node);
	/* neg = 0 - set: all-ones iff the input was zero */
5059 	neg = new_bd_ia32_Neg(dbgi, block, conv);
5062 	orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5063 	set_ia32_commutative(orn);
	/* final +1 folded into a Lea with displacement 1 */
5066 	add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5067 	add_ia32_am_offs_int(add, 1);
5072  * Transform builtin clz.
	/* clz(x) = 31 - bsr(x), computed as bsr(x) ^ 31 (undefined for x == 0,
	 * matching the builtin's contract). */
5074 static ir_node *gen_clz(ir_node *node)
5076 	ir_node  *bsr   = gen_unop_AM(node, new_bd_ia32_Bsr);
5077 	ir_node  *real  = skip_Proj(bsr);
5078 	dbg_info *dbgi  = get_irn_dbg_info(real);
5079 	ir_node  *block = get_nodes_block(real);
5080 	ir_node  *imm   = ia32_create_Immediate(NULL, 0, 31);
5082 	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5086  * Transform builtin ctz.
	/* ctz(x) is exactly bsf(x) (undefined for x == 0). */
5088 static ir_node *gen_ctz(ir_node *node)
5090 	return gen_unop_AM(node, new_bd_ia32_Bsf);
5094  * Transform builtin parity.
5096 static ir_node *gen_parity(ir_node *node)
5098 	dbg_info *dbgi      = get_irn_dbg_info(node);
5099 	ir_node  *block     = get_nodes_block(node);
5100 	ir_node  *new_block = be_transform_node(block);
5101 	ir_node  *param     = get_Builtin_param(node, 0);
5102 	ir_node  *new_param = be_transform_node(param);
5105 	/* the x86 parity bit is stupid: it only looks at the lowest byte,
5106 	 * so we have to do complicated xoring first.
5107 	 * (we should also better lower this before the backend so we still have a
5108 	 * chance for CSE, constant folding and other goodies for some of these
	/* fold the upper half into the lower: x ^= x >> 16 */
5111 	ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5112 	ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5113 	ir_node *xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
	/* XorHighLow xors the high byte of the 16-bit half into the low byte,
	 * so PF afterwards reflects the parity of the whole word */
5115 	ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xor);
5118 	set_ia32_commutative(xor);
5120 	set_irn_mode(xor2, mode_T);
5121 	flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5124 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5125 	SET_IA32_ORIG_NODE(new_node, node);
	/* zero-extend the 8-bit setcc result */
5128 	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5129 	                                    nomem, new_node, mode_Bu);
5130 	SET_IA32_ORIG_NODE(new_node, node);
5135  * Transform builtin popcount
	/* Uses the POPCNT instruction when available; otherwise falls back to
	 * the classic divide-and-conquer bit-counting sequence from
	 * Hacker's Delight (masked pairwise sums of 1/2/4/8/16-bit groups). */
5137 static ir_node *gen_popcount(ir_node *node)
5139 	ir_node *param     = get_Builtin_param(node, 0);
5140 	dbg_info *dbgi     = get_irn_dbg_info(node);
5142 	ir_node *block     = get_nodes_block(node);
5143 	ir_node *new_block = be_transform_node(block);
5146 	ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5148 	/* check for SSE4.2 or SSE4a and use the popcnt instruction */
5149 	if (ia32_cg_config.use_popcnt) {
5150 		ia32_address_mode_t am;
5151 		ia32_address_t *addr = &am.addr;
5154 		match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5156 		cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5157 		set_am_attributes(cnt, &am);
5158 		set_ia32_ls_mode(cnt, get_irn_mode(param));
5160 		SET_IA32_ORIG_NODE(cnt, node);
5161 		return fix_mem_proj(cnt, &am);
5164 	new_param = be_transform_node(param);
5166 	/* do the standard popcount algo */
5167 	/* TODO: This is stupid, we should transform this before the backend,
5168 	 * to get CSE, localopts, etc. for the operations
5169 	 * TODO: This is also not the optimal algorithm (it is just the starting
5170 	 * example in hackers delight, they optimize it more on the following page)
5171 	 * But I'm too lazy to fix this now, as the code should get lowered before
5172 	 * the backend anyway.
5175 	/* m1 = x & 0x55555555 */
5176 	imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5177 	m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
	/* s1 = x >> 1 */
5180 	simm = ia32_create_Immediate(NULL, 0, 1);
5181 	s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5183 	/* m2 = s1 & 0x55555555 */
5184 	m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
	/* m3 = m1 + m2 (Lea used as a flags-free add) */
5187 	m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5189 	/* m4 = m3 & 0x33333333 */
5190 	imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5191 	m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
	/* s2 = m3 >> 2 */
5194 	simm = ia32_create_Immediate(NULL, 0, 2);
5195 	s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5197 	/* m5 = s2 & 0x33333333 */
5198 	m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
	/* m6 = m4 + m5 */
5201 	m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5203 	/* m7 = m6 & 0x0F0F0F0F */
5204 	imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5205 	m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
	/* s3 = m6 >> 4 */
5208 	simm = ia32_create_Immediate(NULL, 0, 4);
5209 	s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5211 	/* m8 = s3 & 0x0F0F0F0F */
5212 	m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
	/* m9 = m7 + m8 */
5215 	m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5217 	/* m10 = m9 & 0x00FF00FF */
5218 	imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5219 	m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
	/* s4 = m9 >> 8 */
5222 	simm = ia32_create_Immediate(NULL, 0, 8);
5223 	s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5225 	/* m11 = s4 & 0x00FF00FF */
5226 	m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5228 	/* m12 = m10 + m11 */
5229 	m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5231 	/* m13 = m12 & 0x0000FFFF */
5232 	imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5233 	m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5235 	/* s5 = m12 >> 16 */
5236 	simm = ia32_create_Immediate(NULL, 0, 16);
5237 	s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5239 	/* res = m13 + s5 */
5240 	return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5244  * Transform builtin byte swap.
	/* 32-bit swap uses the BSWAP instruction on i486+ or a shift/mask
	 * sequence on i386; 16-bit swap always uses Bswap16 (xchg/rol).
	 * NOTE(review): the switch on 'size' (labels for 32 and 16) is elided
	 * in this listing. */
5246 static ir_node *gen_bswap(ir_node *node)
5248 	ir_node *param     = be_transform_node(get_Builtin_param(node, 0));
5249 	dbg_info *dbgi     = get_irn_dbg_info(node);
5251 	ir_node *block     = get_nodes_block(node);
5252 	ir_node *new_block = be_transform_node(block);
5253 	ir_mode *mode      = get_irn_mode(param);
5254 	unsigned size      = get_mode_size_bits(mode);
5255 	ir_node  *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5259 		if (ia32_cg_config.use_i486) {
5260 			/* swap available */
5261 			return new_bd_ia32_Bswap(dbgi, new_block, param);
		/* i386 fallback: assemble the result from shifted/masked bytes */
5263 		s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5264 		s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5266 		m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5267 		m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5269 		s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5271 		m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5272 		m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5274 		s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5275 		return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5278 		/* swap16 always available */
5279 		return new_bd_ia32_Bswap16(dbgi, new_block, param);
5282 		panic("Invalid bswap size (%d)", size);
5287  * Transform builtin outport.
	/* Writes 'value' to an I/O port; the port number may be an immediate.
	 * The ls_mode carries the access width of the out instruction. */
5289 static ir_node *gen_outport(ir_node *node)
5291 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5292 	ir_node *oldv  = get_Builtin_param(node, 1);
5293 	ir_mode *mode  = get_irn_mode(oldv);
5294 	ir_node *value = be_transform_node(oldv);
5295 	ir_node *block = be_transform_node(get_nodes_block(node));
5296 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5297 	dbg_info *dbgi = get_irn_dbg_info(node);
5299 	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5300 	set_ia32_ls_mode(res, mode);
5305  * Transform builtin inport.
	/* Reads from an I/O port; access width is taken from the builtin's
	 * result type. */
5307 static ir_node *gen_inport(ir_node *node)
5309 	ir_type *tp    = get_Builtin_type(node);
5310 	ir_type *rstp  = get_method_res_type(tp, 0);
5311 	ir_mode *mode  = get_type_mode(rstp);
5312 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5313 	ir_node *block = be_transform_node(get_nodes_block(node));
5314 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5315 	dbg_info *dbgi = get_irn_dbg_info(node);
5317 	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5318 	set_ia32_ls_mode(res, mode);
5320 	/* check for missing Result Proj */
5325  * Transform a builtin inner trampoline
	/* Writes a small code stub at 'ptr':
	 *   B9 <env>          mov ecx, <env>
	 *   E9 <rel>          jmp rel32 <callee>
	 * where rel is callee - (trampoline + 10), i.e. relative to the end of
	 * the 10-byte stub.  Returns a Tuple of (memory, trampoline address).
	 * NOTE(review): the address-increment steps between the stores are
	 * elided in this listing. */
5327 static ir_node *gen_inner_trampoline(ir_node *node)
5329 	ir_node  *ptr       = get_Builtin_param(node, 0);
5330 	ir_node  *callee    = get_Builtin_param(node, 1);
5331 	ir_node  *env       = be_transform_node(get_Builtin_param(node, 2));
5332 	ir_node  *mem       = get_Builtin_mem(node);
5333 	ir_node  *block     = get_nodes_block(node);
5334 	ir_node  *new_block = be_transform_node(block);
5338 	ir_node  *trampoline;
5340 	dbg_info *dbgi      = get_irn_dbg_info(node);
5341 	ia32_address_t addr;
5343 	/* construct store address */
5344 	memset(&addr, 0, sizeof(addr));
5345 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5347 	if (addr.base == NULL) {
5348 		addr.base = noreg_GP;
5350 		addr.base = be_transform_node(addr.base);
5353 	if (addr.index == NULL) {
5354 		addr.index = noreg_GP;
5356 		addr.index = be_transform_node(addr.index);
5358 	addr.mem = be_transform_node(mem);
5360 	/* mov  ecx, <env> */
	/* opcode byte 0xB9 = "mov ecx, imm32" */
5361 	val = ia32_create_Immediate(NULL, 0, 0xB9);
5362 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5363 	                              addr.index, addr.mem, val);
5364 	set_irn_pinned(store, get_irn_pinned(node));
5365 	set_ia32_op_type(store, ia32_AddrModeD);
5366 	set_ia32_ls_mode(store, mode_Bu);
5367 	set_address(store, &addr);
	/* store the 32-bit environment pointer (the mov's immediate) */
5371 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5372 	                          addr.index, addr.mem, env);
5373 	set_irn_pinned(store, get_irn_pinned(node));
5374 	set_ia32_op_type(store, ia32_AddrModeD);
5375 	set_ia32_ls_mode(store, mode_Iu);
5376 	set_address(store, &addr);
5380 	/* jmp rel <callee> */
	/* opcode byte 0xE9 = "jmp rel32" */
5381 	val = ia32_create_Immediate(NULL, 0, 0xE9);
5382 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5383 	                              addr.index, addr.mem, val);
5384 	set_irn_pinned(store, get_irn_pinned(node));
5385 	set_ia32_op_type(store, ia32_AddrModeD);
5386 	set_ia32_ls_mode(store, mode_Bu);
5387 	set_address(store, &addr);
5391 	trampoline = be_transform_node(ptr);
5393 	/* the callee is typically an immediate */
5394 	if (is_SymConst(callee)) {
5395 		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5397 		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5398 		add_ia32_am_offs_int(rel, -10);
	/* rel = (callee - 10) - trampoline */
5400 	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5402 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5403 	                          addr.index, addr.mem, rel);
5404 	set_irn_pinned(store, get_irn_pinned(node));
5405 	set_ia32_op_type(store, ia32_AddrModeD);
5406 	set_ia32_ls_mode(store, mode_Iu);
5407 	set_address(store, &addr);
5412 	return new_r_Tuple(new_block, 2, in);
5416  * Transform Builtin node.
	/* Dispatches to the specific gen_* transformer for each builtin kind.
	 * NOTE(review): several case labels are elided in this listing. */
5418 static ir_node *gen_Builtin(ir_node *node)
5420 	ir_builtin_kind kind = get_Builtin_kind(node);
5424 		return gen_trap(node);
5425 	case ir_bk_debugbreak:
5426 		return gen_debugbreak(node);
5427 	case ir_bk_return_address:
5428 		return gen_return_address(node);
5429 	case ir_bk_frame_address:
5430 		return gen_frame_address(node);
5431 	case ir_bk_prefetch:
5432 		return gen_prefetch(node);
5434 		return gen_ffs(node);
5436 		return gen_clz(node);
5438 		return gen_ctz(node);
5440 		return gen_parity(node);
5441 	case ir_bk_popcount:
5442 		return gen_popcount(node);
5444 		return gen_bswap(node);
5446 		return gen_outport(node);
5448 		return gen_inport(node);
5449 	case ir_bk_inner_trampoline:
5450 		return gen_inner_trampoline(node);
5452 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5456  * Transform Proj(Builtin) node.
	/* Maps Projs of a Builtin to the corresponding output of the
	 * transformed ia32 node (result vs. memory), per builtin kind.
	 * NOTE(review): several case labels are elided in this listing. */
5458 static ir_node *gen_Proj_Builtin(ir_node *proj)
5460 	ir_node         *node     = get_Proj_pred(proj);
5461 	ir_node         *new_node = be_transform_node(node);
5462 	ir_builtin_kind kind      = get_Builtin_kind(node);
5465 	case ir_bk_return_address:
5466 	case ir_bk_frame_address:
5471 	case ir_bk_popcount:
	/* these kinds produce a plain value: the transformed node is it */
5473 		assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5476 	case ir_bk_debugbreak:
5477 	case ir_bk_prefetch:
	/* these kinds only produce memory */
5479 		assert(get_Proj_proj(proj) == pn_Builtin_M);
5482 		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5483 			return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5485 		assert(get_Proj_proj(proj) == pn_Builtin_M);
5486 		return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5488 	case ir_bk_inner_trampoline:
5489 		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
			/* Tuple pred 1 is the trampoline address */
5490 			return get_Tuple_pred(new_node, 1);
5492 		assert(get_Proj_proj(proj) == pn_Builtin_M);
5493 		return get_Tuple_pred(new_node, 0);
5496 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform be_IncSP: keep the node but mark it as flags-modifying
 * (the stack adjustment is presumably emitted as add/sub esp — confirm
 * in the emitter).
 */
5499 static ir_node *gen_be_IncSP(ir_node *node)
5501 	ir_node *res = be_duplicate_node(node);
5502 	arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5508  * Transform the Projs from a be_Call.
	/* Renumbers be_Call Projs to ia32_Call Proj numbers.  Result Projs are
	 * matched by scanning the ia32_Call's outputs for the same limited
	 * register requirement; stack/fpcw Projs get their registers pinned. */
5510 static ir_node *gen_Proj_be_Call(ir_node *node)
5512 	ir_node  *call     = get_Proj_pred(node);
5513 	ir_node  *new_call = be_transform_node(call);
5514 	dbg_info *dbgi     = get_irn_dbg_info(node);
5515 	long     proj      = get_Proj_proj(node);
5516 	ir_mode  *mode     = get_irn_mode(node);
5519 	if (proj == pn_be_Call_M) {
5520 		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5522 	/* transform call modes */
5523 	if (mode_is_data(mode)) {
5524 		const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5528 	/* Map from be_Call to ia32_Call proj number */
5529 	if (proj == pn_be_Call_sp) {
5530 		proj = pn_ia32_Call_stack;
5531 	} else if (proj == pn_be_Call_M) {
5532 		proj = pn_ia32_Call_M;
5533 	} else if (proj == pn_be_Call_X_except) {
5534 		proj = pn_ia32_Call_X_except;
5535 	} else if (proj == pn_be_Call_X_regular) {
5536 		proj = pn_ia32_Call_X_regular;
5538 		arch_register_req_t const *const req    = arch_get_irn_register_req(node);
5539 		int                        const n_outs = arch_get_irn_n_outs(new_call);
5542 		assert(proj      >= pn_be_Call_first_res);
5543 		assert(req->type & arch_register_req_type_limited);
		/* find the ia32_Call output with the matching limited register */
5545 		for (i = 0; i < n_outs; ++i) {
5546 			arch_register_req_t const *const new_req
5547 				= arch_get_irn_register_req_out(new_call, i);
5549 			if (!(new_req->type & arch_register_req_type_limited) ||
5550 			    new_req->cls      != req->cls                     ||
5551 			    *new_req->limited != *req->limited)
5560 	res = new_rd_Proj(dbgi, new_call, mode, proj);
5562 	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5564 	case pn_ia32_Call_stack:
5565 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5568 	case pn_ia32_Call_fpcw:
5569 		arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5577  * Transform the Projs from a Cmp.
	/* A Proj(Cmp) reaching the backend indicates a lowering bug upstream;
	 * fail loudly instead of producing wrong code. */
5579 static ir_node *gen_Proj_Cmp(ir_node *node)
5581 	/* this probably means not all mode_b nodes were lowered... */
5582 	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform a Proj of an ASM node: the memory Proj is mapped to the last
 * output of the transformed ASM; int/reference and float results keep
 * their position (NOTE(review): the pos computations for those branches
 * are elided in this listing).
 */
5586 static ir_node *gen_Proj_ASM(ir_node *node)
5588 	ir_mode *mode     = get_irn_mode(node);
5589 	ir_node *pred     = get_Proj_pred(node);
5590 	ir_node *new_pred = be_transform_node(pred);
5591 	long     pos      = get_Proj_proj(node);
5593 	if (mode == mode_M) {
5594 		pos = arch_get_irn_n_outs(new_pred)-1;
5595 	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
5597 	} else if (mode_is_float(mode)) {
5600 		panic("unexpected proj mode at ASM");
5603 	return new_r_Proj(new_pred, mode, pos);
5607  * Transform and potentially renumber Proj nodes.
	/* Central Proj dispatcher: routes to the per-opcode Proj transformers
	 * (NOTE(review): the case labels of the opcode switch are elided in
	 * this listing), handles Proj(Start) specially, and falls back to
	 * duplicating or GP-retyping other Projs. */
5609 static ir_node *gen_Proj(ir_node *node)
5611 	ir_node *pred = get_Proj_pred(node);
5614 	switch (get_irn_opcode(pred)) {
5616 		return gen_Proj_Load(node);
5618 		return gen_Proj_Store(node);
5620 		return gen_Proj_ASM(node);
5622 		return gen_Proj_Builtin(node);
5624 		return gen_Proj_Div(node);
5626 		return gen_Proj_Mod(node);
5628 		return gen_Proj_CopyB(node);
5630 		return gen_Proj_be_SubSP(node);
5632 		return gen_Proj_be_AddSP(node);
5634 		return gen_Proj_be_Call(node);
5636 		return gen_Proj_Cmp(node);
5638 		proj = get_Proj_proj(node);
5640 		case pn_Start_X_initial_exec: {
5641 			ir_node *block     = get_nodes_block(pred);
5642 			ir_node *new_block = be_transform_node(block);
5643 			dbg_info *dbgi     = get_irn_dbg_info(node);
5644 			/* we exchange the ProjX with a jump */
5645 			ir_node *jump      = new_rd_Jmp(dbgi, new_block);
5653 		if (is_ia32_l_FloattoLL(pred)) {
5654 			return gen_Proj_l_FloattoLL(node);
5656 		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
			/* GP-typed results get retyped to mode_Iu on the ia32 side */
5660 			ir_mode *mode = get_irn_mode(node);
5661 			if (ia32_mode_needs_gp_reg(mode)) {
5662 				ir_node *new_pred = be_transform_node(pred);
5663 				ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5664 				                               get_Proj_proj(node));
				/* keep the debugging-friendly node number of the original */
5665 				new_proj->node_nr = node->node_nr;
5670 	return be_duplicate_node(node);
5674  * Enters all transform functions into the generic pointer
5676 static void register_transformers(void)
5678 	/* first clear the generic function pointer for all ops */
5679 	be_start_transform_setup();
	/* register one transformer per opcode (kept roughly alphabetical) */
5681 	be_set_transform_function(op_Add,              gen_Add);
5682 	be_set_transform_function(op_And,              gen_And);
5683 	be_set_transform_function(op_ASM,              ia32_gen_ASM);
5684 	be_set_transform_function(op_be_AddSP,         gen_be_AddSP);
5685 	be_set_transform_function(op_be_Call,          gen_be_Call);
5686 	be_set_transform_function(op_be_Copy,          gen_be_Copy);
5687 	be_set_transform_function(op_be_FrameAddr,     gen_be_FrameAddr);
5688 	be_set_transform_function(op_be_IncSP,         gen_be_IncSP);
5689 	be_set_transform_function(op_be_Return,        gen_be_Return);
5690 	be_set_transform_function(op_be_SubSP,         gen_be_SubSP);
5691 	be_set_transform_function(op_Builtin,          gen_Builtin);
5692 	be_set_transform_function(op_Cmp,              gen_Cmp);
5693 	be_set_transform_function(op_Cond,             gen_Cond);
5694 	be_set_transform_function(op_Const,            gen_Const);
5695 	be_set_transform_function(op_Conv,             gen_Conv);
5696 	be_set_transform_function(op_CopyB,            ia32_gen_CopyB);
5697 	be_set_transform_function(op_Div,              gen_Div);
5698 	be_set_transform_function(op_Eor,              gen_Eor);
5699 	be_set_transform_function(op_ia32_l_Adc,       gen_ia32_l_Adc);
5700 	be_set_transform_function(op_ia32_l_Add,       gen_ia32_l_Add);
5701 	be_set_transform_function(op_ia32_Leave,       be_duplicate_node);
5702 	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5703 	be_set_transform_function(op_ia32_l_IMul,      gen_ia32_l_IMul);
5704 	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5705 	be_set_transform_function(op_ia32_l_Mul,       gen_ia32_l_Mul);
5706 	be_set_transform_function(op_ia32_l_Sbb,       gen_ia32_l_Sbb);
5707 	be_set_transform_function(op_ia32_l_Sub,       gen_ia32_l_Sub);
5708 	be_set_transform_function(op_ia32_GetEIP,      be_duplicate_node);
5709 	be_set_transform_function(op_ia32_Minus64Bit,  be_duplicate_node);
5710 	be_set_transform_function(op_ia32_NoReg_GP,    be_duplicate_node);
5711 	be_set_transform_function(op_ia32_NoReg_VFP,   be_duplicate_node);
5712 	be_set_transform_function(op_ia32_NoReg_XMM,   be_duplicate_node);
5713 	be_set_transform_function(op_ia32_PopEbp,      be_duplicate_node);
5714 	be_set_transform_function(op_ia32_Push,        be_duplicate_node);
5715 	be_set_transform_function(op_IJmp,             gen_IJmp);
5716 	be_set_transform_function(op_Jmp,              gen_Jmp);
5717 	be_set_transform_function(op_Load,             gen_Load);
5718 	be_set_transform_function(op_Minus,            gen_Minus);
5719 	be_set_transform_function(op_Mod,              gen_Mod);
5720 	be_set_transform_function(op_Mul,              gen_Mul);
5721 	be_set_transform_function(op_Mulh,             gen_Mulh);
5722 	be_set_transform_function(op_Mux,              gen_Mux);
5723 	be_set_transform_function(op_Not,              gen_Not);
5724 	be_set_transform_function(op_Or,               gen_Or);
5725 	be_set_transform_function(op_Phi,              gen_Phi);
5726 	be_set_transform_function(op_Proj,             gen_Proj);
5727 	be_set_transform_function(op_Rotl,             gen_Rotl);
5728 	be_set_transform_function(op_Shl,              gen_Shl);
5729 	be_set_transform_function(op_Shr,              gen_Shr);
5730 	be_set_transform_function(op_Shrs,             gen_Shrs);
5731 	be_set_transform_function(op_Store,            gen_Store);
5732 	be_set_transform_function(op_Sub,              gen_Sub);
5733 	be_set_transform_function(op_Switch,           gen_Switch);
5734 	be_set_transform_function(op_SymConst,         gen_SymConst);
5735 	be_set_transform_function(op_Unknown,          ia32_gen_Unknown);
5739  * Pre-transform all unknown and noreg nodes.
	/* Also caches the per-graph nomem and noreg_GP nodes used throughout
	 * the transformers above. */
5741 static void ia32_pretransform_node(void)
5743 	ir_graph        *irg      = current_ir_graph;
5744 	ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5746 	irg_data->noreg_gp       = be_pre_transform_node(irg_data->noreg_gp);
5747 	irg_data->noreg_vfp      = be_pre_transform_node(irg_data->noreg_vfp);
5748 	irg_data->noreg_xmm      = be_pre_transform_node(irg_data->noreg_xmm);
5749 	irg_data->get_eip        = be_pre_transform_node(irg_data->get_eip);
5750 	irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5752 	nomem    = get_irg_no_mem(irg);
5753 	noreg_GP = ia32_new_NoReg_gp(irg);
5757  * Post-process all calls if we are in SSE mode.
5758  * The ABI requires that the results are in st0, copy them
5759  * to a xmm register.
5761 static void postprocess_fp_call_results(void)
5765 	for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5766 		ir_node *call = call_list[i];
5767 		ir_type *mtp  = call_types[i];
		/* examine every float result of this call */
5770 		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5771 			ir_type *res_tp = get_method_res_type(mtp, j);
5772 			ir_node *res, *new_res;
5773 			const ir_edge_t *edge, *next;
5776 			if (! is_atomic_type(res_tp)) {
5777 				/* no floating point return */
5780 			res_mode = get_type_mode(res_tp);
5781 			if (! mode_is_float(res_mode)) {
5782 				/* no floating point return */
			/* the x87 result Proj of this call */
5786 			res     = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5789 			/* now patch the users */
5790 			foreach_out_edge_safe(res, edge, next) {
5791 				ir_node *succ = get_edge_src_irn(edge);
5794 				if (be_is_Keep(succ))
5797 				if (is_ia32_xStore(succ)) {
5798 					/* an xStore can be patched into an vfst */
5799 					dbg_info *db    = get_irn_dbg_info(succ);
5800 					ir_node  *block = get_nodes_block(succ);
5801 					ir_node  *base  = get_irn_n(succ, n_ia32_xStore_base);
5802 					ir_node  *idx   = get_irn_n(succ, n_ia32_xStore_index);
5803 					ir_node  *mem   = get_irn_n(succ, n_ia32_xStore_mem);
5804 					ir_node  *value = get_irn_n(succ, n_ia32_xStore_val);
5805 					ir_mode  *mode  = get_ia32_ls_mode(succ);
5807 					ir_node  *st    = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5808 					//ir_node  *mem   = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5809 					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5810 					if (is_ia32_use_frame(succ))
5811 						set_ia32_use_frame(st);
5812 					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5813 					set_irn_pinned(st, get_irn_pinned(succ));
5814 					set_ia32_op_type(st, ia32_AddrModeD);
					/* the Proj numbers must line up so the store can be
					 * exchanged without renumbering its Projs */
5816 					assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5817 					assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5818 					assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
5825 				if (new_res == NULL) {
					/* generic user: spill st0 to the frame and reload it
					 * into an xmm register via xLoad */
5826 					dbg_info *db       = get_irn_dbg_info(call);
5827 					ir_node  *block    = get_nodes_block(call);
5828 					ir_node  *frame    = get_irg_frame(current_ir_graph);
5829 					ir_node  *old_mem  = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5830 					ir_node  *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5831 					ir_node  *vfst, *xld, *new_mem;
5834 					/* store st(0) on stack */
5835 					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5837 					set_ia32_op_type(vfst, ia32_AddrModeD);
5838 					set_ia32_use_frame(vfst);
5840 					vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5842 					/* load into SSE register */
5843 					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5845 					set_ia32_op_type(xld, ia32_AddrModeS);
5846 					set_ia32_use_frame(xld);
5848 					new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5849 					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5851 					if (old_mem != NULL) {
5852 						edges_reroute(old_mem, new_mem);
				/* finally point the user at the SSE-register value */
5856 				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5862 /* do the transformation */
5863 void ia32_transform_graph(ir_graph *irg)
5867 	register_transformers();
5868 	initial_fpcw       = NULL;
5869 	ia32_no_pic_adjust = 0;
5871 	old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
	/* heights are needed by the address-mode matcher */
5873 	be_timer_push(T_HEIGHTS);
5874 	ia32_heights = heights_new(irg);
5875 	be_timer_pop(T_HEIGHTS);
5876 	ia32_calculate_non_address_mode_nodes(irg);
5878 	/* the transform phase is not safe for CSE (yet) because several nodes get
5879 	 * attributes set after their creation */
5880 	cse_last = get_opt_cse();
5883 	call_list  = NEW_ARR_F(ir_node *, 0);
5884 	call_types = NEW_ARR_F(ir_type *, 0);
5885 	be_transform_graph(irg, ia32_pretransform_node);
	/* in SSE2 mode, fix up x87 float call results recorded during transform */
5887 	if (ia32_cg_config.use_sse2)
5888 		postprocess_fp_call_results();
5889 	DEL_ARR_F(call_types);
5890 	DEL_ARR_F(call_list);
	/* restore the CSE setting and release analysis data */
5892 	set_opt_cse(cse_last);
5894 	ia32_free_non_address_mode_nodes();
5895 	heights_free(ia32_heights);
5896 	ia32_heights = NULL;
5899 void ia32_init_transform(void)
5901 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");