2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief This file implements the IR transformation from firm into
10 * @author Christian Wuerdig, Matthias Braun
19 #include "irgraph_t.h"
24 #include "iredges_t.h"
40 #include "betranshlp.h"
43 #include "bearch_ia32_t.h"
44 #include "ia32_common_transform.h"
45 #include "ia32_nodes_attr.h"
46 #include "ia32_transform.h"
47 #include "ia32_new_nodes.h"
48 #include "ia32_dbg_stat.h"
49 #include "ia32_optimize.h"
50 #include "ia32_address_mode.h"
51 #include "ia32_architecture.h"
53 #include "gen_ia32_regalloc_if.h"
55 /* define this to construct SSE constants instead of loading them */
56 #undef CONSTRUCT_SSE_CONST
58 #define mode_fp (ia32_reg_classes[CLASS_ia32_fp].mode)
59 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
61 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
63 static ir_node *old_initial_fpcw = NULL;
64 static ir_node *initial_fpcw = NULL;
65 int ia32_no_pic_adjust;
67 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
68 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
71 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
72 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
75 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
76 ir_node *op1, ir_node *op2);
78 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
79 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
81 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
82 ir_node *base, ir_node *index, ir_node *mem);
84 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
85 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
88 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
90 static ir_node *create_immediate_or_transform(ir_node *node);
92 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
93 dbg_info *dbgi, ir_node *block,
94 ir_node *op, ir_node *orig_node);
96 /* it's enough to have these once */
97 static ir_node *nomem, *noreg_GP;
99 /** a list to postprocess all calls */
100 static ir_node **call_list;
101 static ir_type **call_types;
103 /** Return non-zero if a node represents the 0 constant. */
104 static bool is_Const_0(ir_node *node)
106 return is_Const(node) && is_Const_null(node);
109 /** Return non-zero if a node represents the 1 constant. */
110 static bool is_Const_1(ir_node *node)
112 return is_Const(node) && is_Const_one(node);
115 /** Return non-zero if a node represents the -1 (all-bits-one) constant. */
116 static bool is_Const_Minus_1(ir_node *node)
118 return is_Const(node) && is_Const_all_one(node);
122 * returns true if constant can be created with a simple float command
124 static bool is_simple_x87_Const(ir_node *node)
126 ir_tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 can be materialized directly (gen_Const emits fldz/fld1) */
127 if (tarval_is_null(tv) || tarval_is_one(tv))
130 /* TODO: match all the other float constants */
135 * returns true if constant can be created with a simple float command
137 static bool is_simple_sse_Const(ir_node *node)
139 ir_tarval *tv = get_Const_tarval(node);
140 ir_mode *mode = get_tarval_mode(tv);
145 if (tarval_is_null(tv)
146 #ifdef CONSTRUCT_SSE_CONST
151 #ifdef CONSTRUCT_SSE_CONST
152 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
153 unsigned val = get_tarval_sub_bits(tv, 0) |
154 (get_tarval_sub_bits(tv, 1) << 8) |
155 (get_tarval_sub_bits(tv, 2) << 16) |
156 (get_tarval_sub_bits(tv, 3) << 24);
158 /* lower 32bit are zero, really a 32bit constant */
161 #endif /* CONSTRUCT_SSE_CONST */
162 /* TODO: match all the other float constants */
167 * return NoREG or pic_base in case of PIC.
168 * This is necessary as base address for newly created symbols
170 static ir_node *get_symconst_base(void)
172 ir_graph *irg = current_ir_graph;
/* under PIC, symbol references must be relative to the GOT/pic base */
174 if (be_options.pic) {
175 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
176 return arch_env->impl->get_pic_base(irg);
183 * Transforms a Const.
185 static ir_node *gen_Const(ir_node *node)
187 ir_node *old_block = get_nodes_block(node);
188 ir_node *block = be_transform_node(old_block);
189 dbg_info *dbgi = get_irn_dbg_info(node);
190 ir_mode *mode = get_irn_mode(node);
191 ir_tarval *tv = get_Const_tarval(node);
/* float constants: materialize either via SSE2 (xmm) or x87 nodes */
193 if (mode_is_float(mode)) {
194 ir_graph *irg = get_irn_irg(node);
195 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
196 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
201 if (ia32_cg_config.use_sse2) {
/* +0.0 has a dedicated zeroing instruction */
202 if (tarval_is_null(tv)) {
203 load = new_bd_ia32_xZero(dbgi, block);
204 set_ia32_ls_mode(load, mode);
206 #ifdef CONSTRUCT_SSE_CONST
/* construct 1.0 from all-ones via shift left then shift right */
207 } else if (tarval_is_one(tv)) {
208 int cnst = mode == mode_F ? 26 : 55;
209 ir_node *imm1 = ia32_create_Immediate(irg, NULL, 0, cnst);
210 ir_node *imm2 = ia32_create_Immediate(irg, NULL, 0, 2);
211 ir_node *pslld, *psrld;
213 load = new_bd_ia32_xAllOnes(dbgi, block);
214 set_ia32_ls_mode(load, mode);
215 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
216 set_ia32_ls_mode(pslld, mode);
217 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
218 set_ia32_ls_mode(psrld, mode);
220 #endif /* CONSTRUCT_SSE_CONST */
221 } else if (mode == mode_F) {
222 /* we can place any 32bit constant by using a movd gp, sse */
223 unsigned val = get_tarval_sub_bits(tv, 0) |
224 (get_tarval_sub_bits(tv, 1) << 8) |
225 (get_tarval_sub_bits(tv, 2) << 16) |
226 (get_tarval_sub_bits(tv, 3) << 24);
227 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
228 load = new_bd_ia32_xMovd(dbgi, block, cnst);
229 set_ia32_ls_mode(load, mode);
233 #ifdef CONSTRUCT_SSE_CONST
234 if (mode == mode_D) {
235 unsigned val = get_tarval_sub_bits(tv, 0) |
236 (get_tarval_sub_bits(tv, 1) << 8) |
237 (get_tarval_sub_bits(tv, 2) << 16) |
238 (get_tarval_sub_bits(tv, 3) << 24);
240 ir_node *imm32 = ia32_create_Immediate(irg, NULL, 0, 32);
241 ir_node *cnst, *psllq;
243 /* fine, lower 32bit are zero, produce 32bit value */
244 val = get_tarval_sub_bits(tv, 4) |
245 (get_tarval_sub_bits(tv, 5) << 8) |
246 (get_tarval_sub_bits(tv, 6) << 16) |
247 (get_tarval_sub_bits(tv, 7) << 24);
248 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
249 load = new_bd_ia32_xMovd(dbgi, block, cnst);
250 set_ia32_ls_mode(load, mode);
/* move the 32 significant bits into the upper half of the double */
251 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
252 set_ia32_ls_mode(psllq, mode);
257 #endif /* CONSTRUCT_SSE_CONST */
/* fallback: load the constant from a float-constant entity in memory */
258 floatent = ia32_create_float_const_entity(isa, tv, NULL);
260 base = get_symconst_base();
261 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
263 set_ia32_op_type(load, ia32_AddrModeS);
264 set_ia32_am_sc(load, floatent);
/* loads from constant entities can be rematerialized by the spiller */
265 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
266 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: 0.0 and 1.0 have dedicated push instructions */
269 if (tarval_is_null(tv)) {
270 load = new_bd_ia32_fldz(dbgi, block);
272 set_ia32_ls_mode(load, mode);
273 } else if (tarval_is_one(tv)) {
274 load = new_bd_ia32_fld1(dbgi, block);
276 set_ia32_ls_mode(load, mode);
281 floatent = ia32_create_float_const_entity(isa, tv, NULL);
282 /* create_float_const_ent is smart and sometimes creates
284 ls_mode = get_type_mode(get_entity_type(floatent));
285 base = get_symconst_base();
286 load = new_bd_ia32_fld(dbgi, block, base, noreg_GP, nomem,
288 set_ia32_op_type(load, ia32_AddrModeS);
289 set_ia32_am_sc(load, floatent);
290 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
291 res = new_r_Proj(load, mode_fp, pn_ia32_fld_res);
294 #ifdef CONSTRUCT_SSE_CONST
296 #endif /* CONSTRUCT_SSE_CONST */
297 SET_IA32_ORIG_NODE(load, node);
299 } else { /* non-float mode */
/* normalize the tarval to 32-bit unsigned before extracting the value */
303 tv = tarval_convert_to(tv, mode_Iu);
305 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
307 panic("couldn't convert constant tarval (%+F)", node);
309 val = get_tarval_long(tv);
311 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
312 SET_IA32_ORIG_NODE(cnst, node);
319 * Transforms a SymConst.
321 static ir_node *gen_SymConst(ir_node *node)
323 ir_node *old_block = get_nodes_block(node);
324 ir_node *block = be_transform_node(old_block);
325 dbg_info *dbgi = get_irn_dbg_info(node);
326 ir_mode *mode = get_irn_mode(node);
/* float symconsts become loads (SSE2 xLoad or x87 fld) from the entity */
329 if (mode_is_float(mode)) {
330 if (ia32_cg_config.use_sse2)
331 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
333 cnst = new_bd_ia32_fld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
334 set_ia32_am_sc(cnst, get_SymConst_entity(node));
335 set_ia32_use_frame(cnst);
339 if (get_SymConst_kind(node) != symconst_addr_ent) {
340 panic("backend only support symconst_addr_ent (at %+F)", node);
342 entity = get_SymConst_entity(node);
/* thread-local entities are addressed relative to the TLS base register */
343 if (get_entity_owner(entity) == get_tls_type()) {
344 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
345 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
346 set_ia32_am_sc(lea, entity);
349 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
353 SET_IA32_ORIG_NODE(cnst, node);
/* Build a fixed-layout 2-element array type of the given element type. */
358 static ir_type *make_array_type(ir_type *tp)
360 unsigned alignment = get_type_alignment_bytes(tp);
361 unsigned size = get_type_size_bytes(tp);
362 ir_type *res = new_type_array(1, tp);
363 set_type_alignment_bytes(res, alignment);
364 set_array_bounds_int(res, 0, 0, 2);
/* NOTE(review): the alignment>size branch body is not visible here;
 * presumably it rounds the element size up to the alignment — confirm */
365 if (alignment > size)
367 set_type_size_bytes(res, 2 * size);
368 set_type_state(res, layout_fixed);
373 * Create a float[2] array type for the given atomic type.
375 * @param tp the atomic type
377 static ir_type *ia32_create_float_array(ir_type *tp)
379 ir_mode *mode = get_type_mode(tp);
/* cache one array type per float mode in function-local statics */
382 if (mode == mode_F) {
383 static ir_type *float_F;
387 arr = float_F = make_array_type(tp);
388 } else if (mode == mode_D) {
389 static ir_type *float_D;
393 arr = float_D = make_array_type(tp);
395 static ir_type *float_E;
399 arr = float_E = make_array_type(tp);
404 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
405 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* table of the known constants: name, bit pattern, and a mode selector
 * (0 = 32-bit, 1 = 64-bit, 2 = ULL bias handled as a float pair below) */
407 static const struct {
409 const char *cnst_str;
411 } names [ia32_known_const_max] = {
412 { "C_sfp_sign", "0x80000000", 0 },
413 { "C_dfp_sign", "0x8000000000000000", 1 },
414 { "C_sfp_abs", "0x7FFFFFFF", 0 },
415 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
416 { "C_ull_bias", "0x10000000000000000", 2 }
/* entities are created once and cached for subsequent calls */
418 static ir_entity *ent_cache[ia32_known_const_max];
420 ir_entity *ent = ent_cache[kct];
423 ir_graph *irg = current_ir_graph;
424 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
425 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
426 const char *cnst_str = names[kct].cnst_str;
427 ident *name = new_id_from_str(names[kct].name);
430 switch (names[kct].mode) {
431 case 0: mode = mode_Iu; break;
432 case 1: mode = mode_Lu; break;
433 case 2: mode = mode_F; break;
434 default: panic("internal compiler error");
436 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* the ULL bias is emitted as a private constant float[2] = { 0, bias } */
438 if (kct == ia32_ULLBIAS) {
439 ir_type *type = ia32_get_prim_type(mode_F);
440 ir_type *atype = ia32_create_float_array(type);
441 ir_initializer_t *initializer;
443 ent = new_entity(get_glob_type(), name, atype);
445 set_entity_ld_ident(ent, name);
446 set_entity_visibility(ent, ir_visibility_private);
447 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
449 initializer = create_initializer_compound(2);
450 set_initializer_compound_value(initializer, 0,
451 create_initializer_tarval(get_mode_null(mode)));
452 set_initializer_compound_value(initializer, 1,
453 create_initializer_tarval(tv));
454 set_entity_initializer(ent, initializer);
456 ent = ia32_create_float_const_entity(isa, tv, name);
458 /* cache the entry */
459 ent_cache[kct] = ent;
462 return ent_cache[kct];
466 * return true if the node is a Proj(Load) and could be used in source address
467 * mode for another node. Will return only true if the @p other node is not
468 * dependent on the memory of the Load (for binary operations use the other
469 * input here, for unary operations use NULL).
471 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
472 ir_node *other, ir_node *other2,
479 /* float constants are always available */
480 if (is_Const(node)) {
481 mode = get_irn_mode(node);
482 if (mode_is_float(mode)) {
483 ir_tarval *tv = get_Const_tarval(node);
/* only fold the constant if a double-precision load reproduces it exactly */
484 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
486 if (ia32_cg_config.use_sse2) {
487 if (is_simple_sse_Const(node))
490 if (is_simple_x87_Const(node))
493 if (get_irn_n_edges(node) > 1)
502 load = get_Proj_pred(node);
503 pn = get_Proj_proj(node);
504 if (!is_Load(load) || pn != pn_Load_res)
/* the load must be in the same block so it can be folded locally */
506 if (get_nodes_block(load) != block)
508 mode = get_irn_mode(node);
509 /* we can't fold mode_E AM */
510 if (mode == ia32_mode_E)
512 /* we only use address mode if we're the only user of the load */
513 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
515 /* in some edge cases with address mode we might reach the load normally
516 * and through some AM sequence, if it is already materialized then we
517 * can't create an AM node from it */
518 if (be_is_transformed(node))
521 /* don't do AM if other node inputs depend on the load (via mem-proj) */
522 if (other != NULL && ia32_prevents_AM(block, load, other))
525 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Description of a matched operand/addressing configuration for one node. */
531 typedef struct ia32_address_mode_t ia32_address_mode_t;
532 struct ia32_address_mode_t {
537 ia32_op_type_t op_type; /**< normal or source address mode */
541 unsigned commutative : 1; /**< operands may be swapped */
542 unsigned ins_permuted : 1; /**< inputs were swapped during matching */
/* Fill @p addr with a transformed address mode built from ptr/mem. */
545 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
547 /* construct load address */
548 memset(addr, 0, sizeof(addr[0]));
549 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
/* missing base/index parts default to the GP no-register placeholder */
551 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
552 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
553 addr->mem = be_transform_node(mem);
/* Build source address mode info in @p am for @p node, which is either a
 * float Const (folded via a constant-pool entity) or a Proj(Load). */
556 static void build_address(ia32_address_mode_t *am, ir_node *node,
557 ia32_create_am_flags_t flags)
559 ia32_address_t *addr = &am->addr;
565 /* floating point immediates */
566 if (is_Const(node)) {
567 ir_graph *irg = get_irn_irg(node);
568 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
569 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
570 ir_tarval *tv = get_Const_tarval(node);
571 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
572 addr->base = get_symconst_base();
573 addr->index = noreg_GP;
575 addr->symconst_ent = entity;
576 addr->tls_segment = false;
/* the load mode comes from the entity (may differ from the node's mode) */
578 am->ls_mode = get_type_mode(get_entity_type(entity));
579 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); fold the load into the address mode */
583 load = get_Proj_pred(node);
584 ptr = get_Load_ptr(load);
585 mem = get_Load_mem(load);
586 new_mem = be_transform_node(mem);
587 am->pinned = get_irn_pinned(load);
588 am->ls_mode = get_Load_mode(load);
589 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
592 /* construct load address */
593 ia32_create_address_mode(addr, ptr, flags);
595 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
596 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy all address-mode fields from @p addr onto the ia32 node attributes. */
600 static void set_address(ir_node *node, const ia32_address_t *addr)
602 set_ia32_am_scale(node, addr->scale);
603 set_ia32_am_sc(node, addr->symconst_ent);
604 set_ia32_am_offs_int(node, addr->offset);
605 set_ia32_am_tls_segment(node, addr->tls_segment);
606 if (addr->symconst_sign)
607 set_ia32_am_sc_sign(node);
609 set_ia32_use_frame(node);
610 set_ia32_frame_ent(node, addr->frame_entity);
614 * Apply attributes of a given address mode to a node.
616 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
618 set_address(node, &am->addr);
620 set_ia32_op_type(node, am->op_type);
621 set_ia32_ls_mode(node, am->ls_mode);
622 if (am->pinned == op_pin_state_pinned) {
623 /* beware: some nodes are already pinned and did not allow to change the state */
624 if (get_irn_pinned(node) != op_pin_state_pinned)
625 set_irn_pinned(node, op_pin_state_pinned);
628 set_ia32_commutative(node);
632 * Check, if a given node is a Down-Conv, i.e. an integer Conv
633 * from a mode with more bits to a mode with lesser bits.
634 * Moreover, we return only true if the node has not more than 1 user.
636 * @param node the node
637 * @return non-zero if node is a Down-Conv
639 static int is_downconv(const ir_node *node)
647 src_mode = get_irn_mode(get_Conv_op(node));
648 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the destination must not widen */
650 ia32_mode_needs_gp_reg(src_mode) &&
651 ia32_mode_needs_gp_reg(dest_mode) &&
652 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
655 /** Skip all Down-Conv's on a given node and return the resulting node. */
656 ir_node *ia32_skip_downconv(ir_node *node)
658 while (is_downconv(node)) {
659 /* we only want to skip the conv when we're the only user
660 * (because this test is used in the context of address-mode selection
661 * and we don't want to use address mode for multiple users) */
662 if (get_irn_n_edges(node) > 1)
665 node = get_Conv_op(node);
/* True if @p node is a float Conv that does not widen the value. */
671 static bool is_float_downconv(const ir_node *node)
675 ir_node *pred = get_Conv_op(node);
676 ir_mode *pred_mode = get_irn_mode(pred);
677 ir_mode *mode = get_irn_mode(node);
678 return mode_is_float(pred_mode)
679 && get_mode_size_bits(mode) <= get_mode_size_bits(pred_mode);
/* Skip all float down-convs and return the underlying value. */
682 static ir_node *ia32_skip_float_downconv(ir_node *node)
684 while (is_float_downconv(node)) {
685 node = get_Conv_op(node);
/* True if @p node is a Conv between GP modes of identical bit size
 * (i.e. only the signedness differs). */
690 static bool is_sameconv(ir_node *node)
698 /* we only want to skip the conv when we're the only user
699 * (because this test is used in the context of address-mode selection
700 * and we don't want to use address mode for multiple users) */
701 if (get_irn_n_edges(node) > 1)
704 src_mode = get_irn_mode(get_Conv_op(node));
705 dest_mode = get_irn_mode(node);
707 ia32_mode_needs_gp_reg(src_mode) &&
708 ia32_mode_needs_gp_reg(dest_mode) &&
709 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
712 /** Skip all signedness convs */
713 static ir_node *ia32_skip_sameconv(ir_node *node)
715 while (is_sameconv(node)) {
716 node = get_Conv_op(node);
/* Sign-extend @p node to 32 bit (mode_Is) via an I2I conversion. */
722 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
724 ir_mode *mode = get_irn_mode(node);
725 ir_node *block = get_nodes_block(node);
726 dbg_info *dbgi = get_irn_dbg_info(node);
727 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
/* Zero-extend @p node to 32 bit (mode_Iu) via an I2I conversion.
 * Only 8- and 16-bit source modes are legal here; anything else aborts. */
730 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
732 ir_mode *mode = get_irn_mode(node);
733 ir_node *block = get_nodes_block(node);
734 dbg_info *dbgi = get_irn_dbg_info(node);
735 /* normalize to an unsigned mode */
736 switch (get_mode_size_bits(mode)) {
737 case 8: mode = mode_Bu; break;
738 case 16: mode = mode_Hu; break;
740 panic("ia32: invalid mode in zext: %+F", node);
742 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
/* Extend @p node to 32 bit, choosing sign- or zero-extension by mode. */
745 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
747 ir_mode *mode = get_irn_mode(node);
748 if (mode_is_signed(mode)) {
749 return transform_sext(node, orig_node);
751 return transform_zext(node, orig_node);
/* Return the matching no-register placeholder for @p mode
 * (GP, xmm for SSE2 floats, or x87 fp). */
755 static ir_node *get_noreg(ir_mode *const mode)
757 if (!mode_is_float(mode)) {
759 } else if (ia32_cg_config.use_sse2) {
760 return ia32_new_NoReg_xmm(current_ir_graph);
762 return ia32_new_NoReg_fp(current_ir_graph);
767 * matches operands of a node into ia32 addressing/operand modes. This covers
768 * usage of source address mode, immediates, operations with non 32-bit modes,
770 * The resulting data is filled into the @p am struct. block is the block
771 * of the node whose arguments are matched. op1, op2 are the first and second
772 * input that are matched (op1 may be NULL). other_op is another unrelated
773 * input that is not matched! but which is needed sometimes to check if AM
774 * for op1/op2 is legal.
775 * @p flags describes the supported modes of the operation in detail.
777 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
778 ir_node *op1, ir_node *op2, ir_node *other_op,
781 ia32_address_t *addr = &am->addr;
782 ir_mode *mode = get_irn_mode(op2);
783 int mode_bits = get_mode_size_bits(mode);
784 ir_node *new_op1, *new_op2;
786 unsigned commutative;
787 int use_am_and_immediates;
790 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
792 commutative = (flags & match_commutative) != 0;
793 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
794 use_am = (flags & match_am) != 0;
795 use_immediate = (flags & match_immediate) != 0;
796 assert(!use_am_and_immediates || use_immediate);
799 assert(!commutative || op1 != NULL);
800 assert(use_am || !(flags & match_8bit_am));
801 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit source AM is only allowed when explicitly requested */
803 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
804 (mode_bits == 16 && !(flags & match_16bit_am))) {
808 /* we can simply skip downconvs for mode neutral nodes: the upper bits
809 * can be random for these operations */
810 if (flags & match_mode_neutral) {
811 op2 = ia32_skip_downconv(op2);
813 op1 = ia32_skip_downconv(op1);
/* signedness-only convs never change the bit pattern; always skippable */
816 op2 = ia32_skip_sameconv(op2);
818 op1 = ia32_skip_sameconv(op1);
822 /* match immediates. firm nodes are normalized: constants are always on the
825 if (!(flags & match_try_am) && use_immediate) {
826 new_op2 = ia32_try_create_Immediate(op2, 'i');
/* try folding op2 as a source-address-mode load */
829 if (new_op2 == NULL &&
830 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
831 build_address(am, op2, ia32_create_am_normal);
832 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
833 new_op2 = get_noreg(mode);
834 am->op_type = ia32_AddrModeS;
/* for commutative ops, alternatively fold op1 and swap the inputs */
835 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
837 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
838 build_address(am, op1, ia32_create_am_normal);
840 ir_node *const noreg = get_noreg(mode);
841 if (new_op2 != NULL) {
844 new_op1 = be_transform_node(op2);
846 am->ins_permuted = true;
848 am->op_type = ia32_AddrModeS;
850 am->op_type = ia32_Normal;
852 if (flags & match_try_am) {
/* no AM possible: extend sub-32-bit operands as the flags demand */
858 mode = get_irn_mode(op2);
859 if (get_mode_size_bits(mode) != 32
860 && (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
861 if (flags & match_upconv) {
862 new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
864 new_op2 = transform_upconv(op2, op2);
865 } else if (flags & match_zero_ext) {
866 new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
868 new_op2 = transform_zext(op2, op2);
870 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
872 new_op2 = be_transform_node(op2);
873 assert(flags & match_mode_neutral);
877 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
879 new_op2 = be_transform_node(op2);
/* ensure base/index/mem are always populated for the constructors */
883 if (addr->base == NULL)
884 addr->base = noreg_GP;
885 if (addr->index == NULL)
886 addr->index = noreg_GP;
887 if (addr->mem == NULL)
890 am->new_op1 = new_op1;
891 am->new_op2 = new_op2;
892 am->commutative = commutative;
896 * "Fixes" a node that uses address mode by turning it into mode_T
897 * and returning a pn_ia32_res Proj.
899 * @param node the node
900 * @param am its address mode
902 * @return a Proj(pn_ia32_res) if a memory address mode is used,
905 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no load was folded (no memory Proj to re-attach) */
910 if (am->mem_proj == NULL)
913 /* we have to create a mode_T so the old MemProj can attach to us */
914 mode = get_irn_mode(node);
915 load = get_Proj_pred(am->mem_proj);
/* the folded load is now represented by this node */
917 be_set_transformed_node(load, node);
919 if (mode != mode_T) {
920 set_irn_mode(node, mode_T);
921 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
928 * Construct a standard binary operation, set AM and immediate if required.
930 * @param node The original node for which the binop is created
931 * @param op1 The first operand
932 * @param op2 The second operand
933 * @param func The node constructor function
934 * @return The constructed ia32 node.
936 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
937 construct_binop_func *func, match_flags_t flags)
940 ir_node *block, *new_block, *new_node;
941 ia32_address_mode_t am;
942 ia32_address_t *addr = &am.addr;
944 block = get_nodes_block(node);
945 match_arguments(&am, block, op1, op2, NULL, flags);
947 dbgi = get_irn_dbg_info(node);
948 new_block = be_transform_node(block);
949 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
950 am.new_op1, am.new_op2);
951 set_am_attributes(new_node, &am);
952 /* we can't use source address mode anymore when using immediates */
953 if (!(flags & match_am_and_immediates) &&
954 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
955 set_ia32_am_support(new_node, ia32_am_none);
956 SET_IA32_ORIG_NODE(new_node, node);
958 new_node = fix_mem_proj(new_node, &am);
964 * Generic names for the inputs of an ia32 binary op.
967 n_ia32_l_binop_left, /**< ia32 left input */
968 n_ia32_l_binop_right, /**< ia32 right input */
969 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* verify the generic indices match the concrete Adc/Sbb input layouts */
971 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
972 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
973 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
974 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
975 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
976 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
979 * Construct a binary operation which also consumes the eflags.
981 * @param node The node to transform
982 * @param func The node constructor function
983 * @param flags The match flags
984 * @return The constructor ia32 node
986 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
989 ir_node *src_block = get_nodes_block(node);
990 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
991 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
992 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
994 ir_node *block, *new_node, *new_eflags;
995 ia32_address_mode_t am;
996 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load */
998 match_arguments(&am, src_block, op1, op2, eflags, flags);
1000 dbgi = get_irn_dbg_info(node);
1001 block = be_transform_node(src_block);
1002 new_eflags = be_transform_node(eflags);
1003 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1004 am.new_op1, am.new_op2, new_eflags);
1005 set_am_attributes(new_node, &am);
1006 /* we can't use source address mode anymore when using immediates */
1007 if (!(flags & match_am_and_immediates) &&
1008 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1009 set_ia32_am_support(new_node, ia32_am_none);
1010 SET_IA32_ORIG_NODE(new_node, node);
1012 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed) initial x87 control word node. */
1017 static ir_node *get_fpcw(void)
1019 if (initial_fpcw != NULL)
1020 return initial_fpcw;
/* transform on first use and cache in the file-scope variable */
1022 initial_fpcw = be_transform_node(old_initial_fpcw);
1023 return initial_fpcw;
/* Skip float Convs that do not shrink the value (single-user only),
 * returning the underlying operand. */
1026 static ir_node *skip_float_upconv(ir_node *node)
1028 ir_mode *mode = get_irn_mode(node);
1029 assert(mode_is_float(mode));
1031 while (is_Conv(node)) {
1032 ir_node *pred = get_Conv_op(node);
1033 ir_mode *pred_mode = get_irn_mode(pred);
1036 * suboptimal, but without this check the address mode matcher
1037 * can incorrectly think that something has only 1 user
1039 if (get_irn_n_edges(node) > 1)
1042 if (!mode_is_float(pred_mode)
1043 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
/* Abort unless @p mode is the x86 extended (80-bit) float mode. */
1051 static void check_x87_floatmode(ir_mode *mode)
1053 if (mode != ia32_mode_E) {
1054 panic("ia32: x87 only supports x86 extended float mode");
1059 * Construct a standard binary operation, set AM and immediate if required.
1061 * @param op1 The first operand
1062 * @param op2 The second operand
1063 * @param func The node constructor function
1064 * @return The constructed ia32 node.
1066 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1067 construct_binop_float_func *func)
1073 ia32_address_mode_t am;
1074 ia32_address_t *addr = &am.addr;
1075 ia32_x87_attr_t *attr;
1076 /* All operations are considered commutative, because there are reverse
1078 match_flags_t flags = match_commutative | match_am;
/* for Div the result mode lives in the resmode attribute, not the node mode */
1080 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1081 check_x87_floatmode(mode);
1083 op1 = skip_float_upconv(op1);
1084 op2 = skip_float_upconv(op2);
1086 block = get_nodes_block(node);
1087 match_arguments(&am, block, op1, op2, NULL, flags);
1089 dbgi = get_irn_dbg_info(node);
1090 new_block = be_transform_node(block);
/* x87 binops additionally consume the fp control word */
1091 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1092 am.new_op1, am.new_op2, get_fpcw());
1093 set_am_attributes(new_node, &am);
/* record whether matching swapped the operands (needed for reverse forms) */
1095 attr = get_ia32_x87_attr(new_node);
1096 attr->attr.data.ins_permuted = am.ins_permuted;
1098 SET_IA32_ORIG_NODE(new_node, node);
1100 new_node = fix_mem_proj(new_node, &am);
1106 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1108 * @param op1 The first operand
1109 * @param op2 The second operand
1110 * @param func The node constructor function
1111 * @return The constructed ia32 node.
1113 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1114 construct_shift_func *func,
1115 match_flags_t flags)
1117 ir_mode *mode = get_irn_mode(node);
1119 assert(! mode_is_float(mode));
1120 assert(flags & match_immediate);
1121 assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);
/* x86 shifts implicitly mask the count to 5 bits (modulo 32) */
1123 if (get_mode_modulo_shift(mode) != 32) {
1124 /* TODO: implement special cases for non-modulo shifts */
1125 panic("modulo shift!=32 not supported by ia32 backend");
/* prepare the value operand, extending sub-32-bit modes as requested */
1130 if (flags & match_mode_neutral) {
1131 op1 = ia32_skip_downconv(op1);
1132 new_op1 = be_transform_node(op1);
1134 op1 = ia32_skip_sameconv(op1);
1135 if (get_mode_size_bits(mode) != 32) {
1136 if (flags & match_upconv) {
1137 new_op1 = transform_upconv(op1, node);
1138 } else if (flags & match_zero_ext) {
1139 new_op1 = transform_zext(op1, node);
1141 /* match_mode_neutral not handled here because it makes no
1142 * sense for shift operations */
1143 panic("ia32 code selection failed for %+F", node);
1146 new_op1 = be_transform_node(op1);
1150 /* the shift amount can be any mode that is bigger than 5 bits, since all
1151 * other bits are ignored anyway */
1152 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1153 ir_node *const op = get_Conv_op(op2);
1154 if (mode_is_float(get_irn_mode(op)))
1157 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1159 new_op2 = create_immediate_or_transform(op2);
1161 dbg_info *dbgi = get_irn_dbg_info(node);
1162 ir_node *block = get_nodes_block(node);
1163 ir_node *new_block = be_transform_node(block);
1164 ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
1165 SET_IA32_ORIG_NODE(new_node, node);
1167 /* lowered shift instruction may have a dependency operand, handle it here */
1168 if (get_irn_arity(node) == 3) {
1169 /* we have a dependency */
1170 ir_node* dep = get_irn_n(node, 2);
1171 if (get_irn_n_edges(dep) > 1) {
1172 /* ... which has at least one user other than 'node' */
1173 ir_node *new_dep = be_transform_node(dep);
1174 add_irn_dep(new_node, new_dep);
1183 * Construct a standard unary operation, set AM and immediate if required.
1185 * @param op The operand
1186 * @param func The node constructor function
1187 * @return The constructed ia32 node.
1189 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1190 match_flags_t flags)
1193 ir_node *block, *new_block, *new_op, *new_node;
/* only mode-neutral matching is meaningful for unops */
1195 assert(flags == 0 || flags == match_mode_neutral);
1196 if (flags & match_mode_neutral) {
1197 op = ia32_skip_downconv(op);
1200 new_op = be_transform_node(op);
1201 dbgi = get_irn_dbg_info(node);
1202 block = get_nodes_block(node);
1203 new_block = be_transform_node(block);
1204 new_node = func(dbgi, new_block, new_op);
1206 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node computing the address described by @p addr. */
1211 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1212 ia32_address_t *addr)
1222 base = be_transform_node(base);
1229 idx = be_transform_node(idx);
1232 /* segment overrides are ineffective for Leas :-( so we have to patch
1234 if (addr->tls_segment) {
/* add the TLS base explicitly via an extra Lea */
1235 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1236 assert(addr->symconst_ent != NULL);
1237 if (base == noreg_GP)
1240 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1241 addr->tls_segment = false;
1244 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1245 set_address(res, addr);
1251 * Returns non-zero if a given address mode has a symbolic or
1252 * numerical offset != 0.
1254 static int am_has_immediates(const ia32_address_t *addr)
1255 /* frame entities / frame usage count as immediates too, since they
1255 * become offsets after stack-frame layout */
1256 return addr->offset != 0 || addr->symconst_ent != NULL
1257 || addr->frame_entity || addr->use_frame;
1259 /* Constructor signature shared by ShlD/ShrD node builders. */
1260 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1261 ir_node *high, ir_node *low,
1265 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1266 * op1 - target to be shifted
1267 * op2 - contains bits to be shifted into target
1269 * Only op3 can be an immediate.
1271 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1272 ir_node *high, ir_node *low, ir_node *count,
1273 new_shiftd_func func)
1275 ir_node *new_block = be_transform_node(block);
1276 ir_node *new_high = be_transform_node(high);
1277 ir_node *new_low = be_transform_node(low);
1281 /* the shift amount can be any mode that is bigger than 5 bits, since all
1282 * other bits are ignored anyway */
1283 while (is_Conv(count) &&
1284 get_irn_n_edges(count) == 1 &&
1285 mode_is_int(get_irn_mode(count))) {
1286 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1287 count = get_Conv_op(count);
1288 /* shift amount may also be encodable as an immediate */
1289 new_count = create_immediate_or_transform(count);
1291 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1296 * Tests whether 2 values result in 'x' and '32-x' when interpreted as a shift
1299 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1300 /* only constant shift amounts can be recognized here */
1301 if (is_Const(value1) && is_Const(value2)) {
1302 ir_tarval *tv1 = get_Const_tarval(value1);
1303 ir_tarval *tv2 = get_Const_tarval(value2);
1304 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1305 long v1 = get_tarval_long(tv1);
1306 long v2 = get_tarval_long(tv2);
1306 /* v1 <= v2 additionally rules out negative/oversized pairs
1306 * like (33, -1) that would also satisfy v2 == 32-v1 */
1307 return v1 <= v2 && v2 == 32-v1;
1312 /* Try to recognize an Or/Add of complementary Shl/Shr parts as a
1312 * double-word shift and turn it into an ia32 ShlD/ShrD. Returns the
1312 * new node on success, presumably NULL otherwise (tail not visible
1312 * in this listing — TODO confirm). */
1313 static ir_node *match_64bit_shift(ir_node *node)
1315 ir_node *op1 = get_binop_left(node);
1316 ir_node *op2 = get_binop_right(node);
1317 assert(is_Or(node) || is_Add(node));
1325 /* match ShlD operation */
1326 if (is_Shl(op1) && is_Shr(op2)) {
1327 ir_node *shl_right = get_Shl_right(op1);
1328 ir_node *shl_left = get_Shl_left(op1);
1329 ir_node *shr_right = get_Shr_right(op2);
1330 ir_node *shr_left = get_Shr_left(op2);
1331 /* constant ShlD operation */
1332 if (is_complementary_shifts(shl_right, shr_right)) {
1333 dbg_info *dbgi = get_irn_dbg_info(node);
1334 ir_node *block = get_nodes_block(node);
1335 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1338 /* constant ShrD operation */
1339 if (is_complementary_shifts(shr_right, shl_right)) {
1340 dbg_info *dbgi = get_irn_dbg_info(node);
1341 ir_node *block = get_nodes_block(node);
1342 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1345 /* lower_dw produces the following for ShlD:
1346 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1347 if (is_Shr(shr_left) && is_Not(shr_right)
1348 && is_Const_1(get_Shr_right(shr_left))
1349 && get_Not_op(shr_right) == shl_right) {
1350 dbg_info *dbgi = get_irn_dbg_info(node);
1351 ir_node *block = get_nodes_block(node);
1352 ir_node *val_h = get_Shr_left(shr_left);
1353 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1356 /* lower_dw produces the following for ShrD:
1357 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1358 if (is_Shl(shl_left) && is_Not(shl_right)
1359 && is_Const_1(get_Shl_right(shl_left))
1360 && get_Not_op(shl_right) == shr_right) {
1361 dbg_info *dbgi = get_irn_dbg_info(node);
1362 ir_node *block = get_nodes_block(node);
1363 ir_node *val_h = get_Shl_left(shl_left);
1364 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1373 * Creates an ia32 Add.
1375 * @return the created ia32 Add node
1377 static ir_node *gen_Add(ir_node *node)
1379 ir_mode *mode = get_irn_mode(node);
1380 ir_node *op1 = get_Add_left(node);
1381 ir_node *op2 = get_Add_right(node);
1383 ir_node *block, *new_block, *new_node, *add_immediate_op;
1384 ia32_address_t addr;
1385 ia32_address_mode_t am;
1386 /* an Add may actually be a lowered 64bit shift (high|low combine) */
1387 new_node = match_64bit_shift(node);
1388 if (new_node != NULL)
1391 if (mode_is_float(mode)) {
1392 if (ia32_cg_config.use_sse2)
1393 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1394 match_commutative | match_am);
1396 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fadd);
1399 ia32_mark_non_am(node);
1403 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1404 * 1. Add with immediate -> Lea
1405 * 2. Add with possible source address mode -> Add
1406 * 3. Otherwise -> Lea
1408 memset(&addr, 0, sizeof(addr));
1409 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1410 add_immediate_op = NULL;
1412 dbgi = get_irn_dbg_info(node);
1413 block = get_nodes_block(node);
1414 new_block = be_transform_node(block);
1416 /* case 0: everything folded into immediates -> emit a Const */
1417 if (addr.base == NULL && addr.index == NULL) {
1418 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1419 addr.symconst_sign, 0, addr.offset);
1420 SET_IA32_ORIG_NODE(new_node, node);
1423 /* add with immediate? */
1424 if (addr.index == NULL) {
1425 add_immediate_op = addr.base;
1426 } else if (addr.base == NULL && addr.scale == 0) {
1427 add_immediate_op = addr.index;
1430 if (add_immediate_op != NULL) {
1430 /* Add x,0 should have been folded by the middle-end already */
1431 if (!am_has_immediates(&addr)) {
1432 #ifdef DEBUG_libfirm
1433 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1436 return be_transform_node(add_immediate_op);
1439 new_node = create_lea_from_address(dbgi, new_block, &addr);
1440 SET_IA32_ORIG_NODE(new_node, node);
1444 /* test if we can use source address mode */
1445 match_arguments(&am, block, op1, op2, NULL, match_commutative
1446 | match_mode_neutral | match_am | match_immediate | match_try_am);
1448 /* construct an Add with source address mode */
1449 if (am.op_type == ia32_AddrModeS) {
1450 ia32_address_t *am_addr = &am.addr;
1451 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1452 am_addr->index, am_addr->mem, am.new_op1,
1454 set_am_attributes(new_node, &am);
1455 SET_IA32_ORIG_NODE(new_node, node);
1457 new_node = fix_mem_proj(new_node, &am);
1462 /* otherwise construct a lea */
1463 new_node = create_lea_from_address(dbgi, new_block, &addr);
1464 SET_IA32_ORIG_NODE(new_node, node);
1469 * Creates an ia32 Mul.
1471 * @return the created ia32 Mul node
1473 static ir_node *gen_Mul(ir_node *node)
1475 ir_node *op1 = get_Mul_left(node);
1476 ir_node *op2 = get_Mul_right(node);
1477 ir_mode *mode = get_irn_mode(node);
1478 /* float multiply: SSE2 xMul if available, otherwise x87 fmul */
1479 if (mode_is_float(mode)) {
1480 if (ia32_cg_config.use_sse2)
1481 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1482 match_commutative | match_am);
1484 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fmul);
1485 /* integer multiply via IMul; mode-neutral since only the low 32 bits
1485 * of the result are used */
1486 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1487 match_commutative | match_am | match_mode_neutral |
1488 match_immediate | match_am_and_immediates);
1492 * Creates an ia32 Mulh.
1493 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1494 * this result while Mul returns the lower 32 bit.
1496 * @return the created ia32 Mulh node
1498 static ir_node *gen_Mulh(ir_node *node)
1500 dbg_info *dbgi = get_irn_dbg_info(node);
1501 ir_node *op1 = get_Mulh_left(node);
1502 ir_node *op2 = get_Mulh_right(node);
1503 ir_mode *mode = get_irn_mode(node);
1505 ir_node *proj_res_high;
1506 /* only 32bit modes are handled; smaller/larger must be lowered first */
1507 if (get_mode_size_bits(mode) != 32) {
1508 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1510 /* signed -> one-operand IMul, unsigned -> Mul; in both cases the
1510 * high half of the 64bit product is projected out */
1511 if (mode_is_signed(mode)) {
1512 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1513 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1515 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1516 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1518 return proj_res_high;
1522 * Creates an ia32 And.
1524 * @return The created ia32 And node
1526 static ir_node *gen_And(ir_node *node)
1528 ir_node *op1 = get_And_left(node);
1529 ir_node *op2 = get_And_right(node);
1530 assert(! mode_is_float(get_irn_mode(node)));
1532 /* is it a zero extension? */
1533 if (is_Const(op2)) {
1534 ir_tarval *tv = get_Const_tarval(op2);
1535 long v = get_tarval_long(tv);
1536 /* And with 0xFF/0xFFFF is a zero-extend from 8/16 bit, which is
1536 * emitted as a Conv (movzx) instead of an And */
1537 if (v == 0xFF || v == 0xFFFF) {
1538 dbg_info *dbgi = get_irn_dbg_info(node);
1539 ir_node *block = get_nodes_block(node);
1546 assert(v == 0xFFFF);
1549 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1553 /* generic case: plain And */
1554 return gen_binop(node, op1, op2, new_bd_ia32_And,
1555 match_commutative | match_mode_neutral | match_am | match_immediate);
1559 * Creates an ia32 Or.
1561 * @return The created ia32 Or node
1563 static ir_node *gen_Or(ir_node *node)
1565 ir_node *op1 = get_Or_left(node);
1566 ir_node *op2 = get_Or_right(node);
1567 /* an Or may combine two halves of a lowered 64bit shift */
1569 res = match_64bit_shift(node);
1573 assert (! mode_is_float(get_irn_mode(node)));
1574 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1575 | match_mode_neutral | match_am | match_immediate);
1581 * Creates an ia32 Eor.
1583 * @return The created ia32 Eor node
1585 static ir_node *gen_Eor(ir_node *node)
1587 ir_node *op1 = get_Eor_left(node);
1588 ir_node *op2 = get_Eor_right(node);
1589 /* integer only; float Eor does not occur here */
1590 assert(! mode_is_float(get_irn_mode(node)));
1591 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1592 | match_mode_neutral | match_am | match_immediate);
1597 * Creates an ia32 Sub.
1599 * @return The created ia32 Sub node
1601 static ir_node *gen_Sub(ir_node *node)
1603 ir_node *op1 = get_Sub_left(node);
1604 ir_node *op2 = get_Sub_right(node);
1605 ir_mode *mode = get_irn_mode(node);
1606 /* float subtract: SSE2 xSub (not commutative, so no match_commutative)
1606 * or x87 fsub */
1607 if (mode_is_float(mode)) {
1608 if (ia32_cg_config.use_sse2)
1609 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1611 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fsub);
1613 /* Sub with Const should have been normalized to Add with negated
1613 * constant by the middle-end */
1614 if (is_Const(op2)) {
1615 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1619 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1620 | match_am | match_immediate);
1622 /* Combine the memory input of the original node (src_mem) with the
1622 * memory consumed by a folded address mode (am_mem), taking care not
1622 * to create a memory dependency cycle through the folded load. */
1623 static ir_node *transform_AM_mem(ir_node *const block,
1624 ir_node *const src_val,
1625 ir_node *const src_mem,
1626 ir_node *const am_mem)
1627 /* no AM memory -> the original memory suffices */
1628 if (is_NoMem(am_mem)) {
1629 return be_transform_node(src_mem);
1630 } else if (is_Proj(src_val) &&
1632 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1633 /* avoid memory loop */
1635 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1636 ir_node *const ptr_pred = get_Proj_pred(src_val);
1637 int const arity = get_Sync_n_preds(src_mem);
1641 /* rebuild the Sync without predecessors of the folded load */
1642 NEW_ARR_A(ir_node*, ins, arity + 1);
1644 /* NOTE: This sometimes produces dead-code because the old sync in
1645 * src_mem might not be used anymore, we should detect this case
1646 * and kill the sync... */
1647 for (i = arity - 1; i >= 0; --i) {
1648 ir_node *const pred = get_Sync_pred(src_mem, i);
1650 /* avoid memory loop */
1651 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1654 ins[n++] = be_transform_node(pred);
1657 if (n==1 && ins[0] == am_mem) {
1659 /* creating a new Sync and relying on CSE may fail,
1660 * if am_mem is a ProjM, which does not yet verify. */
1664 return new_r_Sync(block, n, ins);
1667 /* generic case: Sync of the (transformed) original memory and the
1667 * AM memory */
1668 ins[0] = be_transform_node(src_mem);
1670 return new_r_Sync(block, 2, ins);
1675 * Create a 32bit to 64bit signed extension.
1677 * @param dbgi debug info
1678 * @param block the block where node nodes should be placed
1679 * @param val the value to extend
1680 * @param orig the original node
1682 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1683 ir_node *val, const ir_node *orig)
1687 /* either the short cltd/cdq encoding (value pinned to eax via
1687 * ProduceVal) or an explicit arithmetic shift right by 31 */
1688 if (ia32_cg_config.use_short_sex_eax) {
1689 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1690 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1692 ir_graph *const irg = get_Block_irg(block);
1693 ir_node *const imm31 = ia32_create_Immediate(irg, NULL, 0, 31);
1694 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1696 SET_IA32_ORIG_NODE(res, orig);
1701 * Generates an ia32 Div with additional infrastructure for the
1702 * register allocator if needed.
1704 static ir_node *create_Div(ir_node *node)
1705 /* shared transformer for both Div and Mod nodes */
1706 dbg_info *dbgi = get_irn_dbg_info(node);
1707 ir_node *block = get_nodes_block(node);
1708 ir_node *new_block = be_transform_node(block);
1709 int throws_exception = ir_throws_exception(node);
1716 ir_node *sign_extension;
1717 ia32_address_mode_t am;
1718 ia32_address_t *addr = &am.addr;
1720 /* the upper bits have random contents for smaller modes */
1721 switch (get_irn_opcode(node)) {
1723 op1 = get_Div_left(node);
1724 op2 = get_Div_right(node);
1725 mem = get_Div_mem(node);
1726 mode = get_Div_resmode(node);
1729 op1 = get_Mod_left(node);
1730 op2 = get_Mod_right(node);
1731 mem = get_Mod_mem(node);
1732 mode = get_Mod_resmode(node);
1735 panic("invalid divmod node %+F", node);
1737 /* match_upconv: operands must be properly extended to 32 bit */
1738 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1740 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1741 is the memory of the consumed address. We can have only the second op as address
1742 in Div nodes, so check only op2. */
1743 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1744 /* signed division needs edx:eax sign-extended (IDiv), unsigned needs
1744 * edx zeroed (Div) */
1745 if (mode_is_signed(mode)) {
1746 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1747 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1748 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1750 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1752 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1753 addr->index, new_mem, am.new_op2,
1754 am.new_op1, sign_extension);
1755 /* division may trap (e.g. divide by zero) */
1756 ir_set_throws_exception(new_node, throws_exception);
1758 set_irn_pinned(new_node, get_irn_pinned(node));
1760 set_am_attributes(new_node, &am);
1761 SET_IA32_ORIG_NODE(new_node, node);
1763 new_node = fix_mem_proj(new_node, &am);
1769 * Generates an ia32 Mod.
1770 * The div/mod machine instruction produces both results at once, so
1770 * Mod shares create_Div() with gen_Div.
1771 static ir_node *gen_Mod(ir_node *node)
1773 return create_Div(node);
1777 * Generates an ia32 Div.
1779 static ir_node *gen_Div(ir_node *node)
1781 ir_mode *mode = get_Div_resmode(node);
1781 /* float division: SSE2 xDiv or x87 fdiv */
1782 if (mode_is_float(mode)) {
1783 ir_node *op1 = get_Div_left(node);
1784 ir_node *op2 = get_Div_right(node);
1786 if (ia32_cg_config.use_sse2) {
1787 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1789 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fdiv);
1792 /* integer division shares the Div/Mod transformer */
1793 return create_Div(node);
1797 * Creates an ia32 Shl.
1799 * @return The created ia32 Shl node
1801 static ir_node *gen_Shl(ir_node *node)
1803 ir_node *left = get_Shl_left(node);
1804 ir_node *right = get_Shl_right(node);
1805 /* mode-neutral: upper bits shifted out anyway */
1806 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1807 match_mode_neutral | match_immediate);
1811 * Creates an ia32 Shr.
1813 * @return The created ia32 Shr node
1815 static ir_node *gen_Shr(ir_node *node)
1817 ir_node *left = get_Shr_left(node);
1818 ir_node *right = get_Shr_right(node);
1819 /* logical right shift needs the value zero-extended first */
1820 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1821 match_immediate | match_zero_ext);
1825 * Creates an ia32 Sar.
1827 * @return The created ia32 Shrs node
1829 static ir_node *gen_Shrs(ir_node *node)
1831 ir_node *left = get_Shrs_left(node);
1832 ir_node *right = get_Shrs_right(node);
1833 /* Shrs by 31 (elided guard — presumably val == 31) extracts the sign
1833 * bit, i.e. a 32->64 style sign extension; use cltd/sar-31 helper */
1834 if (is_Const(right)) {
1835 ir_tarval *tv = get_Const_tarval(right);
1836 long val = get_tarval_long(tv);
1838 /* this is a sign extension */
1839 dbg_info *dbgi = get_irn_dbg_info(node);
1840 ir_node *block = be_transform_node(get_nodes_block(node));
1841 ir_node *new_op = be_transform_node(left);
1843 return create_sex_32_64(dbgi, block, new_op, node);
1847 /* 8 or 16 bit sign extension? */
1848 if (is_Const(right) && is_Shl(left)) {
1848 /* Shrs(Shl(x, c), c) with c == 16 or 24 is a sign-extend from
1848 * 16 or 8 bit, emitted as an I2I Conv (movsx) */
1849 ir_node *shl_left = get_Shl_left(left);
1850 ir_node *shl_right = get_Shl_right(left);
1851 if (is_Const(shl_right)) {
1852 ir_tarval *tv1 = get_Const_tarval(right);
1853 ir_tarval *tv2 = get_Const_tarval(shl_right);
1854 if (tv1 == tv2 && tarval_is_long(tv1)) {
1855 long val = get_tarval_long(tv1);
1856 if (val == 16 || val == 24) {
1857 dbg_info *dbgi = get_irn_dbg_info(node);
1858 ir_node *block = get_nodes_block(node);
1868 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1876 /* generic case: arithmetic shift right; operand must be sign
1876 * extended properly (match_upconv) */
1877 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1878 match_immediate | match_upconv);
1884 * Creates an ia32 Rol.
1886 * @param op1 The first operator
1887 * @param op2 The second operator
1888 * @return The created ia32 RotL node
1890 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1892 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1898 * Creates an ia32 Ror.
1899 * NOTE: There is no RotR with immediate because this would always be a RotL
1900 * "imm-mode_size_bits" which can be pre-calculated.
1902 * @param op1 The first operator
1903 * @param op2 The second operator
1904 * @return The created ia32 RotR node
1906 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1908 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1914 * Creates an ia32 RotR or RotL (depending on the found pattern).
1916 * @return The created ia32 RotL or RotR node
1918 static ir_node *gen_Rotl(ir_node *node)
1920 ir_node *op1 = get_Rotl_left(node);
1921 ir_node *op2 = get_Rotl_right(node);
1922 /* Rotl(x, -n) is Rotr(x, n); fold the Minus into the rotate kind */
1923 if (is_Minus(op2)) {
1924 return gen_Ror(node, op1, get_Minus_op(op2));
1927 return gen_Rol(node, op1, op2);
1933 * Transforms a Minus node.
1935 * @return The created ia32 Minus node
1937 static ir_node *gen_Minus(ir_node *node)
1939 ir_node *op = get_Minus_op(node);
1940 ir_node *block = be_transform_node(get_nodes_block(node));
1941 dbg_info *dbgi = get_irn_dbg_info(node);
1942 ir_mode *mode = get_irn_mode(node);
1946 /* float negate: SSE2 flips the sign bit by xor-ing with a constant
1946 * from memory; x87 has a dedicated fchs instruction */
1947 if (mode_is_float(mode)) {
1948 ir_node *new_op = be_transform_node(op);
1949 if (ia32_cg_config.use_sse2) {
1950 /* TODO: non-optimal... if we have many xXors, then we should
1951 * rather create a load for the const and use that instead of
1952 * several AM nodes... */
1953 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1955 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1956 noreg_GP, nomem, new_op, noreg_xmm);
1958 size = get_mode_size_bits(mode);
1958 /* sign-bit mask constant, single or double precision */
1959 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1961 set_ia32_am_sc(new_node, ent);
1962 set_ia32_op_type(new_node, ia32_AddrModeS);
1963 set_ia32_ls_mode(new_node, mode);
1965 new_node = new_bd_ia32_fchs(dbgi, block, new_op);
1967 /* integer negate */
1968 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1971 SET_IA32_ORIG_NODE(new_node, node);
1977 * Transforms a Not node.
1979 * @return The created ia32 Not node
1981 static ir_node *gen_Not(ir_node *node)
1983 ir_node *op = get_Not_op(node);
1984 /* boolean and float Nots must have been lowered before */
1985 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1986 assert(!mode_is_float(get_irn_mode(node)));
1988 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1990 /* Build a float absolute value (optionally negated afterwards):
1990 * SSE2 clears the sign bit by And-ing with a memory constant, x87
1990 * uses fabs (and fchs for the negated variant). */
1991 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1992 bool negate, ir_node *node)
1994 ir_node *new_block = be_transform_node(block);
1995 ir_mode *mode = get_irn_mode(op);
1996 ir_node *new_op = be_transform_node(op);
2001 assert(mode_is_float(mode));
2003 if (ia32_cg_config.use_sse2) {
2004 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2005 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2006 noreg_GP, nomem, new_op, noreg_fp);
2008 size = get_mode_size_bits(mode);
2008 /* sign-bit-clearing mask, single or double precision */
2009 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2011 set_ia32_am_sc(new_node, ent);
2013 SET_IA32_ORIG_NODE(new_node, node);
2015 set_ia32_op_type(new_node, ia32_AddrModeS);
2016 set_ia32_ls_mode(new_node, mode);
2018 /* TODO, implement -Abs case */
2020 /* x87 path */
2021 check_x87_floatmode(mode);
2022 new_node = new_bd_ia32_fabs(dbgi, new_block, new_op);
2023 SET_IA32_ORIG_NODE(new_node, node);
2024 /* negate requested: -|x| via fchs on top of fabs */
2025 new_node = new_bd_ia32_fchs(dbgi, new_block, new_node);
2026 SET_IA32_ORIG_NODE(new_node, node);
2034 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2036 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2038 dbg_info *dbgi = get_irn_dbg_info(cmp);
2039 ir_node *block = get_nodes_block(cmp);
2040 ir_node *new_block = be_transform_node(block);
2041 ir_node *op1 = be_transform_node(x);
2042 ir_node *op2 = be_transform_node(n);
2043 /* bt copies the selected bit into the carry flag */
2044 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
2046 /* Map a firm ir_relation to an ia32 condition code, taking the compare
2046 * mode (float/signed/unsigned) into account. For signed compares
2046 * without possible overflow, cheaper sign-flag tests are used. */
2047 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2049 bool overflow_possible)
2050 /* --- float compares (parity flag encodes "unordered") --- */
2051 if (mode_is_float(mode)) {
2053 case ir_relation_equal: return ia32_cc_float_equal;
2054 case ir_relation_less: return ia32_cc_float_below;
2055 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2056 case ir_relation_greater: return ia32_cc_float_above;
2057 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2058 case ir_relation_less_greater: return ia32_cc_not_equal;
2059 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2060 case ir_relation_unordered: return ia32_cc_parity;
2061 case ir_relation_unordered_equal: return ia32_cc_equal;
2062 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2063 case ir_relation_unordered_less_equal:
2064 return ia32_cc_float_unordered_below_equal;
2065 case ir_relation_unordered_greater:
2066 return ia32_cc_float_unordered_above;
2067 case ir_relation_unordered_greater_equal:
2068 return ia32_cc_float_unordered_above_equal;
2069 case ir_relation_unordered_less_greater:
2070 return ia32_cc_float_not_equal;
2071 case ir_relation_false:
2072 case ir_relation_true:
2073 /* should we introduce a jump always/jump never? */
2076 panic("Unexpected float pnc");
2077 } else if (mode_is_signed(mode)) {
2078 /* --- signed integer compares --- */
2079 case ir_relation_unordered_equal:
2080 case ir_relation_equal: return ia32_cc_equal;
2081 case ir_relation_unordered_less:
2082 case ir_relation_less:
2082 /* without overflow "x < 0"-style tests reduce to the sign flag */
2083 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2084 case ir_relation_unordered_less_equal:
2085 case ir_relation_less_equal: return ia32_cc_less_equal;
2086 case ir_relation_unordered_greater:
2087 case ir_relation_greater: return ia32_cc_greater;
2088 case ir_relation_unordered_greater_equal:
2089 case ir_relation_greater_equal:
2090 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2091 case ir_relation_unordered_less_greater:
2092 case ir_relation_less_greater: return ia32_cc_not_equal;
2093 case ir_relation_less_equal_greater:
2094 case ir_relation_unordered:
2095 case ir_relation_false:
2096 case ir_relation_true:
2097 /* introduce jump always/jump never? */
2100 panic("Unexpected pnc");
2102 /* --- unsigned integer compares (below/above family) --- */
2103 case ir_relation_unordered_equal:
2104 case ir_relation_equal: return ia32_cc_equal;
2105 case ir_relation_unordered_less:
2106 case ir_relation_less: return ia32_cc_below;
2107 case ir_relation_unordered_less_equal:
2108 case ir_relation_less_equal: return ia32_cc_below_equal;
2109 case ir_relation_unordered_greater:
2110 case ir_relation_greater: return ia32_cc_above;
2111 case ir_relation_unordered_greater_equal:
2112 case ir_relation_greater_equal: return ia32_cc_above_equal;
2113 case ir_relation_unordered_less_greater:
2114 case ir_relation_less_greater: return ia32_cc_not_equal;
2115 case ir_relation_less_equal_greater:
2116 case ir_relation_unordered:
2117 case ir_relation_false:
2118 case ir_relation_true:
2119 /* introduce jump always/jump never? */
2122 panic("Unexpected pnc");
2125 /* Transform a Cmp into a flags-producing node and compute the ia32
2125 * condition code to test on it. Recognizes bit-test (bt) patterns
2125 * when the target supports them. */
2126 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2128 /* must have a Cmp as input */
2129 ir_relation relation = get_Cmp_relation(cmp);
2130 ir_node *l = get_Cmp_left(cmp);
2131 ir_node *r = get_Cmp_right(cmp);
2132 ir_mode *mode = get_irn_mode(l);
2133 bool overflow_possible;
2136 /* check for bit-test */
2137 if (ia32_cg_config.use_bt
2138 && (relation == ir_relation_equal
2139 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2140 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2142 ir_node *la = get_And_left(l);
2143 ir_node *ra = get_And_right(l);
2150 ir_node *c = get_Shl_left(la);
2151 if (is_Const_1(c) && is_Const_0(r)) {
2152 /* (1 << n) & ra) */
2153 ir_node *n = get_Shl_right(la);
2154 flags = gen_bt(cmp, ra, n);
2155 /* the bit is copied into the CF flag */
2156 if (relation & ir_relation_equal)
2157 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2159 *cc_out = ia32_cc_below; /* test for CF=1 */
2165 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2166 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2167 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2168 * a predecessor node). So add the < bit.
2169 * (Note that we do not want to produce <=> (which can happen for
2170 * unoptimized code), because no x86 flag can represent that */
2171 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2172 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
2173 /* comparing against 0 cannot overflow -> cheaper sign-flag codes */
2174 overflow_possible = true;
2175 if (is_Const(r) && is_Const_null(r))
2176 overflow_possible = false;
2178 /* just do a normal transformation of the Cmp */
2179 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2180 flags = be_transform_node(cmp);
2185 * Transforms a Load.
2187 * @return the created ia32 Load node
2189 static ir_node *gen_Load(ir_node *node)
2191 ir_node *old_block = get_nodes_block(node);
2192 ir_node *block = be_transform_node(old_block);
2193 ir_node *ptr = get_Load_ptr(node);
2194 ir_node *mem = get_Load_mem(node);
2195 ir_node *new_mem = be_transform_node(mem);
2196 dbg_info *dbgi = get_irn_dbg_info(node);
2197 ir_mode *mode = get_Load_mode(node);
2198 int throws_exception = ir_throws_exception(node);
2202 ia32_address_t addr;
2204 /* construct load address */
2205 memset(&addr, 0, sizeof(addr));
2206 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2213 base = be_transform_node(base);
2219 idx = be_transform_node(idx);
2221 /* pick the load flavor: SSE2 xLoad / x87 fld for floats, a widening
2221 * I2I Conv for sub-32bit integers, plain Load otherwise */
2222 if (mode_is_float(mode)) {
2223 if (ia32_cg_config.use_sse2) {
2224 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2227 new_node = new_bd_ia32_fld(dbgi, block, base, idx, new_mem,
2231 assert(mode != mode_b);
2233 /* create a conv node with address mode for smaller modes */
2234 if (get_mode_size_bits(mode) < 32) {
2235 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2236 new_mem, noreg_GP, mode);
2238 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2240 /* loads may fault (e.g. unmapped page) */
2241 ir_set_throws_exception(new_node, throws_exception);
2243 set_irn_pinned(new_node, get_irn_pinned(node));
2244 set_ia32_op_type(new_node, ia32_AddrModeS);
2245 set_ia32_ls_mode(new_node, mode);
2246 set_address(new_node, &addr);
2247 /* floating (unpinned) loads may be rematerialized by the spiller */
2248 if (get_irn_pinned(node) == op_pin_state_floats) {
2249 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
2250 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
2251 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2252 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2255 SET_IA32_ORIG_NODE(new_node, node);
2259 /* Decide whether the load feeding 'node' (a Proj of a Load) can be
2259 * folded into a destination-address-mode operation that stores back
2259 * through 'ptr'. Returns non-zero when folding is safe. */
2260 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2261 ir_node *ptr, ir_node *other)
2268 /* we only use address mode if we're the only user of the load */
2269 if (get_irn_n_edges(node) > 1)
2272 load = get_Proj_pred(node);
2274 /* load must live in the same block as the store */
2275 if (get_nodes_block(load) != block)
2278 /* store should have the same pointer as the load */
2279 if (get_Load_ptr(load) != ptr)
2282 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2283 if (other != NULL &&
2284 get_nodes_block(other) == block &&
2285 heights_reachable_in_block(ia32_heights, other, load)) {
2289 if (ia32_prevents_AM(block, load, mem))
2291 /* Store should be attached to the load via mem */
2292 assert(heights_reachable_in_block(ia32_heights, mem, load));
2296 /* Build a destination-address-mode binop (op [mem], reg/imm): the
2296 * loaded value is modified in place and stored back. Falls back (in
2296 * elided code) when neither operand qualifies for folding. */
2297 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2298 ir_node *mem, ir_node *ptr, ir_mode *mode,
2299 construct_binop_dest_func *func,
2300 construct_binop_dest_func *func8bit,
2301 match_flags_t flags)
2303 ir_node *src_block = get_nodes_block(node);
2311 ia32_address_mode_t am;
2312 ia32_address_t *addr = &am.addr;
2313 memset(&am, 0, sizeof(am));
2315 assert(flags & match_immediate); /* there is no destam node without... */
2316 commutative = (flags & match_commutative) != 0;
2317 /* fold whichever operand is the single-use load of 'ptr'; for
2317 * commutative ops the operands may be swapped */
2318 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2319 build_address(&am, op1, ia32_create_am_double_use);
2320 new_op = create_immediate_or_transform(op2);
2321 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2322 build_address(&am, op2, ia32_create_am_double_use);
2323 new_op = create_immediate_or_transform(op1);
2327 /* fill in defaults for unused address parts */
2328 if (addr->base == NULL)
2329 addr->base = noreg_GP;
2330 if (addr->index == NULL)
2331 addr->index = noreg_GP;
2332 if (addr->mem == NULL)
2335 dbgi = get_irn_dbg_info(node);
2336 block = be_transform_node(src_block);
2337 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2338 /* 8bit operations need the dedicated 8bit constructor */
2339 if (get_mode_size_bits(mode) == 8) {
2340 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2342 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2344 set_address(new_node, addr);
2345 set_ia32_op_type(new_node, ia32_AddrModeD);
2346 set_ia32_ls_mode(new_node, mode);
2347 SET_IA32_ORIG_NODE(new_node, node);
2348 /* reroute all memory users of the folded load to the new node */
2349 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2350 mem_proj = be_transform_node(am.mem_proj);
2351 be_set_transformed_node(am.mem_proj, new_node);
2352 be_set_transformed_node(mem_proj, new_node);
2356 /* Build a destination-address-mode unop (op [mem], e.g. inc/dec/not):
2356 * modifies the value in memory directly. Returns NULL (in elided
2356 * code — TODO confirm) when folding is not possible. */
2357 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2358 ir_node *ptr, ir_mode *mode,
2359 construct_unop_dest_func *func)
2361 ir_node *src_block = get_nodes_block(node);
2367 ia32_address_mode_t am;
2368 ia32_address_t *addr = &am.addr;
2370 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2373 memset(&am, 0, sizeof(am));
2374 build_address(&am, op, ia32_create_am_double_use);
2376 dbgi = get_irn_dbg_info(node);
2377 block = be_transform_node(src_block);
2378 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2379 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2380 set_address(new_node, addr);
2381 set_ia32_op_type(new_node, ia32_AddrModeD);
2382 set_ia32_ls_mode(new_node, mode);
2383 SET_IA32_ORIG_NODE(new_node, node);
2384 /* reroute all memory users of the folded load to the new node */
2385 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2386 mem_proj = be_transform_node(am.mem_proj);
2387 be_set_transformed_node(am.mem_proj, new_node);
2388 be_set_transformed_node(mem_proj, new_node);
2392 /* Try to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem that writes
2392 * the condition byte directly to memory. Only 8bit stores of a
2392 * 0/1 Mux qualify; returns NULL (in elided code — TODO confirm)
2392 * otherwise. */
2393 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2395 ir_mode *mode = get_irn_mode(node);
2396 ir_node *mux_true = get_Mux_true(node);
2397 ir_node *mux_false = get_Mux_false(node);
2405 ia32_condition_code_t cc;
2406 ia32_address_t addr;
2407 /* Setcc only produces a byte */
2408 if (get_mode_size_bits(mode) != 8)
2410 /* Mux(sel, 1, 0) stores the condition, Mux(sel, 0, 1) the negation */
2411 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2413 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2419 cond = get_Mux_sel(node);
2420 flags = get_flags_node(cond, &cc);
2421 /* we can't handle the float special cases with SetM */
2422 if (cc & ia32_cc_additional_float_cases)
2425 cc = ia32_negate_condition_code(cc);
2427 build_address_ptr(&addr, ptr, mem);
2429 dbgi = get_irn_dbg_info(node);
2430 block = get_nodes_block(node);
2431 new_block = be_transform_node(block);
2432 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2433 addr.index, addr.mem, flags, cc);
2434 set_address(new_node, &addr);
2435 set_ia32_op_type(new_node, ia32_AddrModeD);
2436 set_ia32_ls_mode(new_node, mode);
2437 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to create a destination-address-mode node for a Store: recognizes
 * Store(ptr, op(...)) patterns and emits the corresponding read-modify-write
 * ia32 node (AddMem, SubMem, AndMem, ...).
 *
 * @param node  the Store node
 * @return the created ia32 node, or (presumably) NULL if no pattern matched
 *         — TODO confirm, the failure paths are not visible here
 */
2442 static ir_node *try_create_dest_am(ir_node *node)
2444 ir_node *val = get_Store_value(node);
2445 ir_node *mem = get_Store_mem(node);
2446 ir_node *ptr = get_Store_ptr(node);
2447 ir_mode *mode = get_irn_mode(val);
2448 unsigned bits = get_mode_size_bits(mode);
2453 /* handle only GP modes for now... */
2454 if (!ia32_mode_needs_gp_reg(mode))
2458 /* store must be the only user of the val node */
2459 if (get_irn_n_edges(val) > 1)
2461 /* skip pointless convs */
2463 ir_node *conv_op = get_Conv_op(val);
2464 ir_mode *pred_mode = get_irn_mode(conv_op);
2465 if (!ia32_mode_needs_gp_reg(pred_mode))
2467 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2475 /* value must be in the same block */
2476 if (get_nodes_block(node) != get_nodes_block(val))
2479 switch (get_irn_opcode(val)) {
2481 op1 = get_Add_left(val);
2482 op2 = get_Add_right(val);
/* x + 1 / x - 1 map to the shorter inc/dec encodings when profitable */
2483 if (ia32_cg_config.use_incdec) {
2484 if (is_Const_1(op2)) {
2485 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2487 } else if (is_Const_Minus_1(op2)) {
2488 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2492 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2493 new_bd_ia32_AddMem, new_bd_ia32_AddMem_8bit,
2494 match_commutative | match_immediate);
2497 op1 = get_Sub_left(val);
2498 op2 = get_Sub_right(val);
/* Sub with constant right operand should have been normalized to Add */
2499 if (is_Const(op2)) {
2500 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2502 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2503 new_bd_ia32_SubMem, new_bd_ia32_SubMem_8bit,
2507 op1 = get_And_left(val);
2508 op2 = get_And_right(val);
2509 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2510 new_bd_ia32_AndMem, new_bd_ia32_AndMem_8bit,
2511 match_commutative | match_immediate);
2514 op1 = get_Or_left(val);
2515 op2 = get_Or_right(val);
2516 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2517 new_bd_ia32_OrMem, new_bd_ia32_OrMem_8bit,
2518 match_commutative | match_immediate);
2521 op1 = get_Eor_left(val);
2522 op2 = get_Eor_right(val);
2523 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2524 new_bd_ia32_XorMem, new_bd_ia32_XorMem_8bit,
2525 match_commutative | match_immediate);
2528 op1 = get_Shl_left(val);
2529 op2 = get_Shl_right(val);
/* shifts have no separate 8-bit constructor; same func passed twice */
2530 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2531 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2535 op1 = get_Shr_left(val);
2536 op2 = get_Shr_right(val);
2537 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2538 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2542 op1 = get_Shrs_left(val);
2543 op2 = get_Shrs_right(val);
2544 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2545 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2549 op1 = get_Rotl_left(val);
2550 op2 = get_Rotl_right(val);
2551 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2552 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2555 /* TODO: match ROR patterns... */
2557 new_node = try_create_SetMem(val, ptr, mem);
2561 op1 = get_Minus_op(val);
2562 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2565 /* should be lowered already */
2566 assert(mode != mode_b);
2567 op1 = get_Not_op(val);
2568 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* preserve the pinned state of the original Store on the combined node */
2574 if (new_node != NULL) {
2575 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2576 get_irn_pinned(node) == op_pin_state_pinned) {
2577 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Returns whether an integer mode can serve directly as the memory operand
 * of an x87 integer load/store: only signed 16- and 32-bit modes qualify.
 */
2584 static bool possible_int_mode_for_fp(ir_mode *mode)
2588 if (!mode_is_signed(mode))
2590 size = get_mode_size_bits(mode);
2591 if (size != 16 && size != 32)
/**
 * Checks whether @p node is a Conv from a float mode into a signed 16/32-bit
 * integer mode — a pattern that can later be handled with an x87 fist store.
 */
2596 static int is_float_to_int_conv(const ir_node *node)
2598 ir_mode *mode = get_irn_mode(node);
2602 if (!possible_int_mode_for_fp(mode))
2607 conv_op = get_Conv_op(node);
2608 conv_mode = get_irn_mode(conv_op);
2610 if (!mode_is_float(conv_mode))
2617 * Transform a Store(floatConst) into a sequence of integer stores of the
 * constant's raw bits (4-byte chunks first, then a trailing 2-byte chunk),
 * avoiding a float register entirely.
2620 * @return the created ia32 Store node (or a Sync of several stores)
2622 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2624 ir_mode *mode = get_irn_mode(cns);
2625 unsigned size = get_mode_size_bytes(mode);
2626 ir_tarval *tv = get_Const_tarval(cns);
2627 ir_node *block = get_nodes_block(node);
2628 ir_node *new_block = be_transform_node(block);
2629 ir_node *ptr = get_Store_ptr(node);
2630 ir_node *mem = get_Store_mem(node);
2631 dbg_info *dbgi = get_irn_dbg_info(node);
2634 int throws_exception = ir_throws_exception(node);
2636 ia32_address_t addr;
2638 build_address_ptr(&addr, ptr, mem);
/* assemble a 32-bit little-endian chunk from the tarval's raw bytes */
2645 val= get_tarval_sub_bits(tv, ofs) |
2646 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2647 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2648 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2651 } else if (size >= 2) {
/* remaining 16-bit chunk (e.g. the top of an 80-bit long double) */
2652 val= get_tarval_sub_bits(tv, ofs) |
2653 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2657 panic("invalid size of Store float to mem (%+F)", node);
2659 ir_graph *const irg = get_Block_irg(new_block);
2660 ir_node *const imm = ia32_create_Immediate(irg, NULL, 0, val);
2662 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2663 addr.index, addr.mem, imm);
2664 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2666 ir_set_throws_exception(new_node, throws_exception);
2667 set_irn_pinned(new_node, get_irn_pinned(node));
2668 set_ia32_op_type(new_node, ia32_AddrModeD);
2669 set_ia32_ls_mode(new_node, mode);
2670 set_address(new_node, &addr);
2671 SET_IA32_ORIG_NODE(new_node, node);
/* advance to the next chunk of the constant */
2678 addr.offset += delta;
2679 } while (size != 0);
/* several partial stores: join their memory outputs with a Sync */
2682 return new_rd_Sync(dbgi, new_block, i, ins);
2684 return get_Proj_pred(ins[0]);
2689 * Generate a vfist or vfisttp instruction (x87 float -> integer store).
2691 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2692 ir_node *index, ir_node *mem, ir_node *val)
2694 if (ia32_cg_config.use_fisttp) {
2695 /* Note: fisttp ALWAYS pops the tos. We have to ensure here that the value
2696 is copied if other users exist */
2697 ir_node *vfisttp = new_bd_ia32_fisttp(dbgi, block, base, index, mem, val);
2698 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_fisttp_res);
/* Keep the popped value alive so the x87 simulator can insert a copy */
2699 be_new_Keep(block, 1, &value);
/* plain fist truncates only with an explicit rounding-mode change */
2703 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2706 ir_node *vfist = new_bd_ia32_fist(dbgi, block, base, index, mem, val, trunc_mode);
2712 * Transforms a general (no special case) Store.
2714 * @return the created ia32 Store node
2716 static ir_node *gen_general_Store(ir_node *node)
2718 ir_node *val = get_Store_value(node);
2719 ir_mode *mode = get_irn_mode(val);
2720 ir_node *block = get_nodes_block(node);
2721 ir_node *new_block = be_transform_node(block);
2722 ir_node *ptr = get_Store_ptr(node);
2723 ir_node *mem = get_Store_mem(node);
2724 dbg_info *dbgi = get_irn_dbg_info(node);
2725 int throws_exception = ir_throws_exception(node);
2728 ia32_address_t addr;
2730 /* check for destination address mode (read-modify-write patterns) */
2731 new_node = try_create_dest_am(node);
2732 if (new_node != NULL)
2735 /* construct store address */
2736 memset(&addr, 0, sizeof(addr));
2737 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* missing base/index parts are filled with the GP no-register */
2739 if (addr.base == NULL) {
2740 addr.base = noreg_GP;
2742 addr.base = be_transform_node(addr.base);
2745 if (addr.index == NULL) {
2746 addr.index = noreg_GP;
2748 addr.index = be_transform_node(addr.index);
2750 addr.mem = be_transform_node(mem);
2752 if (mode_is_float(mode)) {
2753 if (ia32_cg_config.use_sse2) {
2754 new_val = be_transform_node(val);
2755 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2756 addr.index, addr.mem, new_val);
/* x87: a narrowing conv before the store is folded into fst */
2758 val = ia32_skip_float_downconv(val);
2759 new_val = be_transform_node(val);
2760 new_node = new_bd_ia32_fst(dbgi, new_block, addr.base,
2761 addr.index, addr.mem, new_val, mode);
2763 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* Store(Conv(float)) on x87 becomes a single fist */
2764 val = get_Conv_op(val);
2765 new_val = be_transform_node(val);
2766 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2768 unsigned dest_bits = get_mode_size_bits(mode);
/* a store to N bits makes preceding downconvs to >= N bits redundant */
2769 while (is_downconv(val)
2770 && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2771 val = get_Conv_op(val);
2773 new_val = create_immediate_or_transform(val);
2774 assert(mode != mode_b);
2776 new_node = dest_bits == 8
2777 ? new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, new_val)
2778 : new_bd_ia32_Store (dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2780 ir_set_throws_exception(new_node, throws_exception);
2782 set_irn_pinned(new_node, get_irn_pinned(node));
2783 set_ia32_op_type(new_node, ia32_AddrModeD);
2784 set_ia32_ls_mode(new_node, mode);
2786 set_address(new_node, &addr);
2787 SET_IA32_ORIG_NODE(new_node, node);
2793 * Transforms a Store.  Dispatches float-constant stores to the integer
 * store sequence, everything else to the general Store transformation.
2795 * @return the created ia32 Store node
2797 static ir_node *gen_Store(ir_node *node)
2799 ir_node *val = get_Store_value(node);
2800 ir_mode *mode = get_irn_mode(val);
2802 if (mode_is_float(mode) && is_Const(val)) {
2803 /* We can transform every floating const store
2804 into a sequence of integer stores.
2805 If the constant is already in a register,
2806 it would be better to use it, but we don't
2807 have this information here. */
2808 return gen_float_const_Store(node, val);
2810 return gen_general_Store(node);
2814 * Transforms a Switch into an ia32 SwitchJmp using a jump table stored
 * in a private constant entity.
2816 * @return the created ia32 SwitchJmp node
2818 static ir_node *gen_Switch(ir_node *node)
2820 dbg_info *dbgi = get_irn_dbg_info(node);
2821 ir_graph *irg = get_irn_irg(node);
2822 ir_node *block = be_transform_node(get_nodes_block(node));
2823 ir_node *sel = get_Switch_selector(node);
2824 ir_node *new_sel = be_transform_node(sel);
2825 ir_mode *sel_mode = get_irn_mode(sel);
2826 const ir_switch_table *table = get_Switch_table(node);
2827 unsigned n_outs = get_Switch_n_outs(node);
2830 assert(get_mode_size_bits(sel_mode) <= 32);
2831 assert(!mode_is_float(sel_mode));
2832 sel = ia32_skip_sameconv(sel);
/* the table is indexed with a full 32-bit value; widen smaller selectors */
2833 if (get_mode_size_bits(sel_mode) < 32)
2834 new_sel = transform_upconv(sel, node);
/* create a private, constant entity to hold the jump table */
2836 ir_type *const utype = get_unknown_type();
2837 ir_entity *const entity = new_entity(utype, id_unique("TBL%u"), utype);
2838 set_entity_visibility(entity, ir_visibility_private);
2839 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
/* the table may be shared; duplicate so the backend owns its copy */
2841 table = ir_switch_table_duplicate(irg, table);
2843 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
/* scale 2: presumably 4-byte table entries (index << 2) — TODO confirm */
2844 set_ia32_am_scale(new_node, 2);
2845 set_ia32_am_sc(new_node, entity);
2846 set_ia32_op_type(new_node, ia32_AddrModeS);
2847 set_ia32_ls_mode(new_node, mode_Iu);
2848 SET_IA32_ORIG_NODE(new_node, node);
2849 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2850 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2856 * Transform a Cond node into an ia32 conditional jump (Jcc) fed by the
 * flags produced from the Cond's selector.
2858 static ir_node *gen_Cond(ir_node *node)
2860 ir_node *block = get_nodes_block(node);
2861 ir_node *new_block = be_transform_node(block);
2862 dbg_info *dbgi = get_irn_dbg_info(node);
2863 ir_node *sel = get_Cond_selector(node);
2864 ir_node *flags = NULL;
2866 ia32_condition_code_t cc;
2868 /* we get flags from a Cmp */
2869 flags = get_flags_node(sel, &cc);
2871 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2872 SET_IA32_ORIG_NODE(new_node, node);
2878 * Transform a be_Copy.  The node is duplicated; copies of GP-register
 * values are normalized to mode_Iu.
2880 static ir_node *gen_be_Copy(ir_node *node)
2882 ir_node *new_node = be_duplicate_node(node);
2883 ir_mode *mode = get_irn_mode(new_node);
2885 if (ia32_mode_needs_gp_reg(mode)) {
2886 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare.  With fucomi support the compare writes
 * eflags directly; otherwise the FPU status word is fetched (fnstsw) and
 * transferred into eflags via sahf.  A compare against constant 0 uses
 * the shorter ftst form.
 */
2892 static ir_node *create_Fucom(ir_node *node)
2894 dbg_info *dbgi = get_irn_dbg_info(node);
2895 ir_node *block = get_nodes_block(node);
2896 ir_node *new_block = be_transform_node(block);
2897 ir_node *left = get_Cmp_left(node);
2898 ir_node *new_left = be_transform_node(left);
2899 ir_node *right = get_Cmp_right(node);
2900 ir_mode *cmp_mode = get_irn_mode(left);
2903 check_x87_floatmode(cmp_mode);
2905 if (ia32_cg_config.use_fucomi) {
2906 new_right = be_transform_node(right);
2907 new_node = new_bd_ia32_Fucomi(dbgi, new_block, new_left,
2909 set_ia32_commutative(new_node);
2910 SET_IA32_ORIG_NODE(new_node, node);
2912 if (is_Const_0(right)) {
/* compare against 0: ftst needs no second operand */
2913 new_node = new_bd_ia32_FtstFnstsw(dbgi, new_block, new_left, 0);
2915 new_right = be_transform_node(right);
2916 new_node = new_bd_ia32_FucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2917 set_ia32_commutative(new_node);
2920 SET_IA32_ORIG_NODE(new_node, node);
/* move the FPU status word (in AX) into eflags */
2922 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2923 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE2 float compare (ucomis[sd]), with source address mode
 * matching for one operand.
 */
2929 static ir_node *create_Ucomi(ir_node *node)
2931 dbg_info *dbgi = get_irn_dbg_info(node);
2932 ir_node *src_block = get_nodes_block(node);
2933 ir_node *new_block = be_transform_node(src_block);
2934 ir_node *left = get_Cmp_left(node);
2935 ir_node *right = get_Cmp_right(node);
2937 ia32_address_mode_t am;
2938 ia32_address_t *addr = &am.addr;
2940 match_arguments(&am, src_block, left, right, NULL,
2941 match_commutative | match_am);
2943 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2944 addr->mem, am.new_op1, am.new_op2,
2946 set_am_attributes(new_node, &am);
2948 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj of a folded load */
2950 new_node = fix_mem_proj(new_node, &am);
/**
 * Returns whether a Mux node is guaranteed to produce a value with clean
 * upper bits in @p mode.  Const/Const muxes of 8-bit mode are excluded:
 * they get transformed to SETcc, whose upper bits are not clean.
 */
2955 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2957 ir_node *mux_true = get_Mux_true(node);
2958 ir_node *mux_false = get_Mux_false(node);
2959 ir_mode *mux_mode = get_irn_mode(node);
2960 /* mux nodes which get transformed to the set instruction are not clean */
2961 if (is_Const(mux_true) && is_Const(mux_false)
2962 && get_mode_size_bits(mux_mode) == 8) {
/* otherwise the Mux is clean iff both inputs are */
2965 return be_upper_bits_clean(mux_true, mode)
2966 && be_upper_bits_clean(mux_false, mode);
2970 * Generate code for a Cmp.  Floats go to Ucomi (SSE2) or Fucom (x87);
 * integer compares become Test (for x & y ==/!= 0) or Cmp, shrunk to
 * 32-bit operand mode when the upper bits are known clean.
2972 static ir_node *gen_Cmp(ir_node *node)
2974 dbg_info *dbgi = get_irn_dbg_info(node);
2975 ir_node *block = get_nodes_block(node);
2976 ir_node *new_block = be_transform_node(block);
2977 ir_node *left = get_Cmp_left(node);
2978 ir_node *right = get_Cmp_right(node);
2979 ir_mode *cmp_mode = get_irn_mode(left);
2981 ia32_address_mode_t am;
2982 ia32_address_t *addr = &am.addr;
2984 if (mode_is_float(cmp_mode)) {
2985 if (ia32_cg_config.use_sse2) {
2986 return create_Ucomi(node);
2988 return create_Fucom(node);
2992 assert(ia32_mode_needs_gp_reg(cmp_mode));
2994 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2995 if (is_Const_0(right) &&
2997 get_irn_n_edges(left) == 1) {
2998 /* Test(and_left, and_right) */
2999 ir_node *and_left = get_And_left(left);
3000 ir_node *and_right = get_And_right(left);
3002 /* matze: code here used mode instead of cmd_mode, I think it is always
3003 * the same as cmp_mode, but I leave this here to see if this is really
3006 assert(get_irn_mode(and_left) == cmp_mode);
3008 match_arguments(&am, block, and_left, and_right, NULL,
3010 match_am | match_8bit_am | match_16bit_am |
3011 match_am_and_immediates | match_immediate);
3013 /* use 32bit compare mode if possible since the opcode is smaller */
3014 if (am.op_type == ia32_Normal &&
3015 be_upper_bits_clean(and_left, cmp_mode) &&
3016 be_upper_bits_clean(and_right, cmp_mode)) {
3017 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3020 new_node = get_mode_size_bits(cmp_mode) == 8
3021 ? new_bd_ia32_Test_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3022 : new_bd_ia32_Test (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3024 /* Cmp(left, right) */
3025 match_arguments(&am, block, left, right, NULL,
3027 match_am | match_8bit_am | match_16bit_am |
3028 match_am_and_immediates | match_immediate);
3029 /* use 32bit compare mode if possible since the opcode is smaller */
3030 if (am.op_type == ia32_Normal &&
3031 be_upper_bits_clean(left, cmp_mode) &&
3032 be_upper_bits_clean(right, cmp_mode)) {
3033 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3036 new_node = get_mode_size_bits(cmp_mode) == 8
3037 ? new_bd_ia32_Cmp_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3038 : new_bd_ia32_Cmp (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3040 set_am_attributes(new_node, &am);
3041 set_ia32_ls_mode(new_node, cmp_mode);
3043 SET_IA32_ORIG_NODE(new_node, node);
3045 new_node = fix_mem_proj(new_node, &am);
/**
 * Create an ia32 conditional move (CMovcc) for a Mux whose values are in
 * GP registers.  If argument matching swapped the operands, the condition
 * code is negated to compensate.
 */
3050 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3051 ia32_condition_code_t cc)
3053 dbg_info *dbgi = get_irn_dbg_info(node);
3054 ir_node *block = get_nodes_block(node);
3055 ir_node *new_block = be_transform_node(block);
3056 ir_node *val_true = get_Mux_true(node);
3057 ir_node *val_false = get_Mux_false(node);
3059 ia32_address_mode_t am;
3060 ia32_address_t *addr;
3062 assert(ia32_cg_config.use_cmov);
3063 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3067 match_arguments(&am, block, val_false, val_true, flags,
3068 match_commutative | match_am | match_16bit_am | match_mode_neutral);
3070 if (am.ins_permuted)
3071 cc = ia32_negate_condition_code(cc);
3073 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3074 addr->mem, am.new_op1, am.new_op2, new_flags,
3076 set_am_attributes(new_node, &am);
3078 SET_IA32_ORIG_NODE(new_node, node);
3080 new_node = fix_mem_proj(new_node, &am);
3086 * Creates an ia32 Setcc instruction.  SETcc only writes 8 bits, so the
 * result is zero-extended with a Conv when the target mode is wider.
3088 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3089 ir_node *flags, ia32_condition_code_t cc,
3092 ir_mode *mode = get_irn_mode(orig_node);
3095 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3096 SET_IA32_ORIG_NODE(new_node, orig_node);
3098 /* we might need to conv the result up */
3099 if (get_mode_size_bits(mode) > 8) {
3100 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
3101 SET_IA32_ORIG_NODE(new_node, orig_node);
3108 * Create instruction sequence for an unsigned Difference or Zero:
 * a - b if the subtraction does not borrow, 0 otherwise.  Implemented
 * branch-free as sub, then sbb 0 (materializes the borrow as a mask),
 * not, and.
3110 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3112 ir_mode *mode = get_irn_mode(psi);
3122 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3123 match_mode_neutral | match_am | match_immediate | match_two_users);
3125 block = get_nodes_block(new_node);
3127 if (is_Proj(new_node)) {
3128 sub = get_Proj_pred(new_node);
/* the Sub must be multi-out so both the result and flags can be used */
3131 set_irn_mode(sub, mode_T);
3132 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3134 assert(is_ia32_Sub(sub));
3135 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3137 dbgi = get_irn_dbg_info(psi);
/* sbb 0: 0 - 0 - CF, i.e. all-ones iff the sub borrowed */
3138 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3139 set_ia32_ls_mode(sbb, mode_Iu);
3140 notn = new_bd_ia32_Not(dbgi, block, sbb);
/* mask the difference: kept when no borrow occurred, zeroed otherwise */
3142 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3143 set_ia32_ls_mode(new_node, mode_Iu);
3144 set_ia32_commutative(new_node);
3149 * Create a constant array entity holding two float constants.
3151 * @param c0 the first constant
3152 * @param c1 the second constant
3153 * @param new_mode IN/OUT for the mode of the constants, if NULL
3154 * the smallest lossless mode will be used
3156 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3159 ir_mode *mode = *new_mode;
3161 ir_initializer_t *initializer;
3162 ir_tarval *tv0 = get_Const_tarval(c0);
3163 ir_tarval *tv1 = get_Const_tarval(c1);
3166 /* detect the best mode for the constants */
3167 mode = get_tarval_mode(tv0);
/* shrink to single precision if both values convert losslessly */
3169 if (mode != mode_F) {
3170 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3171 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3173 tv0 = tarval_convert_to(tv0, mode);
3174 tv1 = tarval_convert_to(tv1, mode);
/* otherwise try double precision */
3175 } else if (mode != mode_D) {
3176 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3177 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3179 tv0 = tarval_convert_to(tv0, mode);
3180 tv1 = tarval_convert_to(tv1, mode);
3187 tp = ia32_get_prim_type(mode);
3188 tp = ia32_create_float_array(tp);
/* private, constant, linker-visible entity holding the pair */
3190 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3192 set_entity_ld_ident(ent, get_entity_ident(ent));
3193 set_entity_visibility(ent, ir_visibility_private);
3194 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3196 initializer = create_initializer_compound(2);
3198 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3199 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3201 set_entity_initializer(ent, initializer);
3208 * Possible transformations for creating a Setcc result from two constants.
3210 enum setcc_transform_insn {
/* description of one transformation sequence: the (possibly adjusted)
 * condition code plus a list of post-processing steps */
3222 typedef struct setcc_transform {
3224 ia32_condition_code_t cc;
3226 enum setcc_transform_insn transform;
3230 } setcc_transform_t;
3233 * Setcc can only handle 0 and 1 result.
3234 * Find a transformation that creates the requested constants t and f from
 * a 0/1 SETcc result (e.g. add offset, shift, LEA scaling, neg, not, and).
 * NOTE(review): several case labels of this routine are not visible here;
 * comments describe only the visible logic.
3237 static void find_const_transform(ia32_condition_code_t cc,
3238 ir_tarval *t, ir_tarval *f,
3239 setcc_transform_t *res)
/* normalize so that t is the non-null / bigger constant, adjusting cc */
3245 if (tarval_is_null(t)) {
3249 cc = ia32_negate_condition_code(cc);
3250 } else if (tarval_cmp(t, f) == ir_relation_less) {
3251 // now, t is the bigger one
3255 cc = ia32_negate_condition_code(cc);
/* a non-zero f is handled by a final ADD of f; reduce to the (t-f, 0) case */
3259 if (! tarval_is_null(f)) {
3260 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3263 res->steps[step].transform = SETCC_TR_ADD;
3265 if (t == tarval_bad)
3266 panic("constant subtract failed");
3267 if (! tarval_is_long(f))
3268 panic("tarval is not long");
3270 res->steps[step].val = get_tarval_long(f);
3272 f = tarval_sub(f, f, NULL);
3273 assert(tarval_is_null(f));
/* (1, 0): a plain SETcc suffices */
3276 if (tarval_is_one(t)) {
3277 res->steps[step].transform = SETCC_TR_SET;
3278 res->num_steps = ++step;
/* (-1, 0): SETcc then NEG */
3282 if (tarval_is_minus_one(t)) {
3283 res->steps[step].transform = SETCC_TR_NEG;
3285 res->steps[step].transform = SETCC_TR_SET;
3286 res->num_steps = ++step;
3289 if (tarval_is_long(t)) {
3290 long v = get_tarval_long(t);
3292 res->steps[step].val = 0;
/* small multiples of the 0/1 result map to LEA/SHL with a scale */
3295 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3297 res->steps[step].transform = SETCC_TR_LEAxx;
3298 res->steps[step].scale = 3; /* (a << 3) + a */
3301 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3303 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3304 res->steps[step].scale = 3; /* (a << 3) */
3307 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3309 res->steps[step].transform = SETCC_TR_LEAxx;
3310 res->steps[step].scale = 2; /* (a << 2) + a */
3313 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3315 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3316 res->steps[step].scale = 2; /* (a << 2) */
3319 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3321 res->steps[step].transform = SETCC_TR_LEAxx;
3322 res->steps[step].scale = 1; /* (a << 1) + a */
3325 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3327 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3328 res->steps[step].scale = 1; /* (a << 1) */
3331 res->num_steps = step;
/* non-power-of-two: AND with the value; single bit: SHL by bit position */
3334 if (! tarval_is_single_bit(t)) {
3335 res->steps[step].transform = SETCC_TR_AND;
3336 res->steps[step].val = v;
3338 res->steps[step].transform = SETCC_TR_NEG;
3340 int val = get_tarval_lowest_bit(t);
3343 res->steps[step].transform = SETCC_TR_SHL;
3344 res->steps[step].scale = val;
3348 res->steps[step].transform = SETCC_TR_SET;
3349 res->num_steps = ++step;
3352 panic("tarval is not long");
3356 * Transforms a Mux node into some code sequence: float abs, SSE min/max,
 * constant-pair loads from a constant array, unsigned difference-or-zero,
 * SETcc-based constant synthesis, or a CMov as the general case.
3358 * @return The transformed node.
3360 static ir_node *gen_Mux(ir_node *node)
3362 dbg_info *dbgi = get_irn_dbg_info(node);
3363 ir_node *block = get_nodes_block(node);
3364 ir_node *new_block = be_transform_node(block);
3365 ir_node *mux_true = get_Mux_true(node);
3366 ir_node *mux_false = get_Mux_false(node);
3367 ir_node *sel = get_Mux_sel(node);
3368 ir_mode *mode = get_irn_mode(node);
3372 ia32_condition_code_t cc;
3374 assert(get_irn_mode(sel) == mode_b);
/* abs(x) pattern: only supported for float here */
3376 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3378 if (ia32_mode_needs_gp_reg(mode)) {
3379 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3382 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3383 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3387 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3388 if (mode_is_float(mode)) {
3389 ir_node *cmp_left = get_Cmp_left(sel);
3390 ir_node *cmp_right = get_Cmp_right(sel);
3391 ir_relation relation = get_Cmp_relation(sel);
3393 if (ia32_cg_config.use_sse2) {
3394 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3395 if (cmp_left == mux_true && cmp_right == mux_false) {
3396 /* Mux(a <= b, a, b) => MIN */
3397 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3398 match_commutative | match_am | match_two_users);
3399 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3400 /* Mux(a <= b, b, a) => MAX */
3401 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3402 match_commutative | match_am | match_two_users);
3404 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3405 if (cmp_left == mux_true && cmp_right == mux_false) {
3406 /* Mux(a >= b, a, b) => MAX */
3407 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3408 match_commutative | match_am | match_two_users);
3409 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3410 /* Mux(a >= b, b, a) => MIN */
3411 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3412 match_commutative | match_am | match_two_users);
/* float Mux of two constants: build a 2-element constant array and load
 * the selected element, indexing with the 0/1 SETcc result */
3417 if (is_Const(mux_true) && is_Const(mux_false)) {
3418 ia32_address_mode_t am;
3423 flags = get_flags_node(sel, &cc);
3424 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3426 if (ia32_cg_config.use_sse2) {
3427 /* cannot load from different mode on SSE */
3430 /* x87 can load any mode */
3434 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3436 if (new_mode == mode_F) {
3438 } else if (new_mode == mode_D) {
3440 } else if (new_mode == ia32_mode_E) {
3441 /* arg, shift 16 NOT supported */
/* scale via an extra LEA since the AM scale cannot express *16 */
3443 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3445 panic("Unsupported constant size");
3448 am.ls_mode = new_mode;
3449 am.addr.base = get_symconst_base();
3450 am.addr.index = new_node;
3451 am.addr.mem = nomem;
3453 am.addr.scale = scale;
3454 am.addr.use_frame = 0;
3455 am.addr.tls_segment = false;
3456 am.addr.frame_entity = NULL;
3457 am.addr.symconst_sign = 0;
3458 am.mem_proj = am.addr.mem;
3459 am.op_type = ia32_AddrModeS;
3462 am.pinned = op_pin_state_floats;
3464 am.ins_permuted = false;
3466 if (ia32_cg_config.use_sse2)
3467 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3469 load = new_bd_ia32_fld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3470 set_am_attributes(load, &am);
3472 return new_rd_Proj(NULL, load, mode_fp, pn_ia32_res);
3474 panic("cannot transform floating point Mux");
3477 assert(ia32_mode_needs_gp_reg(mode));
3480 ir_node *cmp_left = get_Cmp_left(sel);
3481 ir_node *cmp_right = get_Cmp_right(sel);
3482 ir_relation relation = get_Cmp_relation(sel);
3483 ir_node *val_true = mux_true;
3484 ir_node *val_false = mux_false;
/* normalize so that the zero constant is the false value */
3486 if (is_Const(val_true) && is_Const_null(val_true)) {
3487 ir_node *tmp = val_false;
3488 val_false = val_true;
3490 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0): unsigned difference-or-zero */
3492 if (is_Const_0(val_false) && is_Sub(val_true)) {
3493 if ((relation & ir_relation_greater)
3494 && get_Sub_left(val_true) == cmp_left
3495 && get_Sub_right(val_true) == cmp_right) {
3496 return create_doz(node, cmp_left, cmp_right);
3498 if ((relation & ir_relation_less)
3499 && get_Sub_left(val_true) == cmp_right
3500 && get_Sub_right(val_true) == cmp_left) {
3501 return create_doz(node, cmp_right, cmp_left);
3506 flags = get_flags_node(sel, &cc);
3508 if (is_Const(mux_true) && is_Const(mux_false)) {
3509 /* both are const, good */
3510 ir_tarval *tv_true = get_Const_tarval(mux_true);
3511 ir_tarval *tv_false = get_Const_tarval(mux_false);
3512 setcc_transform_t res;
3515 find_const_transform(cc, tv_true, tv_false, &res);
/* apply the computed step sequence in reverse on top of the SETcc */
3517 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3520 switch (res.steps[step].transform) {
3522 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3523 add_ia32_am_offs_int(new_node, res.steps[step].val);
3525 case SETCC_TR_ADDxx:
3526 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3529 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3530 set_ia32_am_scale(new_node, res.steps[step].scale);
3531 set_ia32_am_offs_int(new_node, res.steps[step].val);
3533 case SETCC_TR_LEAxx:
3534 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3535 set_ia32_am_scale(new_node, res.steps[step].scale);
3536 set_ia32_am_offs_int(new_node, res.steps[step].val);
3539 imm = ia32_immediate_from_long(res.steps[step].scale);
3540 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3543 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3546 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3549 imm = ia32_immediate_from_long(res.steps[step].val);
3550 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3553 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3556 panic("unknown setcc transform");
/* general case: conditional move */
3560 new_node = create_CMov(node, sel, flags, cc);
3567 * Create a conversion from an x87 float register to general purpose:
 * store the value to the stack frame with fist, then load it back as an
 * integer.
3569 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3571 ir_node *block = be_transform_node(get_nodes_block(node));
3572 ir_node *op = get_Conv_op(node);
3573 ir_node *new_op = be_transform_node(op);
3574 ir_graph *irg = current_ir_graph;
3575 dbg_info *dbgi = get_irn_dbg_info(node);
3576 ir_mode *mode = get_irn_mode(node);
3577 ir_node *frame = get_irg_frame(irg);
3578 ir_node *fist, *load, *mem;
3580 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3581 set_irn_pinned(fist, op_pin_state_floats);
3582 set_ia32_use_frame(fist);
3583 set_ia32_op_type(fist, ia32_AddrModeD);
3584 arch_add_irn_flags(fist, arch_irn_flags_spill);
/* fist and fisttp share the same memory Proj number */
3586 assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
3587 mem = new_r_Proj(fist, mode_M, pn_ia32_fist_M);
3589 assert(get_mode_size_bits(mode) <= 32);
3590 /* exception we can only store signed 32 bit integers, so for unsigned
3591 we store a 64bit (signed) integer and load the lower bits */
3592 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3593 set_ia32_ls_mode(fist, mode_Ls);
3595 set_ia32_ls_mode(fist, mode_Is);
3597 SET_IA32_ORIG_NODE(fist, node);
/* load the (lower 32 bits of the) stored integer back from the frame */
3600 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3602 set_irn_pinned(load, op_pin_state_floats);
3603 set_ia32_use_frame(load);
3604 set_ia32_op_type(load, ia32_AddrModeS);
3605 set_ia32_ls_mode(load, mode_Is);
/* the frame entity must be big enough for the value actually stored */
3606 if (get_ia32_ls_mode(fist) == mode_Ls) {
3607 ia32_attr_t *attr = get_ia32_attr(load);
3608 attr->data.need_64bit_stackent = 1;
3610 ia32_attr_t *attr = get_ia32_attr(load);
3611 attr->data.need_32bit_stackent = 1;
3613 SET_IA32_ORIG_NODE(load, node);
3615 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3619 * Creates an x87 Conv by placing a Store and a Load: the round trip
 * through memory truncates the value to the target precision.
3621 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3623 ir_node *block = get_nodes_block(node);
3624 ir_graph *irg = get_Block_irg(block);
3625 dbg_info *dbgi = get_irn_dbg_info(node);
3626 ir_node *frame = get_irg_frame(irg);
3628 ir_node *store, *load;
3631 store = new_bd_ia32_fst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3632 set_ia32_use_frame(store);
3633 set_ia32_op_type(store, ia32_AddrModeD);
3634 arch_add_irn_flags(store, arch_irn_flags_spill);
3635 SET_IA32_ORIG_NODE(store, node);
3637 store_mem = new_r_Proj(store, mode_M, pn_ia32_fst_M);
3639 load = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3640 set_ia32_use_frame(load);
3641 set_ia32_op_type(load, ia32_AddrModeS);
3642 SET_IA32_ORIG_NODE(load, node);
3644 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_fld_res);
/**
 * Create an integer-to-integer Conv node, selecting the 8-bit variant of
 * the constructor when the source mode is 8 bits wide.
 */
3648 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3649 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3651 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3653 func = get_mode_size_bits(mode) == 8 ?
3654 new_bd_ia32_Conv_I2I_8bit : new_bd_ia32_Conv_I2I;
3655 return func(dbgi, block, base, index, mem, val, mode);
3659  * Create a conversion from general purpose to x87 register
/* Strategy: fild can only load signed 16/32/64-bit integers from memory, so
 * the GP value is first written to a frame slot (widened/adjusted as needed)
 * and then loaded with fild.  If possible, the fild consumes the source via
 * address mode directly and the store is avoided entirely. */
3661 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3663 	ir_node *src_block = get_nodes_block(node);
3664 	ir_node *block = be_transform_node(src_block);
3665 	ir_graph *irg = get_Block_irg(block);
3666 	dbg_info *dbgi = get_irn_dbg_info(node);
3667 	ir_node *op = get_Conv_op(node);
3668 	ir_node *new_op = NULL;
3670 	ir_mode *store_mode;
3676 	/* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3677 	if (possible_int_mode_for_fp(src_mode)) {
3678 		ia32_address_mode_t am;
3680 		match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3681 		if (am.op_type == ia32_AddrModeS) {
3682 			ia32_address_t *addr = &am.addr;
/* Source address mode matched: fild reads the integer from memory. */
3684 			fild = new_bd_ia32_fild(dbgi, block, addr->base, addr->index, addr->mem);
3685 			new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3687 			set_am_attributes(fild, &am);
3688 			SET_IA32_ORIG_NODE(fild, node);
3690 			fix_mem_proj(fild, &am);
3695 	if (new_op == NULL) {
3696 		new_op = be_transform_node(op);
3699 	mode = get_irn_mode(op);
3701 	/* first convert to 32 bit signed if necessary */
3702 	if (get_mode_size_bits(src_mode) < 32) {
3703 		if (!be_upper_bits_clean(op, src_mode)) {
3704 			new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3705 			SET_IA32_ORIG_NODE(new_op, node);
3710 	assert(get_mode_size_bits(mode) == 32);
/* Spill the (now 32-bit) GP value to the frame so fild can read it. */
3713 	store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3715 	set_ia32_use_frame(store);
3716 	set_ia32_op_type(store, ia32_AddrModeD);
3717 	set_ia32_ls_mode(store, mode_Iu);
3718 	arch_add_irn_flags(store, arch_irn_flags_spill);
3720 	store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3722 	/* exception for 32bit unsigned, do a 64bit spill+load */
3723 	if (!mode_is_signed(mode)) {
/* fild has no unsigned form: store a zero high word at offset 4 and load
 * the 8 bytes as a signed 64-bit value, which equals the unsigned 32-bit
 * value. */
3726 		ir_node *zero_const = ia32_create_Immediate(irg, NULL, 0, 0);
3728 		ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3729 		                                        noreg_GP, nomem, zero_const);
3730 		ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3732 		set_ia32_use_frame(zero_store);
3733 		set_ia32_op_type(zero_store, ia32_AddrModeD);
3734 		add_ia32_am_offs_int(zero_store, 4);
3735 		set_ia32_ls_mode(zero_store, mode_Iu);
3736 		arch_add_irn_flags(zero_store, arch_irn_flags_spill);
/* Both stores must complete before the fild: join them with a Sync. */
3738 		in[0] = zero_store_mem;
3741 		store_mem = new_rd_Sync(dbgi, block, 2, in);
3742 		store_mode = mode_Ls;
3744 		store_mode = mode_Is;
3748 	fild = new_bd_ia32_fild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3750 	set_ia32_use_frame(fild);
3751 	set_ia32_op_type(fild, ia32_AddrModeS);
3752 	set_ia32_ls_mode(fild, store_mode);
3754 	new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3760  * Create a conversion from one integer mode into another one
/* Only widening conversions reach this point (see the assert below).  If the
 * upper bits of the operand are already clean the conversion is a no-op and
 * the transformed operand is returned directly; otherwise a Conv_I2I is
 * built, possibly folding a memory operand via address mode. */
3762 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3763                                 dbg_info *dbgi, ir_node *block, ir_node *op,
3766 	ir_node *new_block = be_transform_node(block);
3768 	ia32_address_mode_t am;
3769 	ia32_address_t *addr = &am.addr;
3772 	assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3774 #ifdef DEBUG_libfirm
/* Converting right after a constant suggests a missed constant fold. */
3776 		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3781 	if (be_upper_bits_clean(op, src_mode)) {
3782 		return be_transform_node(op);
3785 	match_arguments(&am, block, NULL, op, NULL,
3786 	                match_am | match_8bit_am | match_16bit_am);
3788 	new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3789 	                           addr->mem, am.new_op2, src_mode);
3790 	set_am_attributes(new_node, &am);
3791 	/* match_arguments assume that out-mode = in-mode, this isn't true here
/* ... so the load/store mode is forced back to the (smaller) source mode. */
3793 	set_ia32_ls_mode(new_node, src_mode);
3794 	SET_IA32_ORIG_NODE(new_node, node);
3795 	new_node = fix_mem_proj(new_node, &am);
3800  * Transforms a Conv node.
/* Dispatches on the (src, tgt) mode pair: float→float, float→int, int→float
 * and int→int each get their own lowering, choosing SSE or x87 variants
 * depending on ia32_cg_config.use_sse2. */
3802  * @return The created ia32 Conv node
3804 static ir_node *gen_Conv(ir_node *node)
3806 	ir_node *block = get_nodes_block(node);
3807 	ir_node *new_block = be_transform_node(block);
3808 	ir_node *op = get_Conv_op(node);
3809 	ir_node *new_op = NULL;
3810 	dbg_info *dbgi = get_irn_dbg_info(node);
3811 	ir_mode *src_mode = get_irn_mode(op);
3812 	ir_mode *tgt_mode = get_irn_mode(node);
3813 	int src_bits = get_mode_size_bits(src_mode);
3814 	int tgt_bits = get_mode_size_bits(tgt_mode);
3815 	ir_node *res = NULL;
/* 64-bit integers must have been split by the 64-bit lowering phase. */
3817 	assert(!mode_is_int(src_mode) || src_bits <= 32);
3818 	assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3820 	/* modeB -> X should already be lowered by the lower_mode_b pass */
3821 	if (src_mode == mode_b) {
3822 		panic("ConvB not lowered %+F", node);
3825 	if (src_mode == tgt_mode) {
3826 		/* this should be optimized already, but who knows... */
3827 		DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3828 		DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3829 		return be_transform_node(op);
3832 	if (mode_is_float(src_mode)) {
3833 		new_op = be_transform_node(op);
3834 		/* we convert from float ... */
3835 		if (mode_is_float(tgt_mode)) {
3837 			if (ia32_cg_config.use_sse2) {
3838 				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3839 				res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3841 				set_ia32_ls_mode(res, tgt_mode);
3843 				if (src_bits < tgt_bits) {
/* x87 widening is value-preserving (everything is 80-bit internally). */
3844 					DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* Narrowing on x87 needs an explicit store/load round trip. */
3847 					res = gen_x87_conv(tgt_mode, new_op);
3848 					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3854 			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3855 			if (ia32_cg_config.use_sse2) {
3856 				res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3858 				set_ia32_ls_mode(res, src_mode);
3860 				return gen_x87_fp_to_gp(node);
3864 		/* we convert from int ... */
3865 		if (mode_is_float(tgt_mode)) {
3867 			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3868 			if (ia32_cg_config.use_sse2) {
3869 				new_op = be_transform_node(op);
3870 				res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3872 				set_ia32_ls_mode(res, tgt_mode);
3874 				unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3875 				unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3876 				res = gen_x87_gp_to_fp(node, src_mode);
3878 				/* we need a float-conv, if the int mode has more bits than the
/* ... mantissa can represent: round to the target precision explicitly. */
3880 				if (float_mantissa < int_mantissa) {
3881 					res = gen_x87_conv(tgt_mode, res);
3882 					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3886 		} else if (tgt_mode == mode_b) {
3887 			/* mode_b lowering already took care that we only have 0/1 values */
3888 			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3889 			    src_mode, tgt_mode));
3890 			return be_transform_node(op);
3893 			if (src_bits >= tgt_bits) {
/* Narrowing int conversions are no-ops on 32-bit registers. */
3894 				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3895 				    src_mode, tgt_mode));
3896 				return be_transform_node(op);
3899 			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode the node as an ia32 Immediate (constraint class 'i');
 * falls back to the normal node transformation if that is not possible. */
3907 static ir_node *create_immediate_or_transform(ir_node *const node)
3909 	ir_node *new_node = ia32_try_create_Immediate(node, 'i');
3910 	if (new_node == NULL) {
3911 		new_node = be_transform_node(node);
3917  * Transforms a FrameAddr into an ia32 Add.
/* Implemented as a Lea over the frame pointer with the frame entity attached;
 * the concrete offset is filled in once stack slots are assigned. */
3919 static ir_node *gen_be_FrameAddr(ir_node *node)
3921 	ir_node *block = be_transform_node(get_nodes_block(node));
3922 	ir_node *op = be_get_FrameAddr_frame(node);
3923 	ir_node *new_op = be_transform_node(op);
3924 	dbg_info *dbgi = get_irn_dbg_info(node);
3927 	new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3928 	set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3929 	set_ia32_use_frame(new_node);
3931 	SET_IA32_ORIG_NODE(new_node, node);
3937  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* The ia32 calling convention returns floats on the x87 stack, so with SSE2
 * codegen the xmm result is spilled to the frame (xStoreSimple) and reloaded
 * into an x87 register (fld) before building the Return.  All non-float or
 * non-SSE cases just duplicate the be_Return unchanged. */
3939 static ir_node *gen_be_Return(ir_node *node)
3941 	ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3942 	ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3943 	ir_node *new_ret_val = be_transform_node(ret_val);
3944 	ir_node *new_ret_mem = be_transform_node(ret_mem);
3945 	dbg_info *dbgi = get_irn_dbg_info(node);
3946 	ir_node *block = be_transform_node(get_nodes_block(node));
3947 	ir_graph *irg = get_Block_irg(block);
3948 	ir_entity *ent = get_irg_entity(irg);
3949 	ir_type *tp = get_entity_type(ent);
3962 	assert(ret_val != NULL);
/* Fast path: nothing to fix up unless SSE2 returns a float value. */
3963 	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3964 		return be_duplicate_node(node);
3967 	res_type = get_method_res_type(tp, 0);
3969 	if (! is_Primitive_type(res_type)) {
3970 		return be_duplicate_node(node);
3973 	mode = get_type_mode(res_type);
3974 	if (! mode_is_float(mode)) {
3975 		return be_duplicate_node(node);
3978 	assert(get_method_n_ress(tp) == 1);
3980 	frame = get_irg_frame(irg);
3982 	/* store xmm0 onto stack */
3983 	sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3984 	                                     new_ret_mem, new_ret_val);
3985 	set_ia32_ls_mode(sse_store, mode);
3986 	set_ia32_op_type(sse_store, ia32_AddrModeD);
3987 	set_ia32_use_frame(sse_store);
3988 	arch_add_irn_flags(sse_store, arch_irn_flags_spill);
3989 	store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
3991 	/* load into x87 register */
3992 	fld = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, mode);
3993 	set_ia32_op_type(fld, ia32_AddrModeS);
3994 	set_ia32_use_frame(fld);
3996 	mproj = new_r_Proj(fld, mode_M, pn_ia32_fld_M);
3997 	fld = new_r_Proj(fld, mode_fp, pn_ia32_fld_res);
3999 	/* create a new return */
/* Rebuild the Return's input array, substituting the fld result for the
 * original value and transforming the remaining operands unchanged. */
4000 	arity = get_irn_arity(node);
4001 	in = ALLOCAN(ir_node*, arity);
4002 	pop = be_Return_get_pop(node);
4003 	for (i = 0; i < arity; ++i) {
4004 		ir_node *op = get_irn_n(node, i);
4005 		if (op == ret_val) {
4007 		} else if (op == ret_mem) {
4010 			in[i] = be_transform_node(op);
4013 	ir_node *const new_node = be_new_Return(dbgi, block, arity, pop, arity, in);
4014 	copy_node_attr(irg, node, new_node);
4020  * Transform a be_AddSP into an ia32_SubSP.
/* The stack grows downwards on ia32, so enlarging the stack frame (AddSP)
 * is implemented by subtracting from esp.  The result is pinned to esp. */
4022 static ir_node *gen_be_AddSP(ir_node *node)
4024 	ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4025 	ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4027 	ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4028 	                              match_am | match_immediate);
4029 	assert(is_ia32_SubSP(new_node));
4030 	arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4031 	                          &ia32_registers[REG_ESP]);
4036  * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the stack frame adds to esp. */
4038 static ir_node *gen_be_SubSP(ir_node *node)
4040 	ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4041 	ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4043 	ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4044 	                              match_am | match_immediate);
4045 	assert(is_ia32_AddSP(new_node));
4046 	arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4047 	                          &ia32_registers[REG_ESP]);
/* Transforms a Phi by selecting the ia32 register-class requirement for its
 * mode (gp for integers/pointers, xmm or fp for floats depending on SSE2,
 * no requirement otherwise, e.g. memory Phis). */
4051 static ir_node *gen_Phi(ir_node *node)
4053 	ir_mode *mode = get_irn_mode(node);
4054 	const arch_register_req_t *req;
4055 	if (ia32_mode_needs_gp_reg(mode)) {
4056 		/* we shouldn't have any 64bit stuff around anymore */
4057 		assert(get_mode_size_bits(mode) <= 32);
4058 		/* all integer operations are on 32bit registers now */
4060 		req  = ia32_reg_classes[CLASS_ia32_gp].class_req;
4061 	} else if (mode_is_float(mode)) {
4062 		if (ia32_cg_config.use_sse2) {
4064 			req  = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4067 			req  = ia32_reg_classes[CLASS_ia32_fp].class_req;
4070 		req = arch_no_register_req;
4073 	return be_transform_phi(node, req);
/* Transforms an unconditional Jmp into an ia32_Jmp in the transformed block. */
4076 static ir_node *gen_Jmp(ir_node *node)
4078 	ir_node *block = get_nodes_block(node);
4079 	ir_node *new_block = be_transform_node(block);
4080 	dbg_info *dbgi = get_irn_dbg_info(node);
4083 	new_node = new_bd_ia32_Jmp(dbgi, new_block);
4084 	SET_IA32_ORIG_NODE(new_node, node);
/* Transforms an indirect jump: the target may be folded as a memory operand
 * (address mode) or an immediate, otherwise it is taken from a register. */
4092 static ir_node *gen_IJmp(ir_node *node)
4094 	ir_node *block = get_nodes_block(node);
4095 	ir_node *new_block = be_transform_node(block);
4096 	dbg_info *dbgi = get_irn_dbg_info(node);
4097 	ir_node *op = get_IJmp_target(node);
4099 	ia32_address_mode_t am;
4100 	ia32_address_t *addr = &am.addr;
4102 	assert(get_irn_mode(op) == mode_P);
4104 	match_arguments(&am, block, NULL, op, NULL,
4105 	                match_am | match_immediate | match_upconv);
4107 	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4108 	                            addr->mem, am.new_op2);
4109 	set_am_attributes(new_node, &am);
4110 	SET_IA32_ORIG_NODE(new_node, node);
4112 	new_node = fix_mem_proj(new_node, &am);
/* Transforms a lowered l_Add (low word of a 64-bit add) into an ia32 Add.
 * The result is forced to mode_T so the carry flag can be consumed by the
 * matching Adc of the high word. */
4117 static ir_node *gen_ia32_l_Add(ir_node *node)
4119 	ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4120 	ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4121 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4122 			match_commutative | match_am | match_immediate |
4123 			match_mode_neutral);
/* gen_binop may hand back a Proj of the Add; unwrap to the Add itself. */
4125 	if (is_Proj(lowered)) {
4126 		lowered = get_Proj_pred(lowered);
4128 		assert(is_ia32_Add(lowered));
4129 		set_irn_mode(lowered, mode_T);
/* Transforms a lowered l_Adc (high word of a 64-bit add, consumes carry)
 * into an ia32 Adc via the flags-aware binop matcher. */
4135 static ir_node *gen_ia32_l_Adc(ir_node *node)
4137 	return gen_binop_flags(node, new_bd_ia32_Adc,
4138 			match_commutative | match_am | match_immediate |
4139 			match_mode_neutral);
4143  * Transforms a l_MulS into a "real" MulS node.
/* Unsigned widening multiply: produces the 64-bit product in edx:eax. */
4145  * @return the created ia32 Mul node
4147 static ir_node *gen_ia32_l_Mul(ir_node *node)
4149 	ir_node *left = get_binop_left(node);
4150 	ir_node *right = get_binop_right(node);
4152 	return gen_binop(node, left, right, new_bd_ia32_Mul,
4153 	                 match_commutative | match_am | match_mode_neutral);
4157  * Transforms a l_IMulS into a "real" IMul1OPS node.
/* Signed widening multiply (one-operand imul form), result in edx:eax. */
4159  * @return the created ia32 IMul1OP node
4161 static ir_node *gen_ia32_l_IMul(ir_node *node)
4163 	ir_node *left = get_binop_left(node);
4164 	ir_node *right = get_binop_right(node);
4166 	return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4167 	                 match_commutative | match_am | match_mode_neutral);
/* Transforms a lowered l_Sub (low word of a 64-bit subtract) into an ia32
 * Sub; forced to mode_T so the borrow flag feeds the matching Sbb. */
4170 static ir_node *gen_ia32_l_Sub(ir_node *node)
4172 	ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4173 	ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4174 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4175 			match_am | match_immediate | match_mode_neutral);
4177 	if (is_Proj(lowered)) {
4178 		lowered = get_Proj_pred(lowered);
4180 		assert(is_ia32_Sub(lowered));
4181 		set_irn_mode(lowered, mode_T);
/* Transforms a lowered l_Sbb (high word of a 64-bit subtract, consumes the
 * borrow) into an ia32 Sbb via the flags-aware binop matcher. */
4187 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4189 	return gen_binop_flags(node, new_bd_ia32_Sbb,
4190 			match_am | match_immediate | match_mode_neutral);
/* Converts a lowered 64-bit integer (low/high word pair) to a float:
 * both words are stored to an 8-byte frame slot, loaded with a 64-bit fild,
 * and — for unsigned sources — corrected by conditionally adding 2^64
 * (a known constant, selected via the sign bit of the high word). */
4193 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4195 	ir_node *src_block = get_nodes_block(node);
4196 	ir_node *block = be_transform_node(src_block);
4197 	ir_graph *irg = get_Block_irg(block);
4198 	dbg_info *dbgi = get_irn_dbg_info(node);
4199 	ir_node *frame = get_irg_frame(irg);
4200 	ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4201 	ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4202 	ir_node *new_val_low = be_transform_node(val_low);
4203 	ir_node *new_val_high = be_transform_node(val_high);
4205 	ir_node *sync, *fild, *res;
4207 	ir_node *store_high;
4211 	if (ia32_cg_config.use_sse2) {
4212 		panic("not implemented for SSE2");
/* Store low word at offset 0, high word at offset 4 (little endian). */
4216 	store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4218 	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4220 	SET_IA32_ORIG_NODE(store_low,  node);
4221 	SET_IA32_ORIG_NODE(store_high, node);
4223 	mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4224 	mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4226 	set_ia32_use_frame(store_low);
4227 	set_ia32_use_frame(store_high);
4228 	set_ia32_op_type(store_low, ia32_AddrModeD);
4229 	set_ia32_op_type(store_high, ia32_AddrModeD);
4230 	set_ia32_ls_mode(store_low, mode_Iu);
4231 	set_ia32_ls_mode(store_high, mode_Is);
4232 	arch_add_irn_flags(store_low, arch_irn_flags_spill);
4233 	arch_add_irn_flags(store_high, arch_irn_flags_spill);
4234 	add_ia32_am_offs_int(store_high, 4);
/* Both stores must be complete before the fild reads the slot. */
4238 	sync = new_rd_Sync(dbgi, block, 2, in);
4241 	fild = new_bd_ia32_fild(dbgi, block, frame, noreg_GP, sync);
4243 	set_ia32_use_frame(fild);
4244 	set_ia32_op_type(fild, ia32_AddrModeS);
4245 	set_ia32_ls_mode(fild, mode_Ls);
4247 	SET_IA32_ORIG_NODE(fild, node);
4249 	res = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
4251 	if (! mode_is_signed(get_irn_mode(val_high))) {
/* fild interpreted the value as signed; if the unsigned source had its top
 * bit set, add the bias constant.  The shr of the high word by 31 selects
 * entry 0 or 1 of the ULLBIAS table via the address-mode index. */
4252 		ia32_address_mode_t am;
4254 		ir_node *count = ia32_create_Immediate(irg, NULL, 0, 31);
4257 		am.addr.base = get_symconst_base();
4258 		am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4259 		am.addr.mem = nomem;
4262 		am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4263 		am.addr.tls_segment = false;
4264 		am.addr.use_frame = 0;
4265 		am.addr.frame_entity = NULL;
4266 		am.addr.symconst_sign = 0;
4267 		am.ls_mode = mode_F;
4268 		am.mem_proj = nomem;
4269 		am.op_type = ia32_AddrModeS;
4271 		am.new_op2 = ia32_new_NoReg_fp(irg);
4272 		am.pinned = op_pin_state_floats;
4274 		am.ins_permuted = false;
4276 		fadd = new_bd_ia32_fadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4277 		                        am.new_op1, am.new_op2, get_fpcw());
4278 		set_am_attributes(fadd, &am);
4280 		set_irn_mode(fadd, mode_T);
4281 		res = new_rd_Proj(NULL, fadd, mode_fp, pn_ia32_res);
/* Converts a float to a lowered 64-bit integer: an fist/fisttp writes the
 * full 64-bit result into an 8-byte frame slot; the two 32-bit words are
 * picked up later by gen_Proj_l_FloattoLL via loads from that slot. */
4286 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4288 	ir_node *src_block = get_nodes_block(node);
4289 	ir_node *block = be_transform_node(src_block);
4290 	ir_graph *irg = get_Block_irg(block);
4291 	dbg_info *dbgi = get_irn_dbg_info(node);
4292 	ir_node *frame = get_irg_frame(irg);
4293 	ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4294 	ir_node *new_val = be_transform_node(val);
4297 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4298 	SET_IA32_ORIG_NODE(fist, node);
4299 	set_ia32_use_frame(fist);
4300 	set_ia32_op_type(fist, ia32_AddrModeD);
4301 	set_ia32_ls_mode(fist, mode_Ls);
4302 	arch_add_irn_flags(fist, arch_irn_flags_spill);
/* gen_vfist may have produced either fist or fisttp; their M projs match. */
4304 	assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
4305 	return new_r_Proj(fist, mode_M, pn_ia32_fist_M);
/* Transforms a Proj of l_FloattoLL: loads the requested 32-bit half of the
 * 64-bit value that gen_ia32_l_FloattoLL stored to the frame slot
 * (offset 4 for the high word). */
4308 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4310 	ir_node *block = be_transform_node(get_nodes_block(node));
4311 	ir_graph *irg = get_Block_irg(block);
4312 	ir_node *pred = get_Proj_pred(node);
4313 	ir_node *new_pred = be_transform_node(pred);
4314 	ir_node *frame = get_irg_frame(irg);
4315 	dbg_info *dbgi = get_irn_dbg_info(node);
4316 	long pn = get_Proj_proj(node);
4321 	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4322 	SET_IA32_ORIG_NODE(load, node);
4323 	set_ia32_use_frame(load);
4324 	set_ia32_op_type(load, ia32_AddrModeS);
4325 	set_ia32_ls_mode(load, mode_Iu);
4326 	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
4327 	 * 32 bit from it with this particular load */
4328 	attr = get_ia32_attr(load);
4329 	attr->data.need_64bit_stackent = 1;
4331 	if (pn == pn_ia32_l_FloattoLL_res_high) {
4332 		add_ia32_am_offs_int(load, 4);
4334 		assert(pn == pn_ia32_l_FloattoLL_res_low);
4337 	proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4343  * Transform the Projs of an AddSP.
/* be_AddSP was lowered to ia32_SubSP (stack grows down), so the Proj numbers
 * are renumbered to the SubSP outputs; the sp result is pinned to esp. */
4345 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4347 	ir_node *pred = get_Proj_pred(node);
4348 	ir_node *new_pred = be_transform_node(pred);
4349 	dbg_info *dbgi = get_irn_dbg_info(node);
4350 	long proj = get_Proj_proj(node);
4352 	if (proj == pn_be_AddSP_sp) {
4353 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4354 		                           pn_ia32_SubSP_stack);
4355 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4357 	} else if (proj == pn_be_AddSP_res) {
4358 		return new_rd_Proj(dbgi, new_pred, mode_Iu,
4359 		                   pn_ia32_SubSP_addr);
4360 	} else if (proj == pn_be_AddSP_M) {
4361 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4364 	panic("No idea how to transform proj->AddSP");
4368  * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became ia32_AddSP. */
4370 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4372 	ir_node *pred = get_Proj_pred(node);
4373 	ir_node *new_pred = be_transform_node(pred);
4374 	dbg_info *dbgi = get_irn_dbg_info(node);
4375 	long proj = get_Proj_proj(node);
4377 	if (proj == pn_be_SubSP_sp) {
4378 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4379 		                           pn_ia32_AddSP_stack);
4380 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4382 	} else if (proj == pn_be_SubSP_M) {
4383 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4386 	panic("No idea how to transform proj->SubSP");
4390  * Transform and renumber the Projs from a Load.
/* Dispatches on what the Load was transformed into (Load, Conv_I2I via
 * source address mode, xLoad, fld) and maps each firm pn_Load_* number to
 * the corresponding ia32 output.  Exception projs additionally mark the new
 * node with an exception label. */
4392 static ir_node *gen_Proj_Load(ir_node *node)
4395 	ir_node *pred = get_Proj_pred(node);
4396 	dbg_info *dbgi = get_irn_dbg_info(node);
4397 	long proj = get_Proj_proj(node);
4399 	/* loads might be part of source address mode matches, so we don't
4400 	 * transform the ProjMs yet (with the exception of loads whose result is
4403 	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4406 		/* this is needed, because sometimes we have loops that are only
4407 		   reachable through the ProjM */
4408 		be_enqueue_preds(node);
4409 		/* do it in 2 steps, to silence firm verifier */
4410 		res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4411 		set_Proj_proj(res, pn_ia32_mem);
4415 	/* renumber the proj */
4416 	new_pred = be_transform_node(pred);
4417 	if (is_ia32_Load(new_pred)) {
4418 		switch ((pn_Load)proj) {
4420 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4422 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4423 		case pn_Load_X_except:
4424 			/* This Load might raise an exception. Mark it. */
4425 			set_ia32_exc_label(new_pred, 1);
4426 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4427 		case pn_Load_X_regular:
4428 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* Load folded into a widening conversion (source address mode). */
4430 	} else if (is_ia32_Conv_I2I(new_pred)) {
4431 		set_irn_mode(new_pred, mode_T);
4432 		switch ((pn_Load)proj) {
4434 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4436 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4437 		case pn_Load_X_except:
4438 			/* This Load might raise an exception. Mark it. */
4439 			set_ia32_exc_label(new_pred, 1);
4440 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4441 		case pn_Load_X_regular:
4442 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* SSE load. */
4444 	} else if (is_ia32_xLoad(new_pred)) {
4445 		switch ((pn_Load)proj) {
4447 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4449 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4450 		case pn_Load_X_except:
4451 			/* This Load might raise an exception. Mark it. */
4452 			set_ia32_exc_label(new_pred, 1);
4453 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4454 		case pn_Load_X_regular:
4455 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* x87 load. */
4457 	} else if (is_ia32_fld(new_pred)) {
4458 		switch ((pn_Load)proj) {
4460 			return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fld_res);
4462 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fld_M);
4463 		case pn_Load_X_except:
4464 			/* This Load might raise an exception. Mark it. */
4465 			set_ia32_exc_label(new_pred, 1);
4466 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_except);
4467 		case pn_Load_X_regular:
4468 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_regular);
4471 		/* can happen for ProJMs when source address mode happened for the
4474 		/* however it should not be the result proj, as that would mean the
4475 		   load had multiple users and should not have been used for
4477 		if (proj != pn_Load_M) {
4478 			panic("internal error: transformed node not a Load");
4480 		return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4483 	panic("No idea how to transform Proj(Load) %+F", node);
/* Transforms and renumbers the Projs of a Store.  The Store may have been
 * lowered to Store, fist, fisttp, fst, xStore, a Sync (float-const store
 * splitting) or folded into a destination-address-mode operation; each case
 * maps the firm pn_Store_* numbers onto the new node's outputs.  Exception
 * control flow is only supported for the plain store forms. */
4486 static ir_node *gen_Proj_Store(ir_node *node)
4488 	ir_node *pred = get_Proj_pred(node);
4489 	ir_node *new_pred = be_transform_node(pred);
4490 	dbg_info *dbgi = get_irn_dbg_info(node);
4491 	long pn = get_Proj_proj(node);
4493 	if (is_ia32_Store(new_pred)) {
4494 		switch ((pn_Store)pn) {
4496 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4497 		case pn_Store_X_except:
4498 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4499 		case pn_Store_X_regular:
4500 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4502 	} else if (is_ia32_fist(new_pred)) {
4503 		switch ((pn_Store)pn) {
4505 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fist_M);
4506 		case pn_Store_X_except:
4507 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_except);
4508 		case pn_Store_X_regular:
4509 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_regular);
4511 	} else if (is_ia32_fisttp(new_pred)) {
4512 		switch ((pn_Store)pn) {
4514 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fisttp_M);
4515 		case pn_Store_X_except:
4516 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_except);
4517 		case pn_Store_X_regular:
4518 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_regular);
4520 	} else if (is_ia32_fst(new_pred)) {
4521 		switch ((pn_Store)pn) {
4523 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fst_M);
4524 		case pn_Store_X_except:
4525 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_except);
4526 		case pn_Store_X_regular:
4527 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_regular);
4529 	} else if (is_ia32_xStore(new_pred)) {
4530 		switch ((pn_Store)pn) {
4532 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4533 		case pn_Store_X_except:
4534 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4535 		case pn_Store_X_regular:
4536 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4538 	} else if (is_Sync(new_pred)) {
4539 		/* hack for the case that gen_float_const_Store produced a Sync */
4540 		if (pn == pn_Store_M) {
4543 		panic("exception control flow not implemented yet");
4544 	} else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4545 		/* destination address mode */
4546 		if (pn == pn_Store_M) {
/* The memory result of a destination-AM node is its last output. */
4549 		panic("exception control flow for destination AM not implemented yet");
4552 	panic("No idea how to transform Proj(Store) %+F", node);
4556  * Transform and renumber the Projs from a Div or Mod instruction.
/* The Div may have been lowered to integer Div/IDiv, SSE xDiv or x87 fdiv;
 * the asserts confirm that Div and IDiv share their output numbering. */
4558 static ir_node *gen_Proj_Div(ir_node *node)
4560 	ir_node *pred = get_Proj_pred(node);
4561 	ir_node *new_pred = be_transform_node(pred);
4562 	dbg_info *dbgi = get_irn_dbg_info(node);
4563 	long proj = get_Proj_proj(node);
4565 	assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4566 	assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4568 	switch ((pn_Div)proj) {
4570 		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4571 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4572 		} else if (is_ia32_xDiv(new_pred)) {
4573 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4574 		} else if (is_ia32_fdiv(new_pred)) {
4575 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fdiv_M);
4577 			panic("Div transformed to unexpected thing %+F", new_pred);
4580 		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4581 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4582 		} else if (is_ia32_xDiv(new_pred)) {
4583 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4584 		} else if (is_ia32_fdiv(new_pred)) {
4585 			return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fdiv_res);
4587 			panic("Div transformed to unexpected thing %+F", new_pred);
4589 	case pn_Div_X_except:
4590 		set_ia32_exc_label(new_pred, 1);
4591 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4592 	case pn_Div_X_regular:
4593 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4596 	panic("No idea how to transform proj->Div");
4600  * Transform and renumber the Projs from a Div or Mod instruction.
/* Mod is always lowered to integer Div/IDiv (remainder output); float
 * remainders never reach this function (see the assert). */
4602 static ir_node *gen_Proj_Mod(ir_node *node)
4604 	ir_node *pred = get_Proj_pred(node);
4605 	ir_node *new_pred = be_transform_node(pred);
4606 	dbg_info *dbgi = get_irn_dbg_info(node);
4607 	long proj = get_Proj_proj(node);
4609 	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4610 	assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4611 	assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4613 	switch ((pn_Mod)proj) {
4615 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4617 		return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4618 	case pn_Mod_X_except:
4619 		set_ia32_exc_label(new_pred, 1);
4620 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4621 	case pn_Mod_X_regular:
4622 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4624 	panic("No idea how to transform proj->Mod");
4628  * Transform and renumber the Projs from a CopyB.
/* CopyB is lowered to either CopyB_i (fixed-size, inlined) or CopyB
 * (rep movs); each Proj is renumbered to the matching variant's output. */
4630 static ir_node *gen_Proj_CopyB(ir_node *node)
4632 	ir_node *pred = get_Proj_pred(node);
4633 	ir_node *new_pred = be_transform_node(pred);
4634 	dbg_info *dbgi = get_irn_dbg_info(node);
4635 	long proj = get_Proj_proj(node);
4637 	switch ((pn_CopyB)proj) {
4639 		if (is_ia32_CopyB_i(new_pred)) {
4640 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4641 		} else if (is_ia32_CopyB(new_pred)) {
4642 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4645 	case pn_CopyB_X_regular:
4646 		if (is_ia32_CopyB_i(new_pred)) {
4647 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4648 		} else if (is_ia32_CopyB(new_pred)) {
4649 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4652 	case pn_CopyB_X_except:
4653 		if (is_ia32_CopyB_i(new_pred)) {
4654 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4655 		} else if (is_ia32_CopyB(new_pred)) {
4656 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4661 	panic("No idea how to transform proj->CopyB");
/* Transforms an indirect be_Call into an ia32_Call.  The call target may be
 * folded via address mode or immediate; the register parameters (eax/ecx/edx)
 * are collected from the be_Call's constrained inputs; the fpcw input is the
 * last operand.  SSE2 calls are recorded for a post-processing pass. */
4664 static ir_node *gen_be_Call(ir_node *node)
4666 	dbg_info *const dbgi = get_irn_dbg_info(node);
4667 	ir_node *const src_block = get_nodes_block(node);
4668 	ir_node *const block = be_transform_node(src_block);
4669 	ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4670 	ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4671 	ir_node *const sp = be_transform_node(src_sp);
4672 	ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4673 	ia32_address_mode_t am;
4674 	ia32_address_t *const addr = &am.addr;
4679 	ir_node * eax = noreg_GP;
4680 	ir_node * ecx = noreg_GP;
4681 	ir_node * edx = noreg_GP;
4682 	unsigned const pop = be_Call_get_pop(node);
4683 	ir_type *const call_tp = be_Call_get_type(node);
4684 	int old_no_pic_adjust;
4685 	int throws_exception = ir_throws_exception(node);
4687 	/* Run the x87 simulator if the call returns a float value */
4688 	if (get_method_n_ress(call_tp) > 0) {
4689 		ir_type *const res_type = get_method_res_type(call_tp, 0);
4690 		ir_mode *const res_mode = get_type_mode(res_type);
4692 		if (res_mode != NULL && mode_is_float(res_mode)) {
4693 			ir_graph *const irg = get_Block_irg(block);
4694 			ia32_request_x87_sim(irg);
4698 	/* We do not want be_Call direct calls */
4699 	assert(be_Call_get_entity(node) == NULL);
4701 	/* special case for PIC trampoline calls */
/* Temporarily suppress the PIC base adjustment while matching the call
 * target; restored right after match_arguments. */
4702 	old_no_pic_adjust = ia32_no_pic_adjust;
4703 	ia32_no_pic_adjust = be_options.pic;
4705 	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4706 	                match_am | match_immediate | match_upconv);
4708 	ia32_no_pic_adjust = old_no_pic_adjust;
/* The fpcw is the last input; walk the remaining register parameters. */
4710 	i = get_irn_arity(node) - 1;
4711 	fpcw = be_transform_node(get_irn_n(node, i--));
4712 	for (; i >= n_be_Call_first_arg; --i) {
4713 		arch_register_req_t const *const req
4714 			= arch_get_irn_register_req_in(node, i);
4715 		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4717 		assert(req->type == arch_register_req_type_limited);
4718 		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4720 		switch (*req->limited) {
4721 			case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4722 			case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4723 			case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4724 			default: panic("Invalid GP register for register parameter");
4728 	mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4729 	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4730 	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4731 	ir_set_throws_exception(call, throws_exception);
4732 	set_am_attributes(call, &am);
4733 	call = fix_mem_proj(call, &am);
4735 	if (get_irn_pinned(node) == op_pin_state_pinned)
4736 		set_irn_pinned(call, op_pin_state_pinned);
4738 	SET_IA32_ORIG_NODE(call, node);
4740 	if (ia32_cg_config.use_sse2) {
4741 		/* remember this call for post-processing */
4742 		ARR_APP1(ir_node *, call_list, call);
4743 		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4750  * Transform Builtin trap
4752 static ir_node *gen_trap(ir_node *node)
/* Lower the generic trap builtin to an ia32 UD2 node; the builtin's
 * transformed memory dependency is threaded through the new node. */
4754 dbg_info *dbgi = get_irn_dbg_info(node);
4755 ir_node *block = be_transform_node(get_nodes_block(node));
4756 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4758 return new_bd_ia32_UD2(dbgi, block, mem);
4762  * Transform Builtin debugbreak
4764 static ir_node *gen_debugbreak(ir_node *node)
/* Lower the debugbreak builtin to an ia32 Breakpoint node, again
 * threading the builtin's memory dependency through it. */
4766 dbg_info *dbgi = get_irn_dbg_info(node);
4767 ir_node *block = be_transform_node(get_nodes_block(node));
4768 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4770 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4774  * Transform Builtin return_address
4776 static ir_node *gen_return_address(ir_node *node)
/* param 0: constant frame-walk depth; param 1: the frame pointer value. */
4778 ir_node *param = get_Builtin_param(node, 0);
4779 ir_node *frame = get_Builtin_param(node, 1);
4780 dbg_info *dbgi = get_irn_dbg_info(node);
4781 ir_tarval *tv = get_Const_tarval(param);
4782 ir_graph *irg = get_irn_irg(node);
4783 unsigned long value = get_tarval_long(tv);
4785 ir_node *block = be_transform_node(get_nodes_block(node));
4786 ir_node *ptr = be_transform_node(frame);
/* For a non-zero depth, walk up the frame chain with a ClimbFrame node;
 * ProduceVal supplies fresh (arbitrary) values for its scratch operands. */
4790 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4791 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4792 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4795 /* load the return address from this frame */
4796 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4798 set_irn_pinned(load, get_irn_pinned(node));
4799 set_ia32_op_type(load, ia32_AddrModeS);
4800 set_ia32_ls_mode(load, mode_Iu);
/* Address the return-address slot via the frame entity at offset 0. */
4802 set_ia32_am_offs_int(load, 0);
4803 set_ia32_use_frame(load);
4804 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4806 if (get_irn_pinned(node) == op_pin_state_floats) {
/* All load variants share the same result proj number, so marking the
 * node rematerializable is safe regardless of which kind it becomes. */
4807 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4808 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4809 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4810 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4813 SET_IA32_ORIG_NODE(load, node);
4814 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4818  * Transform Builtin frame_address
4820 static ir_node *gen_frame_address(ir_node *node)
/* param 0: constant frame-walk depth; param 1: the frame pointer value. */
4822 ir_node *param = get_Builtin_param(node, 0);
4823 ir_node *frame = get_Builtin_param(node, 1);
4824 dbg_info *dbgi = get_irn_dbg_info(node);
4825 ir_tarval *tv = get_Const_tarval(param);
4826 ir_graph *irg = get_irn_irg(node);
4827 unsigned long value = get_tarval_long(tv);
4829 ir_node *block = be_transform_node(get_nodes_block(node));
4830 ir_node *ptr = be_transform_node(frame);
/* For a non-zero depth, walk up the frame chain with a ClimbFrame node. */
4835 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4836 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4837 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4840 /* load the frame address from this frame */
4841 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4843 set_irn_pinned(load, get_irn_pinned(node));
4844 set_ia32_op_type(load, ia32_AddrModeS);
4845 set_ia32_ls_mode(load, mode_Iu);
/* NOTE(review): ent may be NULL when no frame-address entity exists;
 * the branch below then mimics gcc's behaviour — confirm upstream. */
4847 ent = ia32_get_frame_address_entity(irg);
4849 set_ia32_am_offs_int(load, 0);
4850 set_ia32_use_frame(load);
4851 set_ia32_frame_ent(load, ent);
4853 /* will fail anyway, but gcc does this: */
4854 set_ia32_am_offs_int(load, 0);
4857 if (get_irn_pinned(node) == op_pin_state_floats) {
/* Same proj-number equivalence as in gen_return_address. */
4858 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4859 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4860 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4861 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4864 SET_IA32_ORIG_NODE(load, node);
4865 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4869  * Transform Builtin prefetch
4871 static ir_node *gen_prefetch(ir_node *node)
4874 ir_node *ptr, *block, *mem, *base, *idx;
4875 ir_node *param, *new_node;
4878 ia32_address_t addr;
4880 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4881 /* no prefetch at all, route memory */
4882 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag (rw == 1 means prefetch for write). */
4885 param = get_Builtin_param(node, 1);
4886 tv = get_Const_tarval(param);
4887 rw = get_tarval_long(tv);
4889 /* construct load address */
4890 memset(&addr, 0, sizeof(addr));
4891 ptr = get_Builtin_param(node, 0);
4892 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4899 base = be_transform_node(base);
4905 idx = be_transform_node(idx);
4908 dbgi = get_irn_dbg_info(node);
4909 block = be_transform_node(get_nodes_block(node));
4910 mem = be_transform_node(get_Builtin_mem(node));
4912 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4913 /* we have 3DNow!, this was already checked above */
4914 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4915 } else if (ia32_cg_config.use_sse_prefetch) {
4916 /* note: rw == 1 is IGNORED in that case */
4917 param = get_Builtin_param(node, 2);
4918 tv = get_Const_tarval(param);
4919 locality = get_tarval_long(tv);
4921 /* SSE style prefetch */
/* Map the locality hint to PREFETCHNTA/T2/T1/T0 respectively. */
4924 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4927 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4930 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4933 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4937 assert(ia32_cg_config.use_3dnow_prefetch);
4938 /* 3DNow! style prefetch */
4939 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
4942 set_irn_pinned(new_node, get_irn_pinned(node));
4943 set_ia32_op_type(new_node, ia32_AddrModeS);
4944 set_ia32_ls_mode(new_node, mode_Bu);
4945 set_address(new_node, &addr);
4947 SET_IA32_ORIG_NODE(new_node, node);
4949 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4953  * Transform bsf like node
4955 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
/* Shared helper for single-operand builtins (bsf/bsr/popcnt): matches the
 * operand for address-mode folding, builds the node via 'func' and fixes
 * up the memory proj afterwards. */
4957 ir_node *param = get_Builtin_param(node, 0);
4958 dbg_info *dbgi = get_irn_dbg_info(node);
4960 ir_node *block = get_nodes_block(node);
4961 ir_node *new_block = be_transform_node(block);
4963 ia32_address_mode_t am;
4964 ia32_address_t *addr = &am.addr;
4967 match_arguments(&am, block, NULL, param, NULL, match_am);
4969 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4970 set_am_attributes(cnt, &am);
4971 set_ia32_ls_mode(cnt, get_irn_mode(param));
4973 SET_IA32_ORIG_NODE(cnt, node);
4974 return fix_mem_proj(cnt, &am);
4978  * Transform builtin ffs.
4980 static ir_node *gen_ffs(ir_node *node)
/* ffs(x): index of the lowest set bit plus one, 0 for x == 0.
 * Implemented as bsf plus a flags-based fixup for the zero case. */
4982 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4983 ir_node *real = skip_Proj(bsf);
4984 dbg_info *dbgi = get_irn_dbg_info(real);
4985 ir_node *block = get_nodes_block(real);
4986 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* Make sure the Bsf is in mode_T so both result and flags projs exist. */
4989 if (get_irn_mode(real) != mode_T) {
4990 set_irn_mode(real, mode_T);
4991 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
4994 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set by bsf). */
4997 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
4998 SET_IA32_ORIG_NODE(set, node);
5001 conv = new_bd_ia32_Conv_I2I_8bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5002 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or -1: an all-ones mask exactly when the input was zero. */
5005 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* orn = bsf | mask, then +1 via Lea gives 0 for zero input, bsf+1 otherwise. */
5008 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5009 set_ia32_ls_mode(orn, mode_Iu);
5010 set_ia32_commutative(orn);
5013 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5014 add_ia32_am_offs_int(add, 1);
5019  * Transform builtin clz.
5021 static ir_node *gen_clz(ir_node *node)
/* clz(x) = 31 - bsr(x) for 32-bit values; implemented as bsr ^ 31,
 * which is equivalent because bsr's result lies in [0, 31]. */
5023 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5024 ir_node *real = skip_Proj(bsr);
5025 dbg_info *dbgi = get_irn_dbg_info(real);
5026 ir_node *block = get_nodes_block(real);
5027 ir_graph *irg = get_Block_irg(block);
5028 ir_node *imm = ia32_create_Immediate(irg, NULL, 0, 31);
5030 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5034  * Transform builtin ctz.
5036 static ir_node *gen_ctz(ir_node *node)
/* ctz maps directly onto bsf (result undefined for zero input, as on x86). */
5038 return gen_unop_AM(node, new_bd_ia32_Bsf);
5042  * Transform builtin parity.
5044 static ir_node *gen_parity(ir_node *node)
5046 dbg_info *dbgi = get_irn_dbg_info(node);
5047 ir_node *block = get_nodes_block(node);
5048 ir_node *new_block = be_transform_node(block);
5049 ir_node *param = get_Builtin_param(node, 0);
5050 ir_node *new_param = be_transform_node(param);
5053 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5054  * so we have to do complicated xoring first.
5055  * (we should also better lower this before the backend so we still have a
5056  * chance for CSE, constant folding and other goodies for some of these
/* Fold all 32 bits down: xor upper half into lower half, then xor the
 * high byte of the low word into the low byte (XorHighLow). */
5059 ir_graph *const irg = get_Block_irg(new_block);
5060 ir_node *const count = ia32_create_Immediate(irg, NULL, 0, 16);
5061 ir_node *const shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5062 ir_node *const xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem, shr, new_param);
5063 ir_node *const xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5066 set_ia32_ls_mode(xorn, mode_Iu);
5067 set_ia32_commutative(xorn);
/* Materialise the parity flag of the final xor via setcc. */
5069 set_irn_mode(xor2, mode_T);
5070 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
5073 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5074 SET_IA32_ORIG_NODE(new_node, node);
5077 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
5078 SET_IA32_ORIG_NODE(new_node, node);
5083  * Transform builtin popcount
5085 static ir_node *gen_popcount(ir_node *node)
5087 ir_node *param = get_Builtin_param(node, 0);
5088 dbg_info *dbgi = get_irn_dbg_info(node);
5090 ir_node *block = get_nodes_block(node);
5091 ir_node *new_block = be_transform_node(block);
5094 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5096 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5097 if (ia32_cg_config.use_popcnt) {
5098 ia32_address_mode_t am;
5099 ia32_address_t *addr = &am.addr;
5102 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);
5104 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5105 set_am_attributes(cnt, &am);
5106 set_ia32_ls_mode(cnt, get_irn_mode(param));
5108 SET_IA32_ORIG_NODE(cnt, node);
5109 return fix_mem_proj(cnt, &am);
5112 new_param = be_transform_node(param);
5114 /* do the standard popcount algo */
5115 /* TODO: This is stupid, we should transform this before the backend,
5116  * to get CSE, localopts, etc. for the operations
5117  * TODO: This is also not the optimal algorithm (it is just the starting
5118  * example in hackers delight, they optimize it more on the following page)
5119  * But I'm too lazy to fix this now, as the code should get lowered before
5120  * the backend anyway.
/* Classic divide-and-conquer bit counting; Lea is used for the flag-free
 * additions of partial sums. */
5122 ir_graph *const irg = get_Block_irg(new_block);
5124 /* m1 = x & 0x55555555 */
5125 imm = ia32_create_Immediate(irg, NULL, 0, 0x55555555);
5126 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5129 simm = ia32_create_Immediate(irg, NULL, 0, 1);
5130 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5132 /* m2 = s1 & 0x55555555 */
5133 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 */
5136 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5138 /* m4 = m3 & 0x33333333 */
5139 imm = ia32_create_Immediate(irg, NULL, 0, 0x33333333);
5140 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5143 simm = ia32_create_Immediate(irg, NULL, 0, 2);
5144 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5146 /* m5 = s2 & 0x33333333 */
5147 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5150 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5152 /* m7 = m6 & 0x0F0F0F0F */
5153 imm = ia32_create_Immediate(irg, NULL, 0, 0x0F0F0F0F);
5154 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5157 simm = ia32_create_Immediate(irg, NULL, 0, 4);
5158 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5160 /* m8 = s3 & 0x0F0F0F0F */
5161 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5164 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5166 /* m10 = m9 & 0x00FF00FF */
5167 imm = ia32_create_Immediate(irg, NULL, 0, 0x00FF00FF);
5168 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5171 simm = ia32_create_Immediate(irg, NULL, 0, 8);
5172 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5174 /* m11 = s4 & 0x00FF00FF */
5175 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5177 /* m12 = m10 + m11 */
5178 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5180 /* m13 = m12 & 0x0000FFFF */
5181 imm = ia32_create_Immediate(irg, NULL, 0, 0x0000FFFF);
5182 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5184 /* s5 = m12 >> 16 */
5185 simm = ia32_create_Immediate(irg, NULL, 0, 16);
5186 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5188 /* res = m13 + s5 */
5189 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5193  * Transform builtin byte swap.
5195 static ir_node *gen_bswap(ir_node *node)
5197 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5198 dbg_info *dbgi = get_irn_dbg_info(node);
5200 ir_node *block = get_nodes_block(node);
5201 ir_node *new_block = be_transform_node(block);
5202 ir_mode *mode = get_irn_mode(param);
5203 unsigned size = get_mode_size_bits(mode);
/* 32-bit case: use the bswap instruction when the CPU supports it. */
5207 if (ia32_cg_config.use_bswap) {
5208 /* swap available */
5209 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* Fallback: three rotates emulate a 32-bit byte swap —
 * rol16 of the low word, rol16 of the full register, rol16 of the low word. */
5211 ir_graph *const irg = get_Block_irg(new_block);
5212 ir_node *const i8 = ia32_create_Immediate(irg, NULL, 0, 8);
5213 ir_node *const rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5214 ir_node *const i16 = ia32_create_Immediate(irg, NULL, 0, 16);
5215 ir_node *const rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5216 ir_node *const rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5217 set_ia32_ls_mode(rol1, mode_Hu);
5218 set_ia32_ls_mode(rol2, mode_Iu);
5219 set_ia32_ls_mode(rol3, mode_Hu);
5224 /* swap16 always available */
5225 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5228 panic("Invalid bswap size (%d)", size);
5233  * Transform builtin outport.
5235 static ir_node *gen_outport(ir_node *node)
/* param 0: I/O port (preferably folded into an immediate);
 * param 1: the value to write — its mode determines the access width. */
5237 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5238 ir_node *oldv = get_Builtin_param(node, 1);
5239 ir_mode *mode = get_irn_mode(oldv);
5240 ir_node *value = be_transform_node(oldv);
5241 ir_node *block = be_transform_node(get_nodes_block(node));
5242 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5243 dbg_info *dbgi = get_irn_dbg_info(node);
5245 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5246 set_ia32_ls_mode(res, mode);
5251  * Transform builtin inport.
5253 static ir_node *gen_inport(ir_node *node)
/* The access width comes from the builtin's declared result type. */
5255 ir_type *tp = get_Builtin_type(node);
5256 ir_type *rstp = get_method_res_type(tp, 0);
5257 ir_mode *mode = get_type_mode(rstp);
5258 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5259 ir_node *block = be_transform_node(get_nodes_block(node));
5260 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5261 dbg_info *dbgi = get_irn_dbg_info(node);
5263 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5264 set_ia32_ls_mode(res, mode);
5266 /* check for missing Result Proj */
5271  * Transform a builtin inner trampoline
5273 static ir_node *gen_inner_trampoline(ir_node *node)
/* Emits a sequence of stores that writes executable trampoline code to
 * 'ptr': mov ecx,<env>; jmp rel32 <callee>. */
5275 ir_node *ptr = get_Builtin_param(node, 0);
5276 ir_node *callee = get_Builtin_param(node, 1);
5277 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5278 ir_node *mem = get_Builtin_mem(node);
5279 ir_node *block = get_nodes_block(node);
5280 ir_node *new_block = be_transform_node(block);
5284 ir_node *trampoline;
5286 dbg_info *dbgi = get_irn_dbg_info(node);
5287 ia32_address_t addr;
5289 /* construct store address */
5290 memset(&addr, 0, sizeof(addr));
5291 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5293 if (addr.base == NULL) {
5294 addr.base = noreg_GP;
5296 addr.base = be_transform_node(addr.base);
5299 if (addr.index == NULL) {
5300 addr.index = noreg_GP;
5302 addr.index = be_transform_node(addr.index);
5304 addr.mem = be_transform_node(mem);
5306 ir_graph *const irg = get_Block_irg(new_block);
5307 /* mov ecx, <env> */
/* 0xB9 is the opcode byte for "mov ecx, imm32". */
5308 val = ia32_create_Immediate(irg, NULL, 0, 0xB9);
5309 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5310 set_irn_pinned(store, get_irn_pinned(node));
5311 set_ia32_op_type(store, ia32_AddrModeD);
5312 set_ia32_ls_mode(store, mode_Bu);
5313 set_address(store, &addr);
/* Store the 32-bit environment pointer as the mov's immediate. */
5317 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5318 addr.index, addr.mem, env);
5319 set_irn_pinned(store, get_irn_pinned(node));
5320 set_ia32_op_type(store, ia32_AddrModeD);
5321 set_ia32_ls_mode(store, mode_Iu);
5322 set_address(store, &addr);
5326 /* jmp rel <callee> */
/* 0xE9 is the opcode byte for "jmp rel32". */
5327 val = ia32_create_Immediate(irg, NULL, 0, 0xE9);
5328 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5329 set_irn_pinned(store, get_irn_pinned(node));
5330 set_ia32_op_type(store, ia32_AddrModeD);
5331 set_ia32_ls_mode(store, mode_Bu);
5332 set_address(store, &addr);
5336 trampoline = be_transform_node(ptr);
5338 /* the callee is typically an immediate */
/* rel32 = callee - (trampoline + 10); -10 accounts for the trampoline
 * code emitted before the jump target (jmp is relative to the next insn). */
5339 if (is_SymConst(callee)) {
5340 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5342 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5343 add_ia32_am_offs_int(rel, -10);
5345 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5347 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5348 addr.index, addr.mem, rel);
5349 set_irn_pinned(store, get_irn_pinned(node));
5350 set_ia32_op_type(store, ia32_AddrModeD);
5351 set_ia32_ls_mode(store, mode_Iu);
5352 set_address(store, &addr);
/* Result tuple: [0] = memory, [1] = trampoline address. */
5357 return new_r_Tuple(new_block, 2, in);
5361  * Transform Builtin node.
5363 static ir_node *gen_Builtin(ir_node *node)
/* Dispatch on the builtin kind to the specific transformer above. */
5365 ir_builtin_kind kind = get_Builtin_kind(node);
5369 return gen_trap(node);
5370 case ir_bk_debugbreak:
5371 return gen_debugbreak(node);
5372 case ir_bk_return_address:
5373 return gen_return_address(node);
5374 case ir_bk_frame_address:
5375 return gen_frame_address(node);
5376 case ir_bk_prefetch:
5377 return gen_prefetch(node);
5379 return gen_ffs(node);
5381 return gen_clz(node);
5383 return gen_ctz(node);
5385 return gen_parity(node);
5386 case ir_bk_popcount:
5387 return gen_popcount(node);
5389 return gen_bswap(node);
5391 return gen_outport(node);
5393 return gen_inport(node);
5394 case ir_bk_inner_trampoline:
5395 return gen_inner_trampoline(node);
5397 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5401  * Transform Proj(Builtin) node.
5403 static ir_node *gen_Proj_Builtin(ir_node *proj)
/* Re-wire the Projs of a transformed Builtin to the matching outputs of
 * the ia32 replacement node (or tuple, for inner_trampoline). */
5405 ir_node *node = get_Proj_pred(proj);
5406 ir_node *new_node = be_transform_node(node);
5407 ir_builtin_kind kind = get_Builtin_kind(node);
5410 case ir_bk_return_address:
5411 case ir_bk_frame_address:
5416 case ir_bk_popcount:
/* pn_Builtin_max+1 is the first result proj of the builtin. */
5418 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5421 case ir_bk_debugbreak:
5422 case ir_bk_prefetch:
5424 assert(get_Proj_proj(proj) == pn_Builtin_M);
5427 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5428 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5430 assert(get_Proj_proj(proj) == pn_Builtin_M);
5431 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5433 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] = memory, [1] = address. */
5434 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5435 return get_Tuple_pred(new_node, 1);
5437 assert(get_Proj_proj(proj) == pn_Builtin_M);
5438 return get_Tuple_pred(new_node, 0);
5441 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5444 static ir_node *gen_be_IncSP(ir_node *node)
/* Copy the IncSP unchanged, but record that the eventual add/sub of the
 * stack pointer clobbers the flags register. */
5446 ir_node *res = be_duplicate_node(node);
5447 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5453  * Transform the Projs from a be_Call.
5455 static ir_node *gen_Proj_be_Call(ir_node *node)
5457 ir_node *call = get_Proj_pred(node);
5458 ir_node *new_call = be_transform_node(call);
5459 dbg_info *dbgi = get_irn_dbg_info(node);
5460 long proj = get_Proj_proj(node);
5461 ir_mode *mode = get_irn_mode(node);
5464 if (proj == pn_be_Call_M) {
5465 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5467 /* transform call modes */
5468 if (mode_is_data(mode)) {
5469 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5473 /* Map from be_Call to ia32_Call proj number */
5474 if (proj == pn_be_Call_sp) {
5475 proj = pn_ia32_Call_stack;
5476 } else if (proj == pn_be_Call_M) {
5477 proj = pn_ia32_Call_M;
5478 } else if (proj == pn_be_Call_X_except) {
5479 proj = pn_ia32_Call_X_except;
5480 } else if (proj == pn_be_Call_X_regular) {
5481 proj = pn_ia32_Call_X_regular;
/* Result projs: find the ia32_Call output whose register constraint
 * matches the be_Call proj's constraint. */
5483 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5485 assert(proj >= pn_be_Call_first_res);
5486 assert(arch_register_req_is(req, limited));
5488 be_foreach_out(new_call, i) {
5489 arch_register_req_t const *const new_req = arch_get_irn_register_req_out(new_call, i);
5490 if (!arch_register_req_is(new_req, limited) ||
5491 new_req->cls != req->cls ||
5492 *new_req->limited != *req->limited)
5498 panic("no matching out requirement found");
5502 res = new_rd_Proj(dbgi, new_call, mode, proj);
5504 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5506 case pn_ia32_Call_stack:
5507 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5510 case pn_ia32_Call_fpcw:
5511 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5518 static ir_node *gen_Proj_ASM(ir_node *node)
/* Transform a Proj of an ASM node: the memory output is always the last
 * output of the transformed ASM; data outputs keep their position. */
5520 ir_mode *mode = get_irn_mode(node);
5521 ir_node *pred = get_Proj_pred(node);
5522 ir_node *new_pred = be_transform_node(pred);
5523 long pos = get_Proj_proj(node);
5525 if (mode == mode_M) {
5526 pos = arch_get_irn_n_outs(new_pred)-1;
5527 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5529 } else if (mode_is_float(mode)) {
5532 panic("unexpected proj mode at ASM");
5535 return new_r_Proj(new_pred, mode, pos);
5539  * Transform and potentially renumber Proj nodes.
5541 static ir_node *gen_Proj(ir_node *node)
/* Dispatch on the predecessor's opcode to the specialised Proj handlers. */
5543 ir_node *pred = get_Proj_pred(node);
5546 switch (get_irn_opcode(pred)) {
5548 return gen_Proj_Load(node);
5550 return gen_Proj_Store(node);
5552 return gen_Proj_ASM(node);
5554 return gen_Proj_Builtin(node);
5556 return gen_Proj_Div(node);
5558 return gen_Proj_Mod(node);
5560 return gen_Proj_CopyB(node);
5562 return gen_Proj_be_SubSP(node);
5564 return gen_Proj_be_AddSP(node);
5566 return gen_Proj_be_Call(node);
5568 proj = get_Proj_proj(node);
5570 case pn_Start_X_initial_exec: {
5571 ir_node *block = get_nodes_block(pred);
5572 ir_node *new_block = be_transform_node(block);
5573 dbg_info *dbgi = get_irn_dbg_info(node);
5574 /* we exchange the ProjX with a jump */
5575 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5583 if (is_ia32_l_FloattoLL(pred)) {
5584 return gen_Proj_l_FloattoLL(node);
5586 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5590 ir_mode *mode = get_irn_mode(node);
/* GP-register values are normalised to mode_Iu in the backend. */
5591 if (ia32_mode_needs_gp_reg(mode)) {
5592 ir_node *new_pred = be_transform_node(pred);
5593 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5594 get_Proj_proj(node));
5595 new_proj->node_nr = node->node_nr;
5600 return be_duplicate_node(node);
5604  * Enters all transform functions into the generic pointer
5606 static void register_transformers(void)
5608 /* first clear the generic function pointer for all ops */
5609 be_start_transform_setup();
/* One transform function per firm/backend opcode; nodes without an entry
 * fall back to the framework default. */
5611 be_set_transform_function(op_Add, gen_Add);
5612 be_set_transform_function(op_And, gen_And);
5613 be_set_transform_function(op_ASM, ia32_gen_ASM);
5614 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5615 be_set_transform_function(op_be_Call, gen_be_Call);
5616 be_set_transform_function(op_be_Copy, gen_be_Copy);
5617 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5618 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5619 be_set_transform_function(op_be_Return, gen_be_Return);
5620 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5621 be_set_transform_function(op_Builtin, gen_Builtin);
5622 be_set_transform_function(op_Cmp, gen_Cmp);
5623 be_set_transform_function(op_Cond, gen_Cond);
5624 be_set_transform_function(op_Const, gen_Const);
5625 be_set_transform_function(op_Conv, gen_Conv);
5626 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5627 be_set_transform_function(op_Div, gen_Div);
5628 be_set_transform_function(op_Eor, gen_Eor);
5629 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5630 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5631 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5632 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5633 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5634 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5635 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5636 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5637 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5638 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5639 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5640 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5641 be_set_transform_function(op_ia32_NoReg_FP, be_duplicate_node);
5642 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5643 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5644 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5645 be_set_transform_function(op_IJmp, gen_IJmp);
5646 be_set_transform_function(op_Jmp, gen_Jmp);
5647 be_set_transform_function(op_Load, gen_Load);
5648 be_set_transform_function(op_Minus, gen_Minus);
5649 be_set_transform_function(op_Mod, gen_Mod);
5650 be_set_transform_function(op_Mul, gen_Mul);
5651 be_set_transform_function(op_Mulh, gen_Mulh);
5652 be_set_transform_function(op_Mux, gen_Mux);
5653 be_set_transform_function(op_Not, gen_Not);
5654 be_set_transform_function(op_Or, gen_Or);
5655 be_set_transform_function(op_Phi, gen_Phi);
5656 be_set_transform_function(op_Proj, gen_Proj);
5657 be_set_transform_function(op_Rotl, gen_Rotl);
5658 be_set_transform_function(op_Shl, gen_Shl);
5659 be_set_transform_function(op_Shr, gen_Shr);
5660 be_set_transform_function(op_Shrs, gen_Shrs);
5661 be_set_transform_function(op_Store, gen_Store);
5662 be_set_transform_function(op_Sub, gen_Sub);
5663 be_set_transform_function(op_Switch, gen_Switch);
5664 be_set_transform_function(op_SymConst, gen_SymConst);
5665 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5667 be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
5671  * Pre-transform all unknown and noreg nodes.
5673 static void ia32_pretransform_node(void)
/* Eagerly transform the per-irg helper nodes and cache the frequently
 * used nomem/noreg_GP nodes in file-level globals for the transformers. */
5675 ir_graph *irg = current_ir_graph;
5676 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
5678 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5679 irg_data->noreg_fp = be_pre_transform_node(irg_data->noreg_fp);
5680 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5681 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5682 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5684 nomem = get_irg_no_mem(irg);
5685 noreg_GP = ia32_new_NoReg_gp(irg);
5689  * Post-process all calls if we are in SSE mode.
5690  * The ABI requires that the results are in st0, copy them
5691  * to a xmm register.
5693 static void postprocess_fp_call_results(void)
5697 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5698 ir_node *call = call_list[i];
5699 ir_type *mtp = call_types[i];
/* Inspect every result of the call's method type, back to front. */
5702 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5703 ir_type *res_tp = get_method_res_type(mtp, j);
5704 ir_node *res, *new_res;
5707 if (! is_atomic_type(res_tp)) {
5708 /* no floating point return */
5711 res_mode = get_type_mode(res_tp);
5712 if (! mode_is_float(res_mode)) {
5713 /* no floating point return */
/* The x87 results live in the st0+ projs of the ia32 Call. */
5717 res = be_get_Proj_for_pn(call, pn_ia32_Call_st0 + j);
5720 /* now patch the users */
5721 foreach_out_edge_safe(res, edge) {
5722 ir_node *succ = get_edge_src_irn(edge);
5725 if (be_is_Keep(succ))
5728 if (is_ia32_xStore(succ)) {
5729 /* an xStore can be patched into an vfst */
5730 dbg_info *db = get_irn_dbg_info(succ);
5731 ir_node *block = get_nodes_block(succ);
5732 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5733 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5734 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5735 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5736 ir_mode *mode = get_ia32_ls_mode(succ);
5738 ir_node *st = new_bd_ia32_fst(db, block, base, idx, mem, value, mode);
5739 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_fst_M);
5740 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5741 if (is_ia32_use_frame(succ))
5742 set_ia32_use_frame(st);
5743 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5744 set_irn_pinned(st, get_irn_pinned(succ));
5745 set_ia32_op_type(st, ia32_AddrModeD);
/* The store variants must agree on proj numbers for an in-place swap. */
5747 assert((long)pn_ia32_xStore_M == (long)pn_ia32_fst_M);
5748 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_fst_X_regular);
5749 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_fst_X_except);
/* Other users: bounce st0 through a stack slot (fst to the frame,
 * then xLoad into an SSE register) and reroute memory users. */
5756 if (new_res == NULL) {
5757 dbg_info *db = get_irn_dbg_info(call);
5758 ir_node *block = get_nodes_block(call);
5759 ir_node *frame = get_irg_frame(current_ir_graph);
5760 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5761 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5762 ir_node *vfst, *xld, *new_mem;
5765 /* store st(0) on stack */
5766 vfst = new_bd_ia32_fst(db, block, frame, noreg_GP, call_mem,
5768 set_ia32_op_type(vfst, ia32_AddrModeD);
5769 set_ia32_use_frame(vfst);
5770 arch_add_irn_flags(vfst, arch_irn_flags_spill);
5772 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_fst_M);
5774 /* load into SSE register */
5775 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5777 set_ia32_op_type(xld, ia32_AddrModeS);
5778 set_ia32_use_frame(xld);
5780 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5781 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
5783 if (old_mem != NULL) {
5784 edges_reroute(old_mem, new_mem);
5788 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5794 /* do the transformation */
5795 void ia32_transform_graph(ir_graph *irg)
/* Entry point: transform 'irg' from generic firm IR to ia32 nodes.
 * Sets up the transformer table, heights and address-mode analysis,
 * disables CSE for the duration (attributes are set after node creation),
 * runs the transformation and — in SSE2 mode — fixes up x87 call results. */
5799 register_transformers();
5800 initial_fpcw = NULL;
5801 ia32_no_pic_adjust = 0;
5803 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5805 be_timer_push(T_HEIGHTS);
5806 ia32_heights = heights_new(irg);
5807 be_timer_pop(T_HEIGHTS);
5808 ia32_calculate_non_address_mode_nodes(irg);
5810 /* the transform phase is not safe for CSE (yet) because several nodes get
5811  * attributes set after their creation */
5812 cse_last = get_opt_cse();
/* call_list/call_types record SSE-mode calls for postprocessing. */
5815 call_list = NEW_ARR_F(ir_node *, 0);
5816 call_types = NEW_ARR_F(ir_type *, 0);
5817 be_transform_graph(irg, ia32_pretransform_node);
5819 if (ia32_cg_config.use_sse2)
5820 postprocess_fp_call_results();
5821 DEL_ARR_F(call_types);
5822 DEL_ARR_F(call_list);
5824 set_opt_cse(cse_last);
5826 ia32_free_non_address_mode_nodes();
5827 heights_free(ia32_heights);
5828 ia32_heights = NULL;
5831 void ia32_init_transform(void)
5833 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");