2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* its enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
138 * returns true if constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several interior lines (braces, mode checks, return statements) were
 * dropped.  The fragment is preserved verbatim; restore the complete body
 * from the upstream libFirm sources before attempting to compile.
 * Visible intent: decide whether an SSE float constant can be constructed
 * with simple instructions (null tarval; more cases under
 * CONSTRUCT_SSE_CONST) rather than loaded from memory. */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
168 if (mode == mode_D) {
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
190 if (be_options.pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * many interior lines (braces, declarations of load/res/floatent, else
 * branches, returns) were dropped.  The fragment is preserved verbatim;
 * restore the complete body from the upstream libFirm sources before
 * attempting to compile.
 * Visible intent: transform a firm Const node into ia32 code.  Float
 * constants go to SSE (xZero/xMovd or an xLoad from a constant entity) or
 * x87 (vfldz/vfld1 or a vfld from a constant entity); loads from constant
 * entities are marked rematerializable.  Integer constants become a plain
 * ia32 Const after converting the tarval to mode_Iu. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
225 } else if (tarval_is_one(tv)) {
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 } else { /* non-float mode */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several interior lines (braces, declarations of cnst/entity, else
 * branches, returns) were dropped.  The fragment is preserved verbatim;
 * restore the complete body from the upstream libFirm sources before
 * attempting to compile.
 * Visible intent: transform a firm SymConst.  Float-mode SymConsts become
 * an SSE xLoad (mode_D) or x87 vfld (ia32_mode_E) referencing the entity;
 * only symconst_addr_ent is supported; TLS entities are addressed via
 * LdTls + Lea; other entities become an ia32 Const carrying the entity. */
337 * Transforms a SymConst.
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several interior lines (braces, struct field declarations, the cache-miss
 * guard, tv/mode declarations, else branch) were dropped.  The fragment is
 * preserved verbatim; restore the complete body from the upstream libFirm
 * sources before attempting to compile.
 * Visible intent: lazily create and cache entities for well-known FP
 * constants (sign/abs masks, ULL bias).  ia32_ULLBIAS gets a private
 * constant float[2] global with initializer {0, bias}; the others use
 * ia32_create_float_const_entity. */
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
456 if (kct == ia32_ULLBIAS) {
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several interior lines (braces, declarations of load/pn/mode, most return
 * statements, the Proj check) were dropped.  The fragment is preserved
 * verbatim; restore the complete body from the upstream libFirm sources
 * before attempting to compile.
 * Visible checks: simple float constants are accepted; a Proj(Load, res) is
 * only accepted when it is in the same block, not mode_E, has the expected
 * edge count (1, or 2 with match_two_users), is not already transformed,
 * and neither other operand depends on the Load's memory. */
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* NOTE(review): this struct declaration is incomplete in this extraction —
 * most of its fields (visible users elsewhere in this file reference at
 * least addr, ls_mode, mem_proj, pinned, new_op1, new_op2) and the closing
 * brace were dropped, and original line numbers are fused onto each line.
 * Restore the full declaration from the upstream libFirm sources.
 * Purpose: aggregates the result of matching a node's operands into ia32
 * addressing/operand modes (see match_arguments below). */
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type;
559 unsigned commutative : 1;
560 unsigned ins_permuted : 1;
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several interior lines (braces, declarations of load/ptr/mem/new_mem, the
 * memset/early-return around the Const case, addr->mem assignments) were
 * dropped.  The fragment is preserved verbatim; restore the complete body
 * from the upstream libFirm sources before attempting to compile.
 * Visible intent: fill am->addr either for a float Const (via a constant
 * entity addressed from the symconst base) or for a Proj(Load) (taking
 * over the Load's address, pinned state, load mode and mem Proj). */
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
584 if (is_Const(node)) {
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
596 am->ls_mode = get_type_mode(get_entity_type(entity));
597 am->pinned = op_pin_state_floats;
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
646 set_ia32_commutative(node);
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 src_mode = get_irn_mode(get_Conv_op(node));
666 dest_mode = get_irn_mode(node);
668 ia32_mode_needs_gp_reg(src_mode) &&
669 ia32_mode_needs_gp_reg(dest_mode) &&
670 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
673 /** Skip all Down-Conv's on a given node and return the resulting node. */
674 ir_node *ia32_skip_downconv(ir_node *node)
676 while (is_downconv(node)) {
677 /* we only want to skip the conv when we're the only user
678 * (because this test is used in the context of address-mode selection
679 * and we don't want to use address mode for multiple users) */
680 if (get_irn_n_edges(node) > 1)
683 node = get_Conv_op(node);
689 static bool is_sameconv(ir_node *node)
697 /* we only want to skip the conv when we're the only user
698 * (because this test is used in the context of address-mode selection
699 * and we don't want to use address mode for multiple users) */
700 if (get_irn_n_edges(node) > 1)
703 src_mode = get_irn_mode(get_Conv_op(node));
704 dest_mode = get_irn_mode(node);
706 ia32_mode_needs_gp_reg(src_mode) &&
707 ia32_mode_needs_gp_reg(dest_mode) &&
708 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
711 /** Skip all signedness convs */
712 static ir_node *ia32_skip_sameconv(ir_node *node)
714 while (is_sameconv(node)) {
715 node = get_Conv_op(node);
721 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
723 ir_mode *mode = get_irn_mode(node);
724 ir_node *block = get_nodes_block(node);
725 dbg_info *dbgi = get_irn_dbg_info(node);
726 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
729 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
731 ir_mode *mode = get_irn_mode(node);
732 ir_node *block = get_nodes_block(node);
733 dbg_info *dbgi = get_irn_dbg_info(node);
734 /* normalize to an unsigned mode */
735 switch (get_mode_size_bits(mode)) {
736 case 8: mode = mode_Bu; break;
737 case 16: mode = mode_Hu; break;
739 panic("ia32: invalid mode in zest: %+F", node);
741 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
744 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
746 ir_mode *mode = get_irn_mode(node);
747 if (mode_is_signed(mode)) {
748 return transform_sext(node, orig_node);
750 return transform_zext(node, orig_node);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * many interior lines (braces, declarations of use_am/use_immediate/noreg,
 * else branches, the match_try_am early-return path, addr->mem default)
 * were dropped.  The fragment is preserved verbatim; restore the complete
 * body from the upstream libFirm sources before attempting to compile.
 * Visible structure: clear am; decode flags; forbid 8/16-bit AM unless the
 * corresponding match flag is set; skip down-/same-convs for mode-neutral
 * ops; try an immediate for op2; then try source address mode for op2 or
 * (if commutative) op1 — permuting inputs in the latter case; otherwise
 * transform operands normally, up-/zero-extending sub-32-bit operands as
 * requested; finally default base/index to noreg_GP and publish
 * new_op1/new_op2/commutative. */
755 * matches operands of a node into ia32 addressing/operand modes. This covers
756 * usage of source address mode, immediates, operations with non 32-bit modes,
758 * The resulting data is filled into the @p am struct. block is the block
759 * of the node whose arguments are matched. op1, op2 are the first and second
760 * input that are matched (op1 may be NULL). other_op is another unrelated
761 * input that is not matched! but which is needed sometimes to check if AM
762 * for op1/op2 is legal.
763 * @p flags describes the supported modes of the operation in detail.
765 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
766 ir_node *op1, ir_node *op2, ir_node *other_op,
769 ia32_address_t *addr = &am->addr;
770 ir_mode *mode = get_irn_mode(op2);
771 int mode_bits = get_mode_size_bits(mode);
772 ir_node *new_op1, *new_op2;
774 unsigned commutative;
775 int use_am_and_immediates;
778 memset(am, 0, sizeof(am[0]));
780 commutative = (flags & match_commutative) != 0;
781 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
782 use_am = (flags & match_am) != 0;
783 use_immediate = (flags & match_immediate) != 0;
784 assert(!use_am_and_immediates || use_immediate);
787 assert(!commutative || op1 != NULL);
788 assert(use_am || !(flags & match_8bit_am));
789 assert(use_am || !(flags & match_16bit_am));
791 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
792 (mode_bits == 16 && !(flags & match_16bit_am))) {
796 /* we can simply skip downconvs for mode neutral nodes: the upper bits
797 * can be random for these operations */
798 if (flags & match_mode_neutral) {
799 op2 = ia32_skip_downconv(op2);
801 op1 = ia32_skip_downconv(op1);
804 op2 = ia32_skip_sameconv(op2);
806 op1 = ia32_skip_sameconv(op1);
810 /* match immediates. firm nodes are normalized: constants are always on the
813 if (!(flags & match_try_am) && use_immediate) {
814 new_op2 = ia32_try_create_Immediate(op2, 0);
817 if (new_op2 == NULL &&
818 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
819 build_address(am, op2, ia32_create_am_normal);
820 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
821 if (mode_is_float(mode)) {
822 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
826 am->op_type = ia32_AddrModeS;
827 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
829 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
831 build_address(am, op1, ia32_create_am_normal);
833 if (mode_is_float(mode)) {
834 noreg = ia32_new_NoReg_vfp(current_ir_graph);
839 if (new_op2 != NULL) {
842 new_op1 = be_transform_node(op2);
844 am->ins_permuted = true;
846 am->op_type = ia32_AddrModeS;
848 am->op_type = ia32_Normal;
850 if (flags & match_try_am) {
856 mode = get_irn_mode(op2);
857 if (get_mode_size_bits(mode) != 32
858 && (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
859 if (flags & match_upconv) {
860 new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
862 new_op2 = transform_upconv(op2, op2);
863 } else if (flags & match_zero_ext) {
864 new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
866 new_op2 = transform_zext(op2, op2);
868 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
870 new_op2 = be_transform_node(op2);
871 assert(flags & match_mode_neutral);
875 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
877 new_op2 = be_transform_node(op2);
881 if (addr->base == NULL)
882 addr->base = noreg_GP;
883 if (addr->index == NULL)
884 addr->index = noreg_GP;
885 if (addr->mem == NULL)
888 am->new_op1 = new_op1;
889 am->new_op2 = new_op2;
890 am->commutative = commutative;
894 * "Fixes" a node that uses address mode by turning it into mode_T
895 * and returning a pn_ia32_res Proj.
897 * @param node the node
898 * @param am its address mode
900 * @return a Proj(pn_ia32_res) if a memory address mode is used,
903 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
908 if (am->mem_proj == NULL)
911 /* we have to create a mode_T so the old MemProj can attach to us */
912 mode = get_irn_mode(node);
913 load = get_Proj_pred(am->mem_proj);
915 be_set_transformed_node(load, node);
917 if (mode != mode_T) {
918 set_irn_mode(node, mode_T);
919 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
926 * Construct a standard binary operation, set AM and immediate if required.
928 * @param node The original node for which the binop is created
929 * @param op1 The first operand
930 * @param op2 The second operand
931 * @param func The node constructor function
932 * @return The constructed ia32 node.
934 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
935 construct_binop_func *func, match_flags_t flags)
938 ir_node *block, *new_block, *new_node;
939 ia32_address_mode_t am;
940 ia32_address_t *addr = &am.addr;
942 block = get_nodes_block(node);
943 match_arguments(&am, block, op1, op2, NULL, flags);
945 dbgi = get_irn_dbg_info(node);
946 new_block = be_transform_node(block);
947 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
948 am.new_op1, am.new_op2);
949 set_am_attributes(new_node, &am);
950 /* we can't use source address mode anymore when using immediates */
951 if (!(flags & match_am_and_immediates) &&
952 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
953 set_ia32_am_support(new_node, ia32_am_none);
954 SET_IA32_ORIG_NODE(new_node, node);
956 new_node = fix_mem_proj(new_node, &am);
962 * Generic names for the inputs of an ia32 binary op.
965 n_ia32_l_binop_left, /**< ia32 left input */
966 n_ia32_l_binop_right, /**< ia32 right input */
967 n_ia32_l_binop_eflags /**< ia32 eflags input */
969 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
970 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
971 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
972 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
973 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
974 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
977 * Construct a binary operation which also consumes the eflags.
979 * @param node The node to transform
980 * @param func The node constructor function
981 * @param flags The match flags
982 * @return The constructor ia32 node
984 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
987 ir_node *src_block = get_nodes_block(node);
988 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
989 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
990 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
992 ir_node *block, *new_node, *new_eflags;
993 ia32_address_mode_t am;
994 ia32_address_t *addr = &am.addr;
996 match_arguments(&am, src_block, op1, op2, eflags, flags);
998 dbgi = get_irn_dbg_info(node);
999 block = be_transform_node(src_block);
1000 new_eflags = be_transform_node(eflags);
1001 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1002 am.new_op1, am.new_op2, new_eflags);
1003 set_am_attributes(new_node, &am);
1004 /* we can't use source address mode anymore when using immediates */
1005 if (!(flags & match_am_and_immediates) &&
1006 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1007 set_ia32_am_support(new_node, ia32_am_none);
1008 SET_IA32_ORIG_NODE(new_node, node);
1010 new_node = fix_mem_proj(new_node, &am);
1015 static ir_node *get_fpcw(void)
1017 if (initial_fpcw != NULL)
1018 return initial_fpcw;
1020 initial_fpcw = be_transform_node(old_initial_fpcw);
1021 return initial_fpcw;
1024 static ir_node *skip_float_upconv(ir_node *node)
1026 ir_mode *mode = get_irn_mode(node);
1027 assert(mode_is_float(mode));
1029 while (is_Conv(node)) {
1030 ir_node *pred = get_Conv_op(node);
1031 ir_mode *pred_mode = get_irn_mode(pred);
1034 * suboptimal, but without this check the address mode matcher
1035 * can incorrectly think that something has only 1 user
1037 if (get_irn_n_edges(node) > 1)
1040 if (!mode_is_float(pred_mode)
1041 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1049 static void check_x87_floatmode(ir_mode *mode)
1051 if (mode != ia32_mode_E) {
1052 panic("ia32: x87 only supports x86 extended float mode");
1057 * Construct a standard binary operation, set AM and immediate if required.
1059 * @param op1 The first operand
1060 * @param op2 The second operand
1061 * @param func The node constructor function
1062 * @return The constructed ia32 node.
1064 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1065 construct_binop_float_func *func)
1071 ia32_address_mode_t am;
1072 ia32_address_t *addr = &am.addr;
1073 ia32_x87_attr_t *attr;
1074 /* All operations are considered commutative, because there are reverse
1076 match_flags_t flags = match_commutative | match_am;
1078 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1079 check_x87_floatmode(mode);
1081 op1 = skip_float_upconv(op1);
1082 op2 = skip_float_upconv(op2);
1084 block = get_nodes_block(node);
1085 match_arguments(&am, block, op1, op2, NULL, flags);
1087 dbgi = get_irn_dbg_info(node);
1088 new_block = be_transform_node(block);
1089 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1090 am.new_op1, am.new_op2, get_fpcw());
1091 set_am_attributes(new_node, &am);
1093 attr = get_ia32_x87_attr(new_node);
1094 attr->attr.data.ins_permuted = am.ins_permuted;
1096 SET_IA32_ORIG_NODE(new_node, node);
1098 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several structural lines (braces, declarations of new_op1/new_op2 and the
 * dbgi/return at the end, else branches, `break;`/`op2 = op;` in the Conv
 * skip loop) were dropped.  The fragment is preserved verbatim; restore the
 * complete body from the upstream libFirm sources before attempting to
 * compile.
 * Visible intent: build a shift/rotate ia32 node; only modulo-32 shifts are
 * supported; op1 is down-/same-conv-skipped or up-/zero-extended per flags;
 * Convs on the shift amount are skipped (>=5 significant bits suffice); a
 * third "dependency" operand of the lowered node is carried over when it
 * has other users. */
1104 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1106 * @param op1 The first operand
1107 * @param op2 The second operand
1108 * @param func The node constructor function
1109 * @return The constructed ia32 node.
1111 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1112 construct_shift_func *func,
1113 match_flags_t flags)
1115 ir_mode *mode = get_irn_mode(node);
1117 assert(! mode_is_float(mode));
1118 assert(flags & match_immediate);
1119 assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);
1121 if (get_mode_modulo_shift(mode) != 32) {
1122 /* TODO: implement special cases for non-modulo shifts */
1123 panic("modulo shift!=32 not supported by ia32 backend");
1128 if (flags & match_mode_neutral) {
1129 op1 = ia32_skip_downconv(op1);
1130 new_op1 = be_transform_node(op1);
1132 op1 = ia32_skip_sameconv(op1);
1133 if (get_mode_size_bits(mode) != 32) {
1134 if (flags & match_upconv) {
1135 new_op1 = transform_upconv(op1, node);
1136 } else if (flags & match_zero_ext) {
1137 new_op1 = transform_zext(op1, node);
1139 /* match_mode_neutral not handled here because it makes no
1140 * sense for shift operations */
1141 panic("ia32 code selection failed for %+F", node);
1144 new_op1 = be_transform_node(op1);
1148 /* the shift amount can be any mode that is bigger than 5 bits, since all
1149 * other bits are ignored anyway */
1150 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1151 ir_node *const op = get_Conv_op(op2);
1152 if (mode_is_float(get_irn_mode(op)))
1155 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1157 new_op2 = create_immediate_or_transform(op2, 0);
1159 dbg_info *dbgi = get_irn_dbg_info(node);
1160 ir_node *block = get_nodes_block(node);
1161 ir_node *new_block = be_transform_node(block);
1162 ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
1163 SET_IA32_ORIG_NODE(new_node, node);
1165 /* lowered shift instruction may have a dependency operand, handle it here */
1166 if (get_irn_arity(node) == 3) {
1167 /* we have a dependency */
1168 ir_node* dep = get_irn_n(node, 2);
1169 if (get_irn_n_edges(dep) > 1) {
1170 /* ... which has at least one user other than 'node' */
1171 ir_node *new_dep = be_transform_node(dep);
1172 add_irn_dep(new_node, new_dep);
1181 * Construct a standard unary operation, set AM and immediate if required.
1183 * @param op The operand
1184 * @param func The node constructor function
1185 * @return The constructed ia32 node.
1187 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1188 match_flags_t flags)
1191 ir_node *block, *new_block, *new_op, *new_node;
1193 assert(flags == 0 || flags == match_mode_neutral);
1194 if (flags & match_mode_neutral) {
1195 op = ia32_skip_downconv(op);
1198 new_op = be_transform_node(op);
1199 dbgi = get_irn_dbg_info(node);
1200 block = get_nodes_block(node);
1201 new_block = be_transform_node(block);
1202 new_node = func(dbgi, new_block, new_op);
1204 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): this copy of the definition below is damaged by extraction:
 * the original source line number is fused onto the front of every line and
 * several structural lines (braces, declarations of base/idx/res, the
 * NULL-checks around the be_transform_node calls, the `base = tls_base;`
 * branch, the final return) were dropped.  The fragment is preserved
 * verbatim; restore the complete body from the upstream libFirm sources
 * before attempting to compile.
 * Visible intent: build an ia32 Lea for an address; since segment overrides
 * do not work on Lea, a TLS address is rewritten to go through an LdTls
 * base first, then the address attributes are copied onto the Lea. */
1209 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1210 ia32_address_t *addr)
1220 base = be_transform_node(base);
1227 idx = be_transform_node(idx);
1230 /* segment overrides are ineffective for Leas :-( so we have to patch
1232 if (addr->tls_segment) {
1233 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1234 assert(addr->symconst_ent != NULL);
1235 if (base == noreg_GP)
1238 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1239 addr->tls_segment = false;
1242 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1243 set_address(res, addr);
1249 * Returns non-zero if a given address mode has a symbolic or
1250 * numerical offset != 0.
1252 static int am_has_immediates(const ia32_address_t *addr)
1254 return addr->offset != 0 || addr->symconst_ent != NULL
1255 || addr->frame_entity || addr->use_frame;
/* constructor signature shared by ShlD and ShrD node builders */
1258 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1259 ir_node *high, ir_node *low,
1263 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1264 * op1 - target to be shifted
1265 * op2 - contains bits to be shifted into target
1267 * Only op3 can be an immediate.
1269 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1270 ir_node *high, ir_node *low, ir_node *count,
1271 new_shiftd_func func)
1273 ir_node *new_block = be_transform_node(block);
1274 ir_node *new_high = be_transform_node(high);
1275 ir_node *new_low = be_transform_node(low);
1279 /* the shift amount can be any mode that is bigger than 5 bits, since all
1280 * other bits are ignored anyway */
/* peel off single-user integer Convs around the count operand */
1281 while (is_Conv(count) &&
1282 get_irn_n_edges(count) == 1 &&
1283 mode_is_int(get_irn_mode(count))) {
1284 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1285 count = get_Conv_op(count);
/* the count may become an immediate operand if it is constant */
1287 new_count = create_immediate_or_transform(count, 0);
1289 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1294 * test wether 2 values result in 'x' and '32-x' when interpreted as a shift
1297 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1299 if (is_Const(value1) && is_Const(value2)) {
1300 ir_tarval *tv1 = get_Const_tarval(value1);
1301 ir_tarval *tv2 = get_Const_tarval(value2);
1302 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1303 long v1 = get_tarval_long(tv1);
1304 long v2 = get_tarval_long(tv2);
1305 return v1 <= v2 && v2 == 32-v1;
/* Try to recognize the double-shift patterns (ShlD/ShrD) that lower_dw
 * produces for 64bit shifts, and turn them into a single ia32 ShlD/ShrD.
 * Returns NULL when no pattern matches.
 * NOTE(review): several lines (early-outs, constructor arguments such as
 * new_bd_ia32_ShlD/ShrD, closing braces) are elided from this listing. */
1311 static ir_node *match_64bit_shift(ir_node *node)
1313 ir_node *op1 = get_binop_left(node);
1314 ir_node *op2 = get_binop_right(node);
1315 assert(is_Or(node) || is_Add(node));
1323 /* match ShlD operation */
1324 if (is_Shl(op1) && is_Shr(op2)) {
1325 ir_node *shl_right = get_Shl_right(op1);
1326 ir_node *shl_left = get_Shl_left(op1);
1327 ir_node *shr_right = get_Shr_right(op2);
1328 ir_node *shr_left = get_Shr_left(op2);
1329 /* constant ShlD operation */
1330 if (is_complementary_shifts(shl_right, shr_right)) {
1331 dbg_info *dbgi = get_irn_dbg_info(node);
1332 ir_node *block = get_nodes_block(node);
1333 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1336 /* constant ShrD operation */
1337 if (is_complementary_shifts(shr_right, shl_right)) {
1338 dbg_info *dbgi = get_irn_dbg_info(node);
1339 ir_node *block = get_nodes_block(node);
1340 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1343 /* lower_dw produces the following for ShlD:
1344 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1345 if (is_Shr(shr_left) && is_Not(shr_right)
1346 && is_Const_1(get_Shr_right(shr_left))
1347 && get_Not_op(shr_right) == shl_right) {
1348 dbg_info *dbgi = get_irn_dbg_info(node);
1349 ir_node *block = get_nodes_block(node);
1350 ir_node *val_h = get_Shr_left(shr_left);
1351 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1354 /* lower_dw produces the following for ShrD:
1355 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1356 if (is_Shl(shl_left) && is_Not(shl_right)
1357 && is_Const_1(get_Shl_right(shl_left))
1358 && get_Not_op(shl_right) == shr_right) {
1359 dbg_info *dbgi = get_irn_dbg_info(node);
1360 ir_node *block = get_nodes_block(node);
1361 ir_node *val_h = get_Shl_left(shl_left);
1362 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1371 * Creates an ia32 Add.
1373 * @return the created ia32 Add node
/* NOTE(review): listing elided -- early returns after match_64bit_shift
 * and after the Const/Lea cases are not fully visible here. */
1375 static ir_node *gen_Add(ir_node *node)
1377 ir_mode *mode = get_irn_mode(node);
1378 ir_node *op1 = get_Add_left(node);
1379 ir_node *op2 = get_Add_right(node);
1381 ir_node *block, *new_block, *new_node, *add_immediate_op;
1382 ia32_address_t addr;
1383 ia32_address_mode_t am;
/* an Add may be the glue of a lowered 64bit shift pattern */
1385 new_node = match_64bit_shift(node);
1386 if (new_node != NULL)
1389 if (mode_is_float(mode)) {
1390 if (ia32_cg_config.use_sse2)
1391 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1392 match_commutative | match_am);
1394 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1397 ia32_mark_non_am(node);
/* strategy (cheapest first):
1401 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1402 * 1. Add with immediate -> Lea
1403 * 2. Add with possible source address mode -> Add
1404 * 3. Otherwise -> Lea
 */
1406 memset(&addr, 0, sizeof(addr));
1407 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1408 add_immediate_op = NULL;
1410 dbgi = get_irn_dbg_info(node);
1411 block = get_nodes_block(node);
1412 new_block = be_transform_node(block);
/* case 0: whole Add folded into one immediate -> emit a Const */
1415 if (addr.base == NULL && addr.index == NULL) {
1416 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1417 addr.symconst_sign, 0, addr.offset);
1418 SET_IA32_ORIG_NODE(new_node, node);
1421 /* add with immediate? */
1422 if (addr.index == NULL) {
1423 add_immediate_op = addr.base;
1424 } else if (addr.base == NULL && addr.scale == 0) {
1425 add_immediate_op = addr.index;
1428 if (add_immediate_op != NULL) {
1429 if (!am_has_immediates(&addr)) {
1430 #ifdef DEBUG_libfirm
1431 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* Add x,0 degenerates to just x */
1434 return be_transform_node(add_immediate_op);
1437 new_node = create_lea_from_address(dbgi, new_block, &addr);
1438 SET_IA32_ORIG_NODE(new_node, node);
1442 /* test if we can use source address mode */
1443 match_arguments(&am, block, op1, op2, NULL, match_commutative
1444 | match_mode_neutral | match_am | match_immediate | match_try_am);
1446 /* construct an Add with source address mode */
1447 if (am.op_type == ia32_AddrModeS) {
1448 ia32_address_t *am_addr = &am.addr;
1449 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1450 am_addr->index, am_addr->mem, am.new_op1,
1452 set_am_attributes(new_node, &am);
1453 SET_IA32_ORIG_NODE(new_node, node);
1455 new_node = fix_mem_proj(new_node, &am);
1460 /* otherwise construct a lea */
1461 new_node = create_lea_from_address(dbgi, new_block, &addr);
1462 SET_IA32_ORIG_NODE(new_node, node);
1467 * Creates an ia32 Mul.
1469 * @return the created ia32 Mul node
1471 static ir_node *gen_Mul(ir_node *node)
1473 ir_node *op1 = get_Mul_left(node);
1474 ir_node *op2 = get_Mul_right(node);
1475 ir_mode *mode = get_irn_mode(node);
1477 if (mode_is_float(mode)) {
1478 if (ia32_cg_config.use_sse2)
1479 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1480 match_commutative | match_am);
1482 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1484 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1485 match_commutative | match_am | match_mode_neutral |
1486 match_immediate | match_am_and_immediates);
1490 * Creates an ia32 Mulh.
1491 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1492 * this result while Mul returns the lower 32 bit.
1494 * @return the created ia32 Mulh node
1496 static ir_node *gen_Mulh(ir_node *node)
1498 dbg_info *dbgi = get_irn_dbg_info(node);
1499 ir_node *op1 = get_Mulh_left(node);
1500 ir_node *op2 = get_Mulh_right(node);
1501 ir_mode *mode = get_irn_mode(node);
1503 ir_node *proj_res_high;
1505 if (get_mode_size_bits(mode) != 32) {
1506 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1509 if (mode_is_signed(mode)) {
1510 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1511 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1513 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1514 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1516 return proj_res_high;
1520 * Creates an ia32 And.
1522 * @return The created ia32 And node
/* NOTE(review): the branch selecting src_mode for the 0xFF case is
 * elided from this listing fragment. */
1524 static ir_node *gen_And(ir_node *node)
1526 ir_node *op1 = get_And_left(node);
1527 ir_node *op2 = get_And_right(node);
1528 assert(! mode_is_float(get_irn_mode(node)));
1530 /* is it a zero extension? */
1531 if (is_Const(op2)) {
1532 ir_tarval *tv = get_Const_tarval(op2);
1533 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero extension from 8/16 bit: emit a
 * (possibly free) conversion instead of an And instruction */
1535 if (v == 0xFF || v == 0xFFFF) {
1536 dbg_info *dbgi = get_irn_dbg_info(node);
1537 ir_node *block = get_nodes_block(node);
1544 assert(v == 0xFFFF);
1547 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: a plain ia32 And */
1552 return gen_binop(node, op1, op2, new_bd_ia32_And,
1553 match_commutative | match_mode_neutral | match_am | match_immediate);
1557 * Creates an ia32 Or.
1559 * @return The created ia32 Or node
1561 static ir_node *gen_Or(ir_node *node)
1563 ir_node *op1 = get_Or_left(node);
1564 ir_node *op2 = get_Or_right(node);
/* an Or may combine the two halves of a lowered 64bit double shift;
 * NOTE(review): the early return on a successful match is elided here */
1567 res = match_64bit_shift(node);
1571 assert (! mode_is_float(get_irn_mode(node)));
1572 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1573 | match_mode_neutral | match_am | match_immediate);
1579 * Creates an ia32 Eor.
1581 * @return The created ia32 Eor node
1583 static ir_node *gen_Eor(ir_node *node)
1585 ir_node *op1 = get_Eor_left(node);
1586 ir_node *op2 = get_Eor_right(node);
1588 assert(! mode_is_float(get_irn_mode(node)));
1589 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1590 | match_mode_neutral | match_am | match_immediate);
1595 * Creates an ia32 Sub.
1597 * @return The created ia32 Sub node
1599 static ir_node *gen_Sub(ir_node *node)
1601 ir_node *op1 = get_Sub_left(node);
1602 ir_node *op2 = get_Sub_right(node);
1603 ir_mode *mode = get_irn_mode(node);
1605 if (mode_is_float(mode)) {
1606 if (ia32_cg_config.use_sse2)
1607 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1609 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1612 if (is_Const(op2)) {
1613 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1617 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1618 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address
 * mode: combine the node's original memory with the AM memory while
 * avoiding a memory self-loop through the consumed load.
 * NOTE(review): several lines (declarations, loop closing, the final
 * two-entry Sync construction) are elided from this listing fragment. */
1621 static ir_node *transform_AM_mem(ir_node *const block,
1622 ir_node *const src_val,
1623 ir_node *const src_mem,
1624 ir_node *const am_mem)
1626 if (is_NoMem(am_mem)) {
/* no address-mode memory: just use the transformed original memory */
1627 return be_transform_node(src_mem);
1628 } else if (is_Proj(src_val) &&
1630 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1631 /* avoid memory loop */
1633 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* the original memory is a Sync: rebuild it without the pred that
 * would create a loop through the consumed load */
1634 ir_node *const ptr_pred = get_Proj_pred(src_val);
1635 int const arity = get_Sync_n_preds(src_mem);
1640 NEW_ARR_A(ir_node*, ins, arity + 1);
1642 /* NOTE: This sometimes produces dead-code because the old sync in
1643 * src_mem might not be used anymore, we should detect this case
1644 * and kill the sync... */
1645 for (i = arity - 1; i >= 0; --i) {
1646 ir_node *const pred = get_Sync_pred(src_mem, i);
1648 /* avoid memory loop */
1649 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1652 ins[n++] = be_transform_node(pred);
1655 if (n==1 && ins[0] == am_mem) {
1657 /* creating a new Sync and relying on CSE may fail,
1658 * if am_mem is a ProjM, which does not yet verify. */
1662 return new_r_Sync(block, n, ins);
/* fallback: sync the transformed original memory with the AM memory */
1666 ins[0] = be_transform_node(src_mem);
1668 return new_r_Sync(block, 2, ins);
1673 * Create a 32bit to 64bit signed extension.
1675 * @param dbgi debug info
1676 * @param block the block where node nodes should be placed
1677 * @param val the value to extend
1678 * @param orig the original node
/* NOTE(review): the declaration of 'res' and the final return are
 * elided from this listing fragment. */
1680 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1681 ir_node *val, const ir_node *orig)
/* cltd (cdq) is shorter but ties the result to EDX; Sar reg,31 is the
 * generic alternative -- choice is driven by the CPU config */
1686 if (ia32_cg_config.use_short_sex_eax) {
1687 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1688 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1690 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
/* arithmetic shift right by 31 replicates the sign bit */
1691 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1693 SET_IA32_ORIG_NODE(res, orig);
1698 * Generates an ia32 Div with additional infrastructure for the
1699 * register allocator if needed.
/* Shared transformation for firm Div and Mod nodes: both map to the
 * ia32 div/idiv instruction, the wanted result is selected by Proj.
 * NOTE(review): case labels of the opcode switch and some declarations
 * are elided from this listing fragment. */
1701 static ir_node *create_Div(ir_node *node)
1703 dbg_info *dbgi = get_irn_dbg_info(node);
1704 ir_node *block = get_nodes_block(node);
1705 ir_node *new_block = be_transform_node(block);
1706 int throws_exception = ir_throws_exception(node);
1713 ir_node *sign_extension;
1714 ia32_address_mode_t am;
1715 ia32_address_t *addr = &am.addr;
1717 /* the upper bits have random contents for smaller modes */
1718 switch (get_irn_opcode(node)) {
1720 op1 = get_Div_left(node);
1721 op2 = get_Div_right(node);
1722 mem = get_Div_mem(node);
1723 mode = get_Div_resmode(node);
1726 op1 = get_Mod_left(node);
1727 op2 = get_Mod_right(node);
1728 mem = get_Mod_mem(node);
1729 mode = get_Mod_resmode(node);
1732 panic("invalid divmod node %+F", node);
/* upconv needed: div/idiv consume full 32bit operands */
1735 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1737 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1738 is the memory of the consumed address. We can have only the second op as address
1739 in Div nodes, so check only op2. */
1740 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1742 if (mode_is_signed(mode)) {
/* signed division needs the dividend sign-extended into EDX */
1743 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1744 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1745 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned division needs EDX zeroed instead */
1747 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1749 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1750 addr->index, new_mem, am.new_op2,
1751 am.new_op1, sign_extension);
1753 ir_set_throws_exception(new_node, throws_exception);
1755 set_irn_pinned(new_node, get_irn_pinned(node));
1757 set_am_attributes(new_node, &am);
1758 SET_IA32_ORIG_NODE(new_node, node);
1760 new_node = fix_mem_proj(new_node, &am);
1766 * Generates an ia32 Mod.
1768 static ir_node *gen_Mod(ir_node *node)
1770 return create_Div(node);
1774 * Generates an ia32 Div.
1776 static ir_node *gen_Div(ir_node *node)
1778 ir_mode *mode = get_Div_resmode(node);
1779 if (mode_is_float(mode)) {
1780 ir_node *op1 = get_Div_left(node);
1781 ir_node *op2 = get_Div_right(node);
1783 if (ia32_cg_config.use_sse2) {
1784 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1786 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1790 return create_Div(node);
1794 * Creates an ia32 Shl.
1796 * @return The created ia32 Shl node
1798 static ir_node *gen_Shl(ir_node *node)
1800 ir_node *left = get_Shl_left(node);
1801 ir_node *right = get_Shl_right(node);
1803 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1804 match_mode_neutral | match_immediate);
1808 * Creates an ia32 Shr.
1810 * @return The created ia32 Shr node
1812 static ir_node *gen_Shr(ir_node *node)
1814 ir_node *left = get_Shr_left(node);
1815 ir_node *right = get_Shr_right(node);
1817 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1818 match_immediate | match_zero_ext);
1822 * Creates an ia32 Sar.
1824 * @return The created ia32 Shrs node
/* NOTE(review): several lines (the val == 31 check, src_mode selection
 * for the 16/24-bit case, closing braces) are elided from this listing. */
1826 static ir_node *gen_Shrs(ir_node *node)
1828 ir_node *left = get_Shrs_left(node);
1829 ir_node *right = get_Shrs_right(node);
1831 if (is_Const(right)) {
1832 ir_tarval *tv = get_Const_tarval(right);
1833 long val = get_tarval_long(tv);
1835 /* this is a sign extension */
1836 dbg_info *dbgi = get_irn_dbg_info(node);
1837 ir_node *block = be_transform_node(get_nodes_block(node));
1838 ir_node *new_op = be_transform_node(left);
/* Shrs(x, 31) yields the sign mask: use the cheaper sex helper */
1840 return create_sex_32_64(dbgi, block, new_op, node);
1844 /* 8 or 16 bit sign extension? */
1845 if (is_Const(right) && is_Shl(left)) {
1846 ir_node *shl_left = get_Shl_left(left);
1847 ir_node *shl_right = get_Shl_right(left);
1848 if (is_Const(shl_right)) {
1849 ir_tarval *tv1 = get_Const_tarval(right);
1850 ir_tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, C), C) with C in {16, 24} is a sign extension from
 * 16/8 bit -- emit a conversion instead of two shifts */
1851 if (tv1 == tv2 && tarval_is_long(tv1)) {
1852 long val = get_tarval_long(tv1);
1853 if (val == 16 || val == 24) {
1854 dbg_info *dbgi = get_irn_dbg_info(node);
1855 ir_node *block = get_nodes_block(node);
1865 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right (Sar) */
1874 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1875 match_immediate | match_upconv);
1881 * Creates an ia32 Rol.
1883 * @param op1 The first operator
1884 * @param op2 The second operator
1885 * @return The created ia32 RotL node
1887 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1889 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1895 * Creates an ia32 Ror.
1896 * NOTE: There is no RotR with immediate because this would always be a RotL
1897 * "imm-mode_size_bits" which can be pre-calculated.
1899 * @param op1 The first operator
1900 * @param op2 The second operator
1901 * @return The created ia32 RotR node
1903 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1905 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1911 * Creates an ia32 RotR or RotL (depending on the found pattern).
1913 * @return The created ia32 RotL or RotR node
1915 static ir_node *gen_Rotl(ir_node *node)
1917 ir_node *op1 = get_Rotl_left(node);
1918 ir_node *op2 = get_Rotl_right(node);
1920 if (is_Minus(op2)) {
1921 return gen_Ror(node, op1, get_Minus_op(op2));
1924 return gen_Rol(node, op1, op2);
1930 * Transforms a Minus node.
1932 * @return The created ia32 Minus node
/* NOTE(review): declarations of new_node/size/ent and some closing
 * braces are elided from this listing fragment. */
1934 static ir_node *gen_Minus(ir_node *node)
1936 ir_node *op = get_Minus_op(node);
1937 ir_node *block = be_transform_node(get_nodes_block(node));
1938 dbg_info *dbgi = get_irn_dbg_info(node);
1939 ir_mode *mode = get_irn_mode(node);
1944 if (mode_is_float(mode)) {
1945 ir_node *new_op = be_transform_node(op);
1946 if (ia32_cg_config.use_sse2) {
1947 /* TODO: non-optimal... if we have many xXors, then we should
1948 * rather create a load for the const and use that instead of
1949 * several AM nodes... */
/* SSE has no negate: xor with the sign-bit constant instead */
1950 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1952 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1953 noreg_GP, nomem, new_op, noreg_xmm);
1955 size = get_mode_size_bits(mode);
1956 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1958 set_ia32_am_sc(new_node, ent);
1959 set_ia32_op_type(new_node, ia32_AddrModeS);
1960 set_ia32_ls_mode(new_node, mode);
1962 check_x87_floatmode(mode);
/* x87 has a dedicated change-sign instruction */
1963 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1966 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1969 SET_IA32_ORIG_NODE(new_node, node);
1975 * Transforms a Not node.
1977 * @return The created ia32 Not node
1979 static ir_node *gen_Not(ir_node *node)
1981 ir_node *op = get_Not_op(node);
1983 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1984 assert (! mode_is_float(get_irn_mode(node)));
1986 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build |op| (and optionally -|op| when negate is set) for a float value.
 * NOTE(review): declarations of new_node/size/ent and closing braces are
 * elided from this listing fragment. */
1989 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1990 bool negate, ir_node *node)
1992 ir_node *new_block = be_transform_node(block);
1993 ir_mode *mode = get_irn_mode(op);
1994 ir_node *new_op = be_transform_node(op);
1999 assert(mode_is_float(mode));
2001 if (ia32_cg_config.use_sse2) {
/* SSE has no abs: mask away the sign bit with an And constant */
2002 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2003 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2004 noreg_GP, nomem, new_op, noreg_fp);
2006 size = get_mode_size_bits(mode);
2007 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2009 set_ia32_am_sc(new_node, ent);
2011 SET_IA32_ORIG_NODE(new_node, node);
2013 set_ia32_op_type(new_node, ia32_AddrModeS);
2014 set_ia32_ls_mode(new_node, mode);
2016 /* TODO, implement -Abs case */
2019 check_x87_floatmode(mode);
/* x87: fabs, then fchs when the negated absolute value is wanted */
2020 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
2021 SET_IA32_ORIG_NODE(new_node, node);
2023 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2024 SET_IA32_ORIG_NODE(new_node, node);
2032 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2034 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2036 dbg_info *dbgi = get_irn_dbg_info(cmp);
2037 ir_node *block = get_nodes_block(cmp);
2038 ir_node *new_block = be_transform_node(block);
2039 ir_node *op1 = be_transform_node(x);
2040 ir_node *op2 = be_transform_node(n);
2042 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation to an ia32 condition code, taking the operand
 * mode (float/signed/unsigned) into account. For signed compares the
 * cheaper sign-flag conditions are used when overflow is impossible.
 * NOTE(review): the switch headers and closing braces are elided from
 * this listing fragment. */
2045 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2047 bool overflow_possible)
2049 if (mode_is_float(mode)) {
/* float compares use the unordered-aware condition codes */
2051 case ir_relation_equal: return ia32_cc_float_equal;
2052 case ir_relation_less: return ia32_cc_float_below;
2053 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2054 case ir_relation_greater: return ia32_cc_float_above;
2055 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2056 case ir_relation_less_greater: return ia32_cc_not_equal;
2057 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2058 case ir_relation_unordered: return ia32_cc_parity;
2059 case ir_relation_unordered_equal: return ia32_cc_equal;
2060 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2061 case ir_relation_unordered_less_equal:
2062 return ia32_cc_float_unordered_below_equal;
2063 case ir_relation_unordered_greater:
2064 return ia32_cc_float_unordered_above;
2065 case ir_relation_unordered_greater_equal:
2066 return ia32_cc_float_unordered_above_equal;
2067 case ir_relation_unordered_less_greater:
2068 return ia32_cc_float_not_equal;
2069 case ir_relation_false:
2070 case ir_relation_true:
2071 /* should we introduce a jump always/jump never? */
2074 panic("Unexpected float pnc");
2075 } else if (mode_is_signed(mode)) {
/* signed integer compares */
2077 case ir_relation_unordered_equal:
2078 case ir_relation_equal: return ia32_cc_equal;
2079 case ir_relation_unordered_less:
2080 case ir_relation_less:
/* without possible overflow the sign flag alone suffices */
2081 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2082 case ir_relation_unordered_less_equal:
2083 case ir_relation_less_equal: return ia32_cc_less_equal;
2084 case ir_relation_unordered_greater:
2085 case ir_relation_greater: return ia32_cc_greater;
2086 case ir_relation_unordered_greater_equal:
2087 case ir_relation_greater_equal:
2088 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2089 case ir_relation_unordered_less_greater:
2090 case ir_relation_less_greater: return ia32_cc_not_equal;
2091 case ir_relation_less_equal_greater:
2092 case ir_relation_unordered:
2093 case ir_relation_false:
2094 case ir_relation_true:
2095 /* introduce jump always/jump never? */
2098 panic("Unexpected pnc");
/* unsigned integer compares use the below/above condition codes */
2101 case ir_relation_unordered_equal:
2102 case ir_relation_equal: return ia32_cc_equal;
2103 case ir_relation_unordered_less:
2104 case ir_relation_less: return ia32_cc_below;
2105 case ir_relation_unordered_less_equal:
2106 case ir_relation_less_equal: return ia32_cc_below_equal;
2107 case ir_relation_unordered_greater:
2108 case ir_relation_greater: return ia32_cc_above;
2109 case ir_relation_unordered_greater_equal:
2110 case ir_relation_greater_equal: return ia32_cc_above_equal;
2111 case ir_relation_unordered_less_greater:
2112 case ir_relation_less_greater: return ia32_cc_not_equal;
2113 case ir_relation_less_equal_greater:
2114 case ir_relation_unordered:
2115 case ir_relation_false:
2116 case ir_relation_true:
2117 /* introduce jump always/jump never? */
2120 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and return the ia32
 * condition code to test on those flags (via cc_out).
 * NOTE(review): lines checking that l is an And / la is a Shl, plus some
 * returns and braces, are elided from this listing fragment. */
2124 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2126 /* must have a Cmp as input */
2127 ir_relation relation = get_Cmp_relation(cmp);
2128 ir_node *l = get_Cmp_left(cmp);
2129 ir_node *r = get_Cmp_right(cmp);
2130 ir_mode *mode = get_irn_mode(l);
2131 bool overflow_possible;
2134 /* check for bit-test */
2135 if (ia32_cg_config.use_bt
2136 && (relation == ir_relation_equal
2137 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2138 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2140 ir_node *la = get_And_left(l);
2141 ir_node *ra = get_And_right(l);
2148 ir_node *c = get_Shl_left(la);
2149 if (is_Const_1(c) && is_Const_0(r)) {
2150 /* (1 << n) & ra) */
2151 ir_node *n = get_Shl_right(la);
2152 flags = gen_bt(cmp, ra, n);
2153 /* the bit is copied into the CF flag */
2154 if (relation & ir_relation_equal)
2155 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2157 *cc_out = ia32_cc_below; /* test for CF=1 */
2163 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2164 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2165 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2166 * a predecessor node). So add the < bit.
2167 * (Note that we do not want to produce <=> (which can happen for
2168 * unoptimized code), because no x86 flag can represent that */
2169 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2170 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* a compare against 0 cannot overflow */
2172 overflow_possible = true;
2173 if (is_Const(r) && is_Const_null(r))
2174 overflow_possible = false;
2176 /* just do a normal transformation of the Cmp */
2177 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2178 flags = be_transform_node(cmp);
2183 * Transforms a Load.
2185 * @return the created ia32 Load node
/* NOTE(review): declarations of base/idx/new_node and NULL checks are
 * elided from this listing fragment. */
2187 static ir_node *gen_Load(ir_node *node)
2189 ir_node *old_block = get_nodes_block(node);
2190 ir_node *block = be_transform_node(old_block);
2191 ir_node *ptr = get_Load_ptr(node);
2192 ir_node *mem = get_Load_mem(node);
2193 ir_node *new_mem = be_transform_node(mem);
2194 dbg_info *dbgi = get_irn_dbg_info(node);
2195 ir_mode *mode = get_Load_mode(node);
2196 int throws_exception = ir_throws_exception(node);
2200 ia32_address_t addr;
2202 /* construct load address */
2203 memset(&addr, 0, sizeof(addr));
2204 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2211 base = be_transform_node(base);
2217 idx = be_transform_node(idx);
2220 if (mode_is_float(mode)) {
/* float load: SSE2 movss/movsd or x87 fld */
2221 if (ia32_cg_config.use_sse2) {
2222 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2225 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2229 assert(mode != mode_b);
2231 /* create a conv node with address mode for smaller modes */
2232 if (get_mode_size_bits(mode) < 32) {
2233 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2234 new_mem, noreg_GP, mode);
2236 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2239 ir_set_throws_exception(new_node, throws_exception);
2241 set_irn_pinned(new_node, get_irn_pinned(node));
2242 set_ia32_op_type(new_node, ia32_AddrModeS);
2243 set_ia32_ls_mode(new_node, mode);
2244 set_address(new_node, &addr);
2246 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned loads may be duplicated by the spiller: mark them
 * rematerializable (all load variants share the res Proj number) */
2247 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2248 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2249 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2250 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2253 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a value (a Proj of a Load) may be folded into a
 * destination-address-mode operation at the given store site.
 * Returns non-zero when folding is safe.
 * NOTE(review): the is_Proj/is_Load checks and several returns are
 * elided from this listing fragment. */
2258 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2259 ir_node *ptr, ir_node *other)
2266 /* we only use address mode if we're the only user of the load */
2267 if (get_irn_n_edges(node) > 1)
2270 load = get_Proj_pred(node);
/* load and store must live in the same block for dest AM */
2273 if (get_nodes_block(load) != block)
2276 /* store should have the same pointer as the load */
2277 if (get_Load_ptr(load) != ptr)
2280 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2281 if (other != NULL &&
2282 get_nodes_block(other) == block &&
2283 heights_reachable_in_block(ia32_heights, other, load)) {
2287 if (ia32_prevents_AM(block, load, mem))
2289 /* Store should be attached to the load via mem */
2290 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binary operation (op directly on
 * memory, e.g. "add [mem], reg") for a Store(binop(Load, x)) pattern.
 * Returns NULL when dest AM is not applicable.
 * NOTE(review): declarations, the "return NULL" fallthrough and some
 * closing braces are elided from this listing fragment. */
2295 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2296 ir_node *mem, ir_node *ptr, ir_mode *mode,
2297 construct_binop_dest_func *func,
2298 construct_binop_dest_func *func8bit,
2299 match_flags_t flags)
2301 ir_node *src_block = get_nodes_block(node);
2309 ia32_address_mode_t am;
2310 ia32_address_t *addr = &am.addr;
2311 memset(&am, 0, sizeof(am));
2313 assert(flags & match_immediate); /* there is no destam node without... */
2314 commutative = (flags & match_commutative) != 0;
/* either op1 is the loaded value, or (for commutative ops) op2 */
2316 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2317 build_address(&am, op1, ia32_create_am_double_use);
2318 new_op = create_immediate_or_transform(op2, 0);
2319 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2320 build_address(&am, op2, ia32_create_am_double_use);
2321 new_op = create_immediate_or_transform(op1, 0);
2326 if (addr->base == NULL)
2327 addr->base = noreg_GP;
2328 if (addr->index == NULL)
2329 addr->index = noreg_GP;
2330 if (addr->mem == NULL)
2333 dbgi = get_irn_dbg_info(node);
2334 block = be_transform_node(src_block);
2335 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8bit constructor (different
 * register constraints) */
2337 if (get_mode_size_bits(mode) == 8) {
2338 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2340 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2342 set_address(new_node, addr);
2343 set_ia32_op_type(new_node, ia32_AddrModeD);
2344 set_ia32_ls_mode(new_node, mode);
2345 SET_IA32_ORIG_NODE(new_node, node);
/* redirect users of the consumed load's memory Proj to the new node */
2347 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2348 mem_proj = be_transform_node(am.mem_proj);
2349 be_set_transformed_node(am.mem_proj, new_node);
2350 be_set_transformed_node(mem_proj, new_node);
/* Build a destination-address-mode unary operation (e.g. "inc [mem]")
 * for a Store(unop(Load)) pattern; returns NULL when not applicable.
 * NOTE(review): declarations and the return of new_node are elided from
 * this listing fragment. */
2355 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2356 ir_node *ptr, ir_mode *mode,
2357 construct_unop_dest_func *func)
2359 ir_node *src_block = get_nodes_block(node);
2365 ia32_address_mode_t am;
2366 ia32_address_t *addr = &am.addr;
2368 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2371 memset(&am, 0, sizeof(am));
2372 build_address(&am, op, ia32_create_am_double_use);
2374 dbgi = get_irn_dbg_info(node);
2375 block = be_transform_node(src_block);
2376 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2377 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2378 set_address(new_node, addr);
2379 set_ia32_op_type(new_node, ia32_AddrModeD);
2380 set_ia32_ls_mode(new_node, mode);
2381 SET_IA32_ORIG_NODE(new_node, node);
/* redirect users of the consumed load's memory Proj to the new node */
2383 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2384 mem_proj = be_transform_node(am.mem_proj);
2385 be_set_transformed_node(am.mem_proj, new_node);
2386 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(sel, 0/1, 1/0)) into a SetccMem instruction that
 * writes the condition directly to memory; returns NULL if not possible.
 * NOTE(review): declarations, an early return for non-0/1 Mux values and
 * the final return are elided from this listing fragment. */
2391 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2393 ir_mode *mode = get_irn_mode(node);
2394 ir_node *mux_true = get_Mux_true(node);
2395 ir_node *mux_false = get_Mux_false(node);
2403 ia32_condition_code_t cc;
2404 ia32_address_t addr;
/* Setcc only produces an 8-bit result */
2406 if (get_mode_size_bits(mode) != 8)
2409 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2411 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2417 cond = get_Mux_sel(node);
2418 flags = get_flags_node(cond, &cc);
2419 /* we can't handle the float special cases with SetM */
2420 if (cc & ia32_cc_additional_float_cases)
/* Mux(sel, 0, 1) is the negated condition */
2423 cc = ia32_negate_condition_code(cc);
2425 build_address_ptr(&addr, ptr, mem);
2427 dbgi = get_irn_dbg_info(node);
2428 block = get_nodes_block(node);
2429 new_block = be_transform_node(block);
2430 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2431 addr.index, addr.mem, flags, cc);
2432 set_address(new_node, &addr);
2433 set_ia32_op_type(new_node, ia32_AddrModeD);
2434 set_ia32_ls_mode(new_node, mode);
2435 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Try to transform Store(op(Load(ptr), x)) into a destination-address-mode
 * ia32 instruction (e.g. AddMem, SubMem, ...) that performs the operation
 * directly on the memory operand.  Returns the new node on success,
 * presumably NULL otherwise (failure returns are elided in this view).
 */
2440 static ir_node *try_create_dest_am(ir_node *node)
2442 	ir_node *val  = get_Store_value(node);
2443 	ir_node *mem  = get_Store_mem(node);
2444 	ir_node *ptr  = get_Store_ptr(node);
2445 	ir_mode *mode = get_irn_mode(val);
2446 	unsigned bits = get_mode_size_bits(mode);
2451 	/* handle only GP modes for now... */
2452 	if (!ia32_mode_needs_gp_reg(mode))
2456 	/* store must be the only user of the val node */
2457 	if (get_irn_n_edges(val) > 1)
2459 	/* skip pointless convs */
2461 		ir_node *conv_op   = get_Conv_op(val);
2462 		ir_mode *pred_mode = get_irn_mode(conv_op);
2463 		if (!ia32_mode_needs_gp_reg(pred_mode))
2465 		if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2473 	/* value must be in the same block */
2474 	if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the opcode of the stored value; each arm builds the
 * corresponding read-modify-write memory instruction. */
2477 	switch (get_irn_opcode(val)) {
2479 		op1 = get_Add_left(val);
2480 		op2 = get_Add_right(val);
/* Add of +/-1 becomes IncMem/DecMem when the target prefers inc/dec. */
2481 		if (ia32_cg_config.use_incdec) {
2482 			if (is_Const_1(op2)) {
2483 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2485 			} else if (is_Const_Minus_1(op2)) {
2486 				new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2490 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2491 		                         new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2492 		                         match_commutative | match_immediate);
2495 		op1 = get_Sub_left(val);
2496 		op2 = get_Sub_right(val);
2497 		if (is_Const(op2)) {
2498 			ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2500 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2501 		                         new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2505 		op1 = get_And_left(val);
2506 		op2 = get_And_right(val);
2507 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2508 		                         new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2509 		                         match_commutative | match_immediate);
2512 		op1 = get_Or_left(val);
2513 		op2 = get_Or_right(val);
2514 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2515 		                         new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2516 		                         match_commutative | match_immediate);
2519 		op1 = get_Eor_left(val);
2520 		op2 = get_Eor_right(val);
2521 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2522 		                         new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2523 		                         match_commutative | match_immediate);
/* Shift/rotate variants pass the same constructor twice: there is no
 * separate 8-bit construction function for them. */
2526 		op1 = get_Shl_left(val);
2527 		op2 = get_Shl_right(val);
2528 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2529 		                         new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2533 		op1 = get_Shr_left(val);
2534 		op2 = get_Shr_right(val);
2535 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2536 		                         new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2540 		op1 = get_Shrs_left(val);
2541 		op2 = get_Shrs_right(val);
2542 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2543 		                         new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2547 		op1 = get_Rotl_left(val);
2548 		op2 = get_Rotl_right(val);
2549 		new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2550 		                         new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2553 		/* TODO: match ROR patterns... */
/* A stored Mux may become a SetccMem (see try_create_SetMem above). */
2555 		new_node = try_create_SetMem(val, ptr, mem);
2559 		op1 = get_Minus_op(val);
2560 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2563 		/* should be lowered already */
2564 		assert(mode != mode_b);
2565 		op1 = get_Not_op(val);
2566 		new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2572 	if (new_node != NULL) {
/* Preserve the pinned state of the original Store on the new node. */
2573 		if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2574 		    get_irn_pinned(node) == op_pin_state_pinned) {
2575 			set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Check whether an integer mode can be used directly as a memory operand
 * of the x87 fild instruction: only signed 16- or 32-bit integers qualify.
 * (The success/failure return statements are elided in this view.)
 */
2582 static bool possible_int_mode_for_fp(ir_mode *mode)
2586 	if (!mode_is_signed(mode))
2588 	size = get_mode_size_bits(mode);
2589 	if (size != 16 && size != 32)
/*
 * Test whether @p node is a Conv from a float mode to an integer mode
 * that fild/fist can handle (see possible_int_mode_for_fp).
 * (Return statements are elided in this view.)
 */
2594 static int is_float_to_int_conv(const ir_node *node)
2596 	ir_mode *mode = get_irn_mode(node);
2600 	if (!possible_int_mode_for_fp(mode))
2605 	conv_op   = get_Conv_op(node);
2606 	conv_mode = get_irn_mode(conv_op);
2608 	if (!mode_is_float(conv_mode))
2615  * Transform a Store(floatConst) into a sequence of
2618  * @return the created ia32 Store node
2620 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2622 	ir_mode   *mode      = get_irn_mode(cns);
2623 	unsigned   size      = get_mode_size_bytes(mode);
2624 	ir_tarval *tv        = get_Const_tarval(cns);
2625 	ir_node   *block     = get_nodes_block(node);
2626 	ir_node   *new_block = be_transform_node(block);
2627 	ir_node   *ptr       = get_Store_ptr(node);
2628 	ir_node   *mem       = get_Store_mem(node);
2629 	dbg_info  *dbgi      = get_irn_dbg_info(node);
2632 	int        throws_exception = ir_throws_exception(node);
2634 	ia32_address_t addr;
2636 	build_address_ptr(&addr, ptr, mem);
/* Assemble a 32-bit little-endian chunk of the constant's bit pattern
 * from its bytes (ofs is the running byte offset; loop head elided). */
2643 			val = get_tarval_sub_bits(tv, ofs)            |
2644 			      (get_tarval_sub_bits(tv, ofs + 1) <<  8) |
2645 			      (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2646 			      (get_tarval_sub_bits(tv, ofs + 3) << 24);
2649 		} else if (size >= 2) {
/* Remaining tail of 2 bytes: store as a 16-bit integer chunk. */
2650 			val = get_tarval_sub_bits(tv, ofs)           |
2651 			      (get_tarval_sub_bits(tv, ofs + 1) << 8);
2655 			panic("invalid size of Store float to mem (%+F)", node);
/* Emit one integer Store of the immediate chunk at the current offset. */
2657 		ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2659 		ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2660 			addr.index, addr.mem, imm);
2661 		ir_node *new_mem  = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2663 		ir_set_throws_exception(new_node, throws_exception);
2664 		set_irn_pinned(new_node, get_irn_pinned(node));
2665 		set_ia32_op_type(new_node, ia32_AddrModeD);
2666 		set_ia32_ls_mode(new_node, mode);
2667 		set_address(new_node, &addr);
2668 		SET_IA32_ORIG_NODE(new_node, node);
/* Advance to the next chunk of the constant. */
2675 		addr.offset += delta;
2676 	} while (size != 0);
/* Multiple partial stores are combined into a Sync; a single store is
 * returned directly (its Proj's predecessor). */
2679 		return new_rd_Sync(dbgi, new_block, i, ins);
2681 		return get_Proj_pred(ins[0]);
2686  * Generate a vfist or vfisttp instruction.
2688 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2689                           ir_node *index, ir_node *mem, ir_node *val)
2691 	if (ia32_cg_config.use_fisttp) {
2692 		/* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2693 		   if other users exists */
2694 		ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2695 		ir_node *value   = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
/* Keep the result Proj alive so the x87 simulator sees the popped value. */
2696 		be_new_Keep(block, 1, &value);
/* Plain fist needs the FPU control word switched to truncation mode. */
2700 		ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2703 		ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2709  * Transforms a general (no special case) Store.
2711  * @return the created ia32 Store node
2713 static ir_node *gen_general_Store(ir_node *node)
2715 	ir_node  *val       = get_Store_value(node);
2716 	ir_mode  *mode      = get_irn_mode(val);
2717 	ir_node  *block     = get_nodes_block(node);
2718 	ir_node  *new_block = be_transform_node(block);
2719 	ir_node  *ptr       = get_Store_ptr(node);
2720 	ir_node  *mem       = get_Store_mem(node);
2721 	dbg_info *dbgi      = get_irn_dbg_info(node);
2722 	int       throws_exception = ir_throws_exception(node);
2725 	ia32_address_t addr;
2727 	/* check for destination address mode */
2728 	new_node = try_create_dest_am(node);
2729 	if (new_node != NULL)
2732 	/* construct store address */
2733 	memset(&addr, 0, sizeof(addr));
2734 	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* Missing base/index default to the no-register placeholder. */
2736 	if (addr.base == NULL) {
2737 		addr.base = noreg_GP;
2739 		addr.base = be_transform_node(addr.base);
2742 	if (addr.index == NULL) {
2743 		addr.index = noreg_GP;
2745 		addr.index = be_transform_node(addr.index);
2747 	addr.mem = be_transform_node(mem);
2749 	if (mode_is_float(mode)) {
/* Float store: SSE2 xStore or x87 vfst depending on configuration. */
2750 		new_val = be_transform_node(val);
2751 		if (ia32_cg_config.use_sse2) {
2752 			new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2753 			                              addr.index, addr.mem, new_val);
2755 			new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2756 			                            addr.index, addr.mem, new_val, mode);
/* Store of a float->int Conv on x87: fuse Conv+Store into a fist. */
2758 	} else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2759 		val      = get_Conv_op(val);
2760 		new_val  = be_transform_node(val);
2761 		new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2763 		unsigned dest_bits = get_mode_size_bits(mode);
/* Skip Convs that only narrow: the store truncates anyway. */
2764 		while (is_downconv(val)
2765 		       && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2766 			val = get_Conv_op(val);
2768 		new_val = create_immediate_or_transform(val, 0);
2769 		assert(mode != mode_b);
2771 		if (dest_bits == 8) {
2772 			new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2773 			                                 addr.index, addr.mem, new_val);
2775 			new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2776 			                             addr.index, addr.mem, new_val);
2779 	ir_set_throws_exception(new_node, throws_exception);
2781 	set_irn_pinned(new_node, get_irn_pinned(node));
2782 	set_ia32_op_type(new_node, ia32_AddrModeD);
2783 	set_ia32_ls_mode(new_node, mode);
2785 	set_address(new_node, &addr);
2786 	SET_IA32_ORIG_NODE(new_node, node);
2792  * Transforms a Store.
2794  * @return the created ia32 Store node
2796 static ir_node *gen_Store(ir_node *node)
2798 	ir_node *val  = get_Store_value(node);
2799 	ir_mode *mode = get_irn_mode(val);
/* Float constant stores are lowered to integer stores of the bit
 * pattern; everything else goes through the general path. */
2801 	if (mode_is_float(mode) && is_Const(val)) {
2802 		/* We can transform every floating const store
2803 		   into a sequence of integer stores.
2804 		   If the constant is already in a register,
2805 		   it would be better to use it, but we don't
2806 		   have this information here. */
2807 		return gen_float_const_Store(node, val);
2809 	return gen_general_Store(node);
2813  * Transforms a Switch.
2815  * @return the created ia32 SwitchJmp node
2817 static ir_node *gen_Switch(ir_node *node)
2819 	dbg_info              *dbgi     = get_irn_dbg_info(node);
2820 	ir_graph              *irg      = get_irn_irg(node);
2821 	ir_node               *block    = be_transform_node(get_nodes_block(node));
2822 	ir_node               *sel      = get_Switch_selector(node);
2823 	ir_node               *new_sel  = be_transform_node(sel);
2824 	ir_mode               *sel_mode = get_irn_mode(sel);
2825 	const ir_switch_table *table    = get_Switch_table(node);
2826 	unsigned               n_outs   = get_Switch_n_outs(node);
2830 	assert(get_mode_size_bits(sel_mode) <= 32);
2831 	assert(!mode_is_float(sel_mode));
2832 	sel = ia32_skip_sameconv(sel);
/* Selector narrower than 32 bit must be zero/sign-extended first. */
2833 	if (get_mode_size_bits(sel_mode) < 32)
2834 		new_sel = transform_upconv(sel, node);
/* Create a private, constant entity that will hold the jump table. */
2836 	entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2837 	set_entity_visibility(entity, ir_visibility_private);
2838 	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2840 	table = ir_switch_table_duplicate(irg, table);
2842 	new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
/* Scale 2 => index * 4: each jump-table entry is a 32-bit address. */
2843 	set_ia32_am_scale(new_node, 2);
2844 	set_ia32_am_sc(new_node, entity);
2845 	set_ia32_op_type(new_node, ia32_AddrModeS);
2846 	set_ia32_ls_mode(new_node, mode_Iu);
2847 	SET_IA32_ORIG_NODE(new_node, node);
2848 	// FIXME This seems wrong. GCC uses PIC for switch on OS X.
2849 	get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2855  * Transform a Cond node.
2857 static ir_node *gen_Cond(ir_node *node)
2859 	ir_node  *block     = get_nodes_block(node);
2860 	ir_node  *new_block = be_transform_node(block);
2861 	dbg_info *dbgi      = get_irn_dbg_info(node);
2862 	ir_node  *sel       = get_Cond_selector(node);
2863 	ir_node  *flags     = NULL;
2865 	ia32_condition_code_t cc;
2867 	/* we get flags from a Cmp */
2868 	flags = get_flags_node(sel, &cc);
/* Conditional jump consuming the flags produced above. */
2870 	new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2871 	SET_IA32_ORIG_NODE(new_node, node);
2877  * Transform a be_Copy.
2879 static ir_node *gen_be_Copy(ir_node *node)
2881 	ir_node *new_node = be_duplicate_node(node);
2882 	ir_mode *mode     = get_irn_mode(new_node);
/* Normalize all GP-register copies to mode_Iu. */
2884 	if (ia32_mode_needs_gp_reg(mode)) {
2885 		set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare for a Cmp node: fucomi when available,
 * otherwise fucom/ftst + fnstsw + sahf to move the FPU status word
 * into the CPU flags.
 */
2891 static ir_node *create_Fucom(ir_node *node)
2893 	dbg_info *dbgi      = get_irn_dbg_info(node);
2894 	ir_node  *block     = get_nodes_block(node);
2895 	ir_node  *new_block = be_transform_node(block);
2896 	ir_node  *left      = get_Cmp_left(node);
2897 	ir_node  *new_left  = be_transform_node(left);
2898 	ir_node  *right     = get_Cmp_right(node);
2899 	ir_mode  *cmp_mode  = get_irn_mode(left);
2902 	check_x87_floatmode(cmp_mode);
2904 	if (ia32_cg_config.use_fucomi) {
2905 		new_right = be_transform_node(right);
2906 		new_node  = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2908 		set_ia32_commutative(new_node);
2909 		SET_IA32_ORIG_NODE(new_node, node);
/* No fucomi: compare against 0 uses ftst, otherwise fucom. */
2911 		if (is_Const_0(right)) {
2912 			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2914 			new_right = be_transform_node(right);
2915 			new_node  = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2916 			set_ia32_commutative(new_node);
2919 		SET_IA32_ORIG_NODE(new_node, node);
/* sahf transfers AH (FPU status) into the EFLAGS register. */
2921 		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2922 		SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE2 ucomis[sd] compare for a Cmp node, with address-mode
 * matching so one operand may come directly from memory.
 */
2928 static ir_node *create_Ucomi(ir_node *node)
2930 	dbg_info *dbgi      = get_irn_dbg_info(node);
2931 	ir_node  *src_block = get_nodes_block(node);
2932 	ir_node  *new_block = be_transform_node(src_block);
2933 	ir_node  *left      = get_Cmp_left(node);
2934 	ir_node  *right     = get_Cmp_right(node);
2936 	ia32_address_mode_t  am;
2937 	ia32_address_t      *addr = &am.addr;
2939 	match_arguments(&am, src_block, left, right, NULL,
2940 	                match_commutative | match_am);
2942 	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2943 	                             addr->mem, am.new_op1, am.new_op2,
2945 	set_am_attributes(new_node, &am);
2947 	SET_IA32_ORIG_NODE(new_node, node);
/* Re-route memory users if a memory operand was folded in. */
2949 	new_node = fix_mem_proj(new_node, &am);
/*
 * Check whether a Mux node's result has clean upper bits w.r.t. @p mode:
 * true only if both Mux operands have clean upper bits.  8-bit const/const
 * Muxes become a Setcc, which does not guarantee clean upper bits.
 */
2954 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2956 	ir_node *mux_true  = get_Mux_true(node);
2957 	ir_node *mux_false = get_Mux_false(node);
2958 	ir_mode *mux_mode  = get_irn_mode(node);
2959 	/* mux nodes which get transformed to the set instruction are not clean */
2960 	if (is_Const(mux_true) && is_Const(mux_false)
2961 		&& get_mode_size_bits(mux_mode) == 8) {
2964 	return be_upper_bits_clean(mux_true, mode)
2965 	    && be_upper_bits_clean(mux_false, mode);
2969  * Generate code for a Cmp.
2971 static ir_node *gen_Cmp(ir_node *node)
2973 	dbg_info *dbgi      = get_irn_dbg_info(node);
2974 	ir_node  *block     = get_nodes_block(node);
2975 	ir_node  *new_block = be_transform_node(block);
2976 	ir_node  *left      = get_Cmp_left(node);
2977 	ir_node  *right     = get_Cmp_right(node);
2978 	ir_mode  *cmp_mode  = get_irn_mode(left);
2980 	ia32_address_mode_t  am;
2981 	ia32_address_t      *addr = &am.addr;
/* Float compares are delegated to the SSE2 or x87 helpers. */
2983 	if (mode_is_float(cmp_mode)) {
2984 		if (ia32_cg_config.use_sse2) {
2985 			return create_Ucomi(node);
2987 			return create_Fucom(node);
2991 	assert(ia32_mode_needs_gp_reg(cmp_mode));
2993 	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2994 	if (is_Const_0(right) &&
2996 			get_irn_n_edges(left) == 1) {
2997 		/* Test(and_left, and_right) */
2998 		ir_node *and_left  = get_And_left(left);
2999 		ir_node *and_right = get_And_right(left);
3001 		/* matze: code here used mode instead of cmd_mode, I think it is always
3002 		 * the same as cmp_mode, but I leave this here to see if this is really
3005 		assert(get_irn_mode(and_left) == cmp_mode);
3007 		match_arguments(&am, block, and_left, and_right, NULL,
3009 		                match_am | match_8bit_am | match_16bit_am |
3010 		                match_am_and_immediates | match_immediate);
3012 		/* use 32bit compare mode if possible since the opcode is smaller */
3013 		if (am.op_type == ia32_Normal &&
3014 			be_upper_bits_clean(and_left, cmp_mode) &&
3015 			be_upper_bits_clean(and_right, cmp_mode)) {
3016 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3019 		if (get_mode_size_bits(cmp_mode) == 8) {
3020 			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3021 			                                addr->index, addr->mem,
3022 			                                am.new_op1, am.new_op2,
3025 			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3026 			                            addr->index, addr->mem, am.new_op1,
3027 			                            am.new_op2, am.ins_permuted);
3030 		/* Cmp(left, right) */
3031 		match_arguments(&am, block, left, right, NULL,
3033 		                match_am | match_8bit_am | match_16bit_am |
3034 		                match_am_and_immediates | match_immediate);
3035 		/* use 32bit compare mode if possible since the opcode is smaller */
3036 		if (am.op_type == ia32_Normal &&
3037 			be_upper_bits_clean(left, cmp_mode) &&
3038 			be_upper_bits_clean(right, cmp_mode)) {
3039 			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3042 		if (get_mode_size_bits(cmp_mode) == 8) {
3043 			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3044 			                               addr->index, addr->mem, am.new_op1,
3045 			                               am.new_op2, am.ins_permuted);
3047 			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3048 			                           addr->mem, am.new_op1, am.new_op2,
3052 	set_am_attributes(new_node, &am);
3053 	set_ia32_ls_mode(new_node, cmp_mode);
3055 	SET_IA32_ORIG_NODE(new_node, node);
/* Re-route memory users if a memory operand was folded in. */
3057 	new_node = fix_mem_proj(new_node, &am);
/*
 * Create a CMovcc for a Mux with GP-register operands.  @p flags is the
 * firm condition node, @p new_flags its already-transformed counterpart,
 * @p cc the condition code to use.
 */
3062 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3063                             ia32_condition_code_t cc)
3065 	dbg_info            *dbgi      = get_irn_dbg_info(node);
3066 	ir_node             *block     = get_nodes_block(node);
3067 	ir_node             *new_block = be_transform_node(block);
3068 	ir_node             *val_true  = get_Mux_true(node);
3069 	ir_node             *val_false = get_Mux_false(node);
3071 	ia32_address_mode_t  am;
3072 	ia32_address_t      *addr;
3074 	assert(ia32_cg_config.use_cmov);
3075 	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3079 	match_arguments(&am, block, val_false, val_true, flags,
3080 			match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If matching swapped the operands, the condition must be inverted. */
3082 	if (am.ins_permuted)
3083 		cc = ia32_negate_condition_code(cc);
3085 	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3086 	                              addr->mem, am.new_op1, am.new_op2, new_flags,
3088 	set_am_attributes(new_node, &am);
3090 	SET_IA32_ORIG_NODE(new_node, node);
3092 	new_node = fix_mem_proj(new_node, &am);
3098  * Creates a ia32 Setcc instruction.
3100 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3101                                  ir_node *flags, ia32_condition_code_t cc,
3104 	ir_mode *mode  = get_irn_mode(orig_node);
3107 	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3108 	SET_IA32_ORIG_NODE(new_node, orig_node);
3110 	/* we might need to conv the result up */
/* Setcc writes only 8 bits; zero-extend for wider result modes. */
3111 	if (get_mode_size_bits(mode) > 8) {
3112 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3113 		                                    nomem, new_node, mode_Bu);
3114 		SET_IA32_ORIG_NODE(new_node, orig_node);
3121  * Create instruction for an unsigned Difference or Zero.
3123 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3125 	ir_mode *mode = get_irn_mode(psi);
/* Compute a - b; the Sub's carry flag drives the masking below. */
3135 	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3136 	                     match_mode_neutral | match_am | match_immediate | match_two_users);
3138 	block = get_nodes_block(new_node);
3140 	if (is_Proj(new_node)) {
3141 		sub = get_Proj_pred(new_node);
3144 		set_irn_mode(sub, mode_T);
3145 		new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3147 	assert(is_ia32_Sub(sub));
3148 	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3150 	dbgi = get_irn_dbg_info(psi);
/* sbb 0 yields -1 on borrow, 0 otherwise; NOT inverts that into a
 * mask which is ANDed with the difference: (a-b) & ~(borrow mask). */
3151 	sbb  = new_bd_ia32_Sbb0(dbgi, block, eflags);
3152 	set_ia32_ls_mode(sbb, mode_Iu);
3153 	notn = new_bd_ia32_Not(dbgi, block, sbb);
3155 	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3156 	set_ia32_ls_mode(new_node, mode_Iu);
3157 	set_ia32_commutative(new_node);
3162  * Create an const array of two float consts.
3164  * @param c0        the first constant
3165  * @param c1        the second constant
3166  * @param new_mode  IN/OUT for the mode of the constants, if NULL
3167  *                  smallest possible mode will be used
3169 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3172 	ir_mode          *mode = *new_mode;
3174 	ir_initializer_t *initializer;
3175 	ir_tarval        *tv0 = get_Const_tarval(c0);
3176 	ir_tarval        *tv1 = get_Const_tarval(c1);
3179 		/* detect the best mode for the constants */
3180 		mode = get_tarval_mode(tv0);
/* Try the smallest float mode first (F = single), then D = double,
 * keeping only lossless conversions of both constants. */
3182 		if (mode != mode_F) {
3183 			if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3184 			    tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3186 				tv0 = tarval_convert_to(tv0, mode);
3187 				tv1 = tarval_convert_to(tv1, mode);
3188 			} else if (mode != mode_D) {
3189 				if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3190 				    tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3192 					tv0 = tarval_convert_to(tv0, mode);
3193 					tv1 = tarval_convert_to(tv1, mode);
/* Build a private constant global entity holding the two values. */
3200 	tp = ia32_get_prim_type(mode);
3201 	tp = ia32_create_float_array(tp);
3203 	ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3205 	set_entity_ld_ident(ent, get_entity_ident(ent));
3206 	set_entity_visibility(ent, ir_visibility_private);
3207 	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3209 	initializer = create_initializer_compound(2);
3211 	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3212 	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3214 	set_entity_initializer(ent, initializer);
3221  * Possible transformations for creating a Setcc.
3223 enum setcc_transform_insn {
/* A sequence of at most a few steps (ADD/LEA/SHL/NEG/NOT/AND/SET...)
 * that produces the requested true/false constants from a 0/1 Setcc. */
3235 typedef struct setcc_transform {
3237 	ia32_condition_code_t cc;
3239 	enum setcc_transform_insn transform;
3243 } setcc_transform_t;
3246  * Setcc can only handle 0 and 1 result.
3247  * Find a transformation that creates 0 and 1 from
3250 static void find_const_transform(ia32_condition_code_t cc,
3251                                  ir_tarval *t, ir_tarval *f,
3252                                  setcc_transform_t *res)
/* Normalize so that t is the "bigger"/non-zero value, inverting the
 * condition code whenever true and false values are swapped. */
3258 	if (tarval_is_null(t)) {
3262 		cc = ia32_negate_condition_code(cc);
3263 	} else if (tarval_cmp(t, f) == ir_relation_less) {
3264 		// now, t is the bigger one
3268 		cc = ia32_negate_condition_code(cc);
/* Non-zero false value: emit a final ADD f and reduce to (t-f, 0). */
3272 	if (! tarval_is_null(f)) {
3273 		ir_tarval *t_sub = tarval_sub(t, f, NULL);
3276 		res->steps[step].transform = SETCC_TR_ADD;
3278 		if (t == tarval_bad)
3279 			panic("constant subtract failed");
3280 		if (! tarval_is_long(f))
3281 			panic("tarval is not long");
3283 		res->steps[step].val = get_tarval_long(f);
3285 		f = tarval_sub(f, f, NULL);
3286 		assert(tarval_is_null(f));
/* (1, 0): the plain Setcc result is already correct. */
3289 	if (tarval_is_one(t)) {
3290 		res->steps[step].transform = SETCC_TR_SET;
3291 		res->num_steps = ++step;
/* (-1, 0): negate the Setcc result. */
3295 	if (tarval_is_minus_one(t)) {
3296 		res->steps[step].transform = SETCC_TR_NEG;
3298 		res->steps[step].transform = SETCC_TR_SET;
3299 		res->num_steps = ++step;
3302 	if (tarval_is_long(t)) {
3303 		long v = get_tarval_long(t);
3305 		res->steps[step].val = 0;
/* Small multiples (9,8,5,4,3,2) map to LEA/SHL addressing tricks;
 * switch cases are elided in this view. */
3308 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3310 			res->steps[step].transform = SETCC_TR_LEAxx;
3311 			res->steps[step].scale     = 3; /* (a << 3) + a */
3314 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3316 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3317 			res->steps[step].scale     = 3; /* (a << 3) */
3320 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3322 			res->steps[step].transform = SETCC_TR_LEAxx;
3323 			res->steps[step].scale     = 2; /* (a << 2) + a */
3326 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3328 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3329 			res->steps[step].scale     = 2; /* (a << 2) */
3332 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3334 			res->steps[step].transform = SETCC_TR_LEAxx;
3335 			res->steps[step].scale     = 1; /* (a << 1) + a */
3338 			if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3340 			res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3341 			res->steps[step].scale     = 1; /* (a << 1) */
3344 			res->num_steps = step;
/* General constant: AND with the value, or SHL for a single-bit value. */
3347 			if (! tarval_is_single_bit(t)) {
3348 				res->steps[step].transform = SETCC_TR_AND;
3349 				res->steps[step].val       = v;
3351 				res->steps[step].transform = SETCC_TR_NEG;
3353 				int val = get_tarval_lowest_bit(t);
3356 				res->steps[step].transform = SETCC_TR_SHL;
3357 				res->steps[step].scale     = val;
3361 			res->steps[step].transform = SETCC_TR_SET;
3362 			res->num_steps = ++step;
3365 	panic("tarval is not long");
3369  * Transforms a Mux node into some code sequence.
3371  * @return The transformed node.
3373 static ir_node *gen_Mux(ir_node *node)
3375 	dbg_info *dbgi      = get_irn_dbg_info(node);
3376 	ir_node  *block     = get_nodes_block(node);
3377 	ir_node  *new_block = be_transform_node(block);
3378 	ir_node  *mux_true  = get_Mux_true(node);
3379 	ir_node  *mux_false = get_Mux_false(node);
3380 	ir_node  *sel       = get_Mux_sel(node);
3381 	ir_mode  *mode      = get_irn_mode(node);
3385 	ia32_condition_code_t cc;
3387 	assert(get_irn_mode(sel) == mode_b);
/* Special-case abs(x): float abs is handled directly; integer abs is
 * only warned about (branch bodies partially elided in this view). */
3389 	is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3391 		if (ia32_mode_needs_gp_reg(mode)) {
3392 			ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3395 			ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3396 			return create_float_abs(dbgi, block, op, is_abs < 0, node);
3400 	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3401 	if (mode_is_float(mode)) {
3402 		ir_node    *cmp_left  = get_Cmp_left(sel);
3403 		ir_node    *cmp_right = get_Cmp_right(sel);
3404 		ir_relation relation  = get_Cmp_relation(sel);
3406 		if (ia32_cg_config.use_sse2) {
/* Recognize min/max patterns and emit SSE min/max instructions. */
3407 			if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3408 				if (cmp_left == mux_true && cmp_right == mux_false) {
3409 					/* Mux(a <= b, a, b) => MIN */
3410 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3411 			                 match_commutative | match_am | match_two_users);
3412 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3413 					/* Mux(a <= b, b, a) => MAX */
3414 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3415 			                 match_commutative | match_am | match_two_users);
3417 			} else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3418 				if (cmp_left == mux_true && cmp_right == mux_false) {
3419 					/* Mux(a >= b, a, b) => MAX */
3420 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3421 			                 match_commutative | match_am | match_two_users);
3422 				} else if (cmp_left == mux_false && cmp_right == mux_true) {
3423 					/* Mux(a >= b, b, a) => MIN */
3424 					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3425 			                 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: build a 2-element constant array and
 * load entry [Setcc result] from it. */
3430 		if (is_Const(mux_true) && is_Const(mux_false)) {
3431 			ia32_address_mode_t am;
3436 			flags    = get_flags_node(sel, &cc);
3437 			new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3439 			if (ia32_cg_config.use_sse2) {
3440 				/* cannot load from different mode on SSE */
3443 				/* x87 can load any mode */
3447 			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale = element size (branch bodies elided in this view). */
3449 			if (new_mode == mode_F) {
3451 			} else if (new_mode == mode_D) {
3453 			} else if (new_mode == ia32_mode_E) {
3454 				/* arg, shift 16 NOT supported */
3456 				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3458 				panic("Unsupported constant size");
3461 			am.ls_mode            = new_mode;
3462 			am.addr.base          = get_symconst_base();
3463 			am.addr.index         = new_node;
3464 			am.addr.mem           = nomem;
3466 			am.addr.scale         = scale;
3467 			am.addr.use_frame     = 0;
3468 			am.addr.tls_segment   = false;
3469 			am.addr.frame_entity  = NULL;
3470 			am.addr.symconst_sign = 0;
3471 			am.mem_proj           = am.addr.mem;
3472 			am.op_type            = ia32_AddrModeS;
3475 			am.pinned             = op_pin_state_floats;
3477 			am.ins_permuted       = false;
3479 			if (ia32_cg_config.use_sse2)
3480 				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3482 				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3483 			set_am_attributes(load, &am);
3485 			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3487 		panic("cannot transform floating point Mux");
3490 		assert(ia32_mode_needs_gp_reg(mode));
/* Integer Mux: first try the "doz" (difference-or-zero) pattern. */
3493 			ir_node    *cmp_left  = get_Cmp_left(sel);
3494 			ir_node    *cmp_right = get_Cmp_right(sel);
3495 			ir_relation relation  = get_Cmp_relation(sel);
3496 			ir_node    *val_true  = mux_true;
3497 			ir_node    *val_false = mux_false;
/* Normalize so the zero constant ends up as val_false. */
3499 			if (is_Const(val_true) && is_Const_null(val_true)) {
3500 				ir_node *tmp = val_false;
3501 				val_false = val_true;
3503 				relation  = get_negated_relation(relation);
3505 			if (is_Const_0(val_false) && is_Sub(val_true)) {
3506 				if ((relation & ir_relation_greater)
3507 					&& get_Sub_left(val_true) == cmp_left
3508 					&& get_Sub_right(val_true) == cmp_right) {
3509 					return create_doz(node, cmp_left, cmp_right);
3511 				if ((relation & ir_relation_less)
3512 					&& get_Sub_left(val_true) == cmp_right
3513 					&& get_Sub_right(val_true) == cmp_left) {
3514 					return create_doz(node, cmp_right, cmp_left);
3519 		flags = get_flags_node(sel, &cc);
3521 		if (is_Const(mux_true) && is_Const(mux_false)) {
3522 			/* both are const, good */
3523 			ir_tarval *tv_true  = get_Const_tarval(mux_true);
3524 			ir_tarval *tv_false = get_Const_tarval(mux_false);
3525 			setcc_transform_t res;
3528 			find_const_transform(cc, tv_true, tv_false, &res);
/* Apply the planned steps in reverse order on top of a Setcc. */
3530 			for (step = (int)res.num_steps - 1; step >= 0; --step) {
3533 				switch (res.steps[step].transform) {
3535 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3536 					add_ia32_am_offs_int(new_node, res.steps[step].val);
3538 				case SETCC_TR_ADDxx:
3539 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3542 					new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3543 					set_ia32_am_scale(new_node, res.steps[step].scale);
3544 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3546 				case SETCC_TR_LEAxx:
3547 					new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3548 					set_ia32_am_scale(new_node, res.steps[step].scale);
3549 					set_ia32_am_offs_int(new_node, res.steps[step].val);
3552 					imm = ia32_immediate_from_long(res.steps[step].scale);
3553 					new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3556 					new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3559 					new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3562 					imm = ia32_immediate_from_long(res.steps[step].val);
3563 					new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3566 					new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3569 					panic("unknown setcc transform");
/* General case: conditional move. */
3573 			new_node = create_CMov(node, sel, flags, cc);
3580  * Create a conversion from x87 state register to general purpose.
3582 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3584 	ir_node         *block  = be_transform_node(get_nodes_block(node));
3585 	ir_node         *op     = get_Conv_op(node);
3586 	ir_node         *new_op = be_transform_node(op);
3587 	ir_graph        *irg    = current_ir_graph;
3588 	dbg_info        *dbgi   = get_irn_dbg_info(node);
3589 	ir_mode         *mode   = get_irn_mode(node);
3590 	ir_node         *frame  = get_irg_frame(irg);
3591 	ir_node         *fist, *load, *mem;
/* Spill the x87 value to the stack frame as an integer via fist... */
3593 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3594 	set_irn_pinned(fist, op_pin_state_floats);
3595 	set_ia32_use_frame(fist);
3596 	set_ia32_op_type(fist, ia32_AddrModeD);
3598 	assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3599 	mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3601 	assert(get_mode_size_bits(mode) <= 32);
3602 	/* exception we can only store signed 32 bit integers, so for unsigned
3603 	   we store a 64bit (signed) integer and load the lower bits */
3604 	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3605 		set_ia32_ls_mode(fist, mode_Ls);
3607 		set_ia32_ls_mode(fist, mode_Is);
3609 	SET_IA32_ORIG_NODE(fist, node);
/* ...then reload the (lower) 32 bits into a GP register. */
3612 	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3614 	set_irn_pinned(load, op_pin_state_floats);
3615 	set_ia32_use_frame(load);
3616 	set_ia32_op_type(load, ia32_AddrModeS);
3617 	set_ia32_ls_mode(load, mode_Is);
/* Tell spill-slot assignment how big the stack entity must be. */
3618 	if (get_ia32_ls_mode(fist) == mode_Ls) {
3619 		ia32_attr_t *attr = get_ia32_attr(load);
3620 		attr->data.need_64bit_stackent = 1;
3622 		ia32_attr_t *attr = get_ia32_attr(load);
3623 		attr->data.need_32bit_stackent = 1;
3625 	SET_IA32_ORIG_NODE(load, node);
3627 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3631  * Creates a x87 Conv by placing a Store and a Load
3633 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3635 	ir_node  *block  = get_nodes_block(node);
3636 	ir_graph *irg    = get_Block_irg(block);
3637 	dbg_info *dbgi   = get_irn_dbg_info(node);
3638 	ir_node  *frame  = get_irg_frame(irg);
3640 	ir_node  *store, *load;
/* Rounding to tgt_mode happens implicitly through the store/load pair:
 * vfst narrows to tgt_mode on the frame, vfld widens back. */
3643 	store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3644 	set_ia32_use_frame(store);
3645 	set_ia32_op_type(store, ia32_AddrModeD);
3646 	SET_IA32_ORIG_NODE(store, node);
3648 	store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3650 	load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3651 	set_ia32_use_frame(load);
3652 	set_ia32_op_type(load, ia32_AddrModeS);
3653 	SET_IA32_ORIG_NODE(load, node);
3655 	new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/*
 * Construct an integer-to-integer Conv, selecting the 8-bit construction
 * function for 8-bit modes and the generic one otherwise.
 */
3659 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3660 		ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3662 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3664 	func = get_mode_size_bits(mode) == 8 ?
3665 		new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3666 	return func(dbgi, block, base, index, mem, val, mode);
3670 * Create a conversion from general purpose to x87 register
/* Convert a GP integer value to an x87 float via fild.
 * Fast path: fild reads directly from memory via source address mode when
 * the operand is an integer mode fild can consume. Slow path: spill the
 * value to the frame and fild from there; 32-bit unsigned values get a
 * 64-bit spill (upper half zeroed) so fild's signed interpretation stays
 * correct.
 * NOTE(review): several lines are elided in this excerpt. */
3672 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3674 ir_node *src_block = get_nodes_block(node);
3675 ir_node *block = be_transform_node(src_block);
3676 ir_graph *irg = get_Block_irg(block);
3677 dbg_info *dbgi = get_irn_dbg_info(node);
3678 ir_node *op = get_Conv_op(node);
3679 ir_node *new_op = NULL;
3681 ir_mode *store_mode;
3687 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3688 if (possible_int_mode_for_fp(src_mode)) {
3689 ia32_address_mode_t am;
3691 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3692 if (am.op_type == ia32_AddrModeS) {
3693 ia32_address_t *addr = &am.addr;
/* fild directly from the matched memory operand — no spill needed */
3695 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3696 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3698 set_am_attributes(fild, &am);
3699 SET_IA32_ORIG_NODE(fild, node);
3701 fix_mem_proj(fild, &am);
/* fall-back: source AM did not match, transform the operand normally */
3706 if (new_op == NULL) {
3707 new_op = be_transform_node(op);
3710 mode = get_irn_mode(op);
3712 /* first convert to 32 bit signed if necessary */
3713 if (get_mode_size_bits(src_mode) < 32) {
3714 if (!be_upper_bits_clean(op, src_mode)) {
3715 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3716 SET_IA32_ORIG_NODE(new_op, node);
3721 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32-bit) value to the frame so fild can read it */
3724 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3726 set_ia32_use_frame(store);
3727 set_ia32_op_type(store, ia32_AddrModeD);
3728 set_ia32_ls_mode(store, mode_Iu);
3730 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3732 /* exception for 32bit unsigned, do a 64bit spill+load */
3733 if (!mode_is_signed(mode)) {
/* store a zero into the upper 4 bytes of the slot, then Sync both
 * stores so the 64-bit fild sees a fully-defined value */
3736 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3738 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3739 noreg_GP, nomem, zero_const);
3740 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3742 set_ia32_use_frame(zero_store);
3743 set_ia32_op_type(zero_store, ia32_AddrModeD);
3744 add_ia32_am_offs_int(zero_store, 4);
3745 set_ia32_ls_mode(zero_store, mode_Iu);
3747 in[0] = zero_store_mem;
3750 store_mem = new_rd_Sync(dbgi, block, 2, in);
3751 store_mode = mode_Ls;
3753 store_mode = mode_Is;
3757 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3759 set_ia32_use_frame(fild);
3760 set_ia32_op_type(fild, ia32_AddrModeS);
3761 set_ia32_ls_mode(fild, store_mode);
3763 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3769 * Create a conversion from one integer mode into another one
/* Create an upconv (widening) integer conversion. If the upper bits of the
 * operand are already clean the conversion is dropped entirely; otherwise a
 * Conv_I2I is built, optionally folding a memory operand via address mode. */
3771 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3772 dbg_info *dbgi, ir_node *block, ir_node *op,
3775 ir_node *new_block = be_transform_node(block);
3777 ia32_address_mode_t am;
3778 ia32_address_t *addr = &am.addr;
/* widening only — narrowing convs must not reach this helper */
3781 assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3783 #ifdef DEBUG_libfirm
3785 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
/* upper bits already zero/sign-correct: conversion is a no-op */
3790 if (be_upper_bits_clean(op, src_mode)) {
3791 return be_transform_node(op);
3794 match_arguments(&am, block, NULL, op, NULL,
3795 match_am | match_8bit_am | match_16bit_am);
3797 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3798 addr->mem, am.new_op2, src_mode);
3799 set_am_attributes(new_node, &am);
3800 /* match_arguments assume that out-mode = in-mode, this isn't true here
3802 set_ia32_ls_mode(new_node, src_mode);
3803 SET_IA32_ORIG_NODE(new_node, node);
3804 new_node = fix_mem_proj(new_node, &am);
3809 * Transforms a Conv node.
3811 * @return The created ia32 Conv node
/* Transform a firm Conv node, dispatching on the (src, tgt) mode pair:
 *  - float->float: SSE Conv_FP2FP or x87 store/load conversion
 *  - float->int:   SSE Conv_FP2I or x87 fist path (gen_x87_fp_to_gp)
 *  - int->float:   SSE Conv_I2FP or x87 fild path (gen_x87_gp_to_fp)
 *  - int->int:     widening via create_I2I_Conv; narrowing is a no-op
 * NOTE(review): several lines are elided in this excerpt. */
3813 static ir_node *gen_Conv(ir_node *node)
3815 ir_node *block = get_nodes_block(node);
3816 ir_node *new_block = be_transform_node(block);
3817 ir_node *op = get_Conv_op(node);
3818 ir_node *new_op = NULL;
3819 dbg_info *dbgi = get_irn_dbg_info(node);
3820 ir_mode *src_mode = get_irn_mode(op);
3821 ir_mode *tgt_mode = get_irn_mode(node);
3822 int src_bits = get_mode_size_bits(src_mode);
3823 int tgt_bits = get_mode_size_bits(tgt_mode);
3824 ir_node *res = NULL;
/* 64-bit integers must have been lowered to 32-bit pairs already */
3826 assert(!mode_is_int(src_mode) || src_bits <= 32);
3827 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3829 /* modeB -> X should already be lowered by the lower_mode_b pass */
3830 if (src_mode == mode_b) {
3831 panic("ConvB not lowered %+F", node);
/* identity conversion: just forward the transformed operand */
3834 if (src_mode == tgt_mode) {
3835 /* this should be optimized already, but who knows... */
3836 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3837 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3838 return be_transform_node(op);
3841 if (mode_is_float(src_mode)) {
3842 new_op = be_transform_node(op);
3843 /* we convert from float ... */
3844 if (mode_is_float(tgt_mode)) {
3846 if (ia32_cg_config.use_sse2) {
3847 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3848 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3850 set_ia32_ls_mode(res, tgt_mode);
/* x87: widening is free (registers hold extended precision),
 * narrowing needs an explicit store/load conversion */
3852 if (src_bits < tgt_bits) {
3853 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3856 res = gen_x87_conv(tgt_mode, new_op);
3857 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3863 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3864 if (ia32_cg_config.use_sse2) {
3865 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3867 set_ia32_ls_mode(res, src_mode);
3869 return gen_x87_fp_to_gp(node);
3873 /* we convert from int ... */
3874 if (mode_is_float(tgt_mode)) {
3876 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3877 if (ia32_cg_config.use_sse2) {
3878 new_op = be_transform_node(op);
3879 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3881 set_ia32_ls_mode(res, tgt_mode);
/* effective integer precision (sign bit carries no magnitude) */
3883 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3884 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3885 res = gen_x87_gp_to_fp(node, src_mode);
3887 /* we need a float-conv, if the int mode has more bits than the
3889 if (float_mantissa < int_mantissa) {
3890 res = gen_x87_conv(tgt_mode, res);
3891 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3895 } else if (tgt_mode == mode_b) {
3896 /* mode_b lowering already took care that we only have 0/1 values */
3897 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3898 src_mode, tgt_mode));
3899 return be_transform_node(op);
/* int->int narrowing or same-size: all GP ops work on 32-bit regs,
 * so nothing needs to be done */
3902 if (src_bits >= tgt_bits) {
3903 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3904 src_mode, tgt_mode));
3905 return be_transform_node(op);
3908 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate (subject to the given
 * constraint letter); fall back to the regular transformation otherwise. */
3916 static ir_node *create_immediate_or_transform(ir_node *node,
3917 char immediate_constraint_type)
3919 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3920 if (new_node == NULL) {
3921 new_node = be_transform_node(node);
3927 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr (address of a frame entity) into an ia32 Lea
 * based on the frame pointer, carrying the frame entity for later stack
 * layout fixup. */
3929 static ir_node *gen_be_FrameAddr(ir_node *node)
3931 ir_node *block = be_transform_node(get_nodes_block(node));
3932 ir_node *op = be_get_FrameAddr_frame(node);
3933 ir_node *new_op = be_transform_node(op);
3934 dbg_info *dbgi = get_irn_dbg_info(node);
3937 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* offset is resolved once frame entities get their final positions */
3938 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3939 set_ia32_use_frame(new_node);
3941 SET_IA32_ORIG_NODE(new_node, node);
3947 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. Plain case: just duplicate the node. SSE2 case
 * with a float result: the ABI returns floats on the x87 stack, so the
 * xmm0 value is stored to the frame and reloaded with vfld before building
 * the new Return.
 * NOTE(review): several lines are elided in this excerpt. */
3949 static ir_node *gen_be_Return(ir_node *node)
3951 ir_graph *irg = current_ir_graph;
3952 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3953 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3954 ir_node *new_ret_val = be_transform_node(ret_val);
3955 ir_node *new_ret_mem = be_transform_node(ret_mem);
3956 ir_entity *ent = get_irg_entity(irg);
3957 ir_type *tp = get_entity_type(ent);
3958 dbg_info *dbgi = get_irn_dbg_info(node);
3959 ir_node *block = be_transform_node(get_nodes_block(node));
3973 assert(ret_val != NULL);
/* only the SSE2 + float-result combination needs special handling */
3974 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3975 return be_duplicate_node(node);
3978 res_type = get_method_res_type(tp, 0);
3980 if (! is_Primitive_type(res_type)) {
3981 return be_duplicate_node(node);
3984 mode = get_type_mode(res_type);
3985 if (! mode_is_float(mode)) {
3986 return be_duplicate_node(node);
3989 assert(get_method_n_ress(tp) == 1);
3991 frame = get_irg_frame(irg);
3993 /* store xmm0 onto stack */
3994 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3995 new_ret_mem, new_ret_val);
3996 set_ia32_ls_mode(sse_store, mode);
3997 set_ia32_op_type(sse_store, ia32_AddrModeD);
3998 set_ia32_use_frame(sse_store);
3999 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4001 /* load into x87 register */
4002 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4003 set_ia32_op_type(fld, ia32_AddrModeS);
4004 set_ia32_use_frame(fld);
4006 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4007 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4009 /* create a new return */
4010 arity = get_irn_arity(node);
4011 in = ALLOCAN(ir_node*, arity);
4012 pop = be_Return_get_pop(node);
/* rebuild the in-array, substituting the reloaded value/memory */
4013 for (i = 0; i < arity; ++i) {
4014 ir_node *op = get_irn_n(node, i);
4015 if (op == ret_val) {
4017 } else if (op == ret_mem) {
4020 in[i] = be_transform_node(op);
4023 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4024 copy_node_attr(irg, node, new_node);
4030 * Transform a be_AddSP into an ia32_SubSP.
/* Transform a be_AddSP (reserve stack space) into an ia32 SubSP — the ia32
 * stack grows downwards, so adding to the logical stack means subtracting
 * from ESP. The stack output is pinned to ESP. */
4032 static ir_node *gen_be_AddSP(ir_node *node)
4034 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4035 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4037 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4038 match_am | match_immediate);
4039 assert(is_ia32_SubSP(new_node));
4040 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4041 &ia32_registers[REG_ESP]);
4046 * Transform a be_SubSP into an ia32_AddSP
/* Transform a be_SubSP (release stack space) into an ia32 AddSP — mirror
 * image of gen_be_AddSP; the stack output is pinned to ESP. */
4048 static ir_node *gen_be_SubSP(ir_node *node)
4050 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4051 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4053 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4054 match_am | match_immediate);
4055 assert(is_ia32_AddSP(new_node));
4056 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4057 &ia32_registers[REG_ESP]);
4062 * Change some phi modes
/* Transform a Phi: pick the register requirement matching its mode
 * (gp / xmm / vfp / none) and rebuild the Phi with its OLD, untransformed
 * arguments — loops mean predecessors may not be transformed yet; they are
 * fixed up later. */
4064 static ir_node *gen_Phi(ir_node *node)
4066 const arch_register_req_t *req;
4067 ir_node *block = be_transform_node(get_nodes_block(node));
4068 ir_graph *irg = current_ir_graph;
4069 dbg_info *dbgi = get_irn_dbg_info(node);
4070 ir_mode *mode = get_irn_mode(node);
4073 if (ia32_mode_needs_gp_reg(mode)) {
4074 /* we shouldn't have any 64bit stuff around anymore */
4075 assert(get_mode_size_bits(mode) <= 32);
4076 /* all integer operations are on 32bit registers now */
4078 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4079 } else if (mode_is_float(mode)) {
4080 if (ia32_cg_config.use_sse2) {
4082 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4085 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* memory/other Phis carry no register requirement */
4088 req = arch_no_register_req;
4091 /* phi nodes allow loops, so we use the old arguments for now
4092 * and fix this later */
4093 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4094 get_irn_in(node) + 1);
4095 copy_node_attr(irg, node, phi);
4096 be_duplicate_deps(node, phi);
4098 arch_set_irn_register_req_out(phi, 0, req);
/* make sure the (still untransformed) predecessors get visited */
4100 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp in the transformed block. */
4105 static ir_node *gen_Jmp(ir_node *node)
4107 ir_node *block = get_nodes_block(node);
4108 ir_node *new_block = be_transform_node(block);
4109 dbg_info *dbgi = get_irn_dbg_info(node);
4112 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4113 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp): the target may be folded as a memory
 * operand or immediate via address-mode matching. */
4121 static ir_node *gen_IJmp(ir_node *node)
4123 ir_node *block = get_nodes_block(node);
4124 ir_node *new_block = be_transform_node(block);
4125 dbg_info *dbgi = get_irn_dbg_info(node);
4126 ir_node *op = get_IJmp_target(node);
4128 ia32_address_mode_t am;
4129 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4131 assert(get_irn_mode(op) == mode_P);
4133 match_arguments(&am, block, NULL, op, NULL,
4134 match_am | match_immediate | match_upconv);
4136 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4137 addr->mem, am.new_op2);
4138 set_am_attributes(new_node, &am);
4139 SET_IA32_ORIG_NODE(new_node, node);
4141 new_node = fix_mem_proj(new_node, &am);
/* Transform a lowered l_Add (part of 64-bit arithmetic lowering) into a
 * real ia32 Add in mode_T so its flags output can feed the matching Adc. */
4146 static ir_node *gen_ia32_l_Add(ir_node *node)
4148 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4149 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4150 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4151 match_commutative | match_am | match_immediate |
4152 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add itself */
4154 if (is_Proj(lowered)) {
4155 lowered = get_Proj_pred(lowered);
4157 assert(is_ia32_Add(lowered));
4158 set_irn_mode(lowered, mode_T);
/* Transform a lowered l_Adc (add with carry) into an ia32 Adc consuming
 * the flags produced by the preceding Add. */
4164 static ir_node *gen_ia32_l_Adc(ir_node *node)
4166 return gen_binop_flags(node, new_bd_ia32_Adc,
4167 match_commutative | match_am | match_immediate |
4168 match_mode_neutral);
4172 * Transforms a l_MulS into a "real" MulS node.
4174 * @return the created ia32 Mul node
/* Transform a lowered l_Mul into an ia32 Mul (unsigned widening multiply,
 * producing low and high result halves). */
4176 static ir_node *gen_ia32_l_Mul(ir_node *node)
4178 ir_node *left = get_binop_left(node);
4179 ir_node *right = get_binop_right(node);
4181 return gen_binop(node, left, right, new_bd_ia32_Mul,
4182 match_commutative | match_am | match_mode_neutral);
4186 * Transforms a l_IMulS into a "real" IMul1OPS node.
4188 * @return the created ia32 IMul1OP node
/* Transform a lowered l_IMul into an ia32 IMul1OP (one-operand signed
 * widening multiply). */
4190 static ir_node *gen_ia32_l_IMul(ir_node *node)
4192 ir_node *left = get_binop_left(node);
4193 ir_node *right = get_binop_right(node);
4195 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4196 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered l_Sub into a real ia32 Sub in mode_T so its flags
 * output can feed the matching Sbb (64-bit subtraction lowering). */
4199 static ir_node *gen_ia32_l_Sub(ir_node *node)
4201 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4202 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4203 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4204 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Sub itself */
4206 if (is_Proj(lowered)) {
4207 lowered = get_Proj_pred(lowered);
4209 assert(is_ia32_Sub(lowered));
4210 set_irn_mode(lowered, mode_T);
/* Transform a lowered l_Sbb (subtract with borrow) into an ia32 Sbb
 * consuming the flags produced by the preceding Sub. */
4216 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4218 return gen_binop_flags(node, new_bd_ia32_Sbb,
4219 match_am | match_immediate | match_mode_neutral);
/* Transform a lowered 64-bit-int-to-float conversion (l_LLtoFloat):
 * store both 32-bit halves into a frame slot, Sync the two stores, and
 * fild the 64-bit value. fild is signed, so for an unsigned high word
 * whose sign bit is set, a bias constant (ULLBIAS) is added afterwards —
 * selected by indexing a constant table with (high >> 31).
 * NOTE(review): several lines are elided in this excerpt. */
4222 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4224 ir_node *src_block = get_nodes_block(node);
4225 ir_node *block = be_transform_node(src_block);
4226 ir_graph *irg = current_ir_graph;
4227 dbg_info *dbgi = get_irn_dbg_info(node);
4228 ir_node *frame = get_irg_frame(irg);
4229 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4230 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4231 ir_node *new_val_low = be_transform_node(val_low);
4232 ir_node *new_val_high = be_transform_node(val_high);
4234 ir_node *sync, *fild, *res;
4236 ir_node *store_high;
4240 if (ia32_cg_config.use_sse2) {
4241 panic("not implemented for SSE2");
/* spill the two halves into one 64-bit frame slot (high at offset 4) */
4245 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4247 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4249 SET_IA32_ORIG_NODE(store_low, node);
4250 SET_IA32_ORIG_NODE(store_high, node);
4252 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4253 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4255 set_ia32_use_frame(store_low);
4256 set_ia32_use_frame(store_high);
4257 set_ia32_op_type(store_low, ia32_AddrModeD);
4258 set_ia32_op_type(store_high, ia32_AddrModeD);
4259 set_ia32_ls_mode(store_low, mode_Iu);
4260 set_ia32_ls_mode(store_high, mode_Is);
4261 add_ia32_am_offs_int(store_high, 4);
4265 sync = new_rd_Sync(dbgi, block, 2, in);
4268 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4270 set_ia32_use_frame(fild);
4271 set_ia32_op_type(fild, ia32_AddrModeS);
4272 set_ia32_ls_mode(fild, mode_Ls);
4274 SET_IA32_ORIG_NODE(fild, node);
4276 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: correct the signed fild result by conditionally
 * adding 2^64 — the bias table is indexed by the high word's sign bit */
4278 if (! mode_is_signed(get_irn_mode(val_high))) {
4279 ia32_address_mode_t am;
4281 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4284 am.addr.base = get_symconst_base();
4285 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4286 am.addr.mem = nomem;
4289 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4290 am.addr.tls_segment = false;
4291 am.addr.use_frame = 0;
4292 am.addr.frame_entity = NULL;
4293 am.addr.symconst_sign = 0;
4294 am.ls_mode = mode_F;
4295 am.mem_proj = nomem;
4296 am.op_type = ia32_AddrModeS;
4298 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4299 am.pinned = op_pin_state_floats;
4301 am.ins_permuted = false;
4303 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4304 am.new_op1, am.new_op2, get_fpcw());
4305 set_am_attributes(fadd, &am);
4307 set_irn_mode(fadd, mode_T);
4308 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Transform a lowered float-to-64-bit-int conversion (l_FloattoLL):
 * fist the value into a 64-bit frame slot; the two 32-bit halves are
 * picked up later by gen_Proj_l_FloattoLL. Returns the memory Proj. */
4314 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4315 ir_node *src_block = get_nodes_block(node);
4316 ir_node *block = be_transform_node(src_block);
4317 ir_graph *irg = get_Block_irg(block);
4318 dbg_info *dbgi = get_irn_dbg_info(node);
4319 ir_node *frame = get_irg_frame(irg);
4320 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4321 ir_node *new_val = be_transform_node(val);
4324 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4325 SET_IA32_ORIG_NODE(fist, node);
4326 set_ia32_use_frame(fist);
4327 set_ia32_op_type(fist, ia32_AddrModeD);
4328 set_ia32_ls_mode(fist, mode_Ls);
/* gen_vfist may produce either vfist or vfisttp; their M projs match */
4330 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4331 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Transform a Proj of l_FloattoLL: load the requested 32-bit half back
 * from the 64-bit frame slot the fist wrote (high half at offset 4). */
4334 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4336 ir_node *block = be_transform_node(get_nodes_block(node));
4337 ir_graph *irg = get_Block_irg(block);
4338 ir_node *pred = get_Proj_pred(node);
4339 ir_node *new_pred = be_transform_node(pred);
4340 ir_node *frame = get_irg_frame(irg);
4341 dbg_info *dbgi = get_irn_dbg_info(node);
4342 long pn = get_Proj_proj(node);
4347 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4348 SET_IA32_ORIG_NODE(load, node);
4349 set_ia32_use_frame(load);
4350 set_ia32_op_type(load, ia32_AddrModeS);
4351 set_ia32_ls_mode(load, mode_Iu);
4352 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4353 * 32 bit from it with this particular load */
4354 attr = get_ia32_attr(load);
4355 attr->data.need_64bit_stackent = 1;
4357 if (pn == pn_ia32_l_FloattoLL_res_high) {
4358 add_ia32_am_offs_int(load, 4);
4360 assert(pn == pn_ia32_l_FloattoLL_res_low);
4363 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4369 * Transform the Projs of an AddSP.
/* Transform the Projs of a be_AddSP: the predecessor became an ia32 SubSP
 * (see gen_be_AddSP), so renumber onto SubSP's outputs; the stack Proj is
 * pinned to ESP. */
4371 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4373 ir_node *pred = get_Proj_pred(node);
4374 ir_node *new_pred = be_transform_node(pred);
4375 dbg_info *dbgi = get_irn_dbg_info(node);
4376 long proj = get_Proj_proj(node);
4378 if (proj == pn_be_AddSP_sp) {
4379 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4380 pn_ia32_SubSP_stack);
4381 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4383 } else if (proj == pn_be_AddSP_res) {
4384 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4385 pn_ia32_SubSP_addr);
4386 } else if (proj == pn_be_AddSP_M) {
4387 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4390 panic("No idea how to transform proj->AddSP");
4394 * Transform the Projs of a SubSP.
/* Transform the Projs of a be_SubSP: the predecessor became an ia32 AddSP
 * (see gen_be_SubSP); renumber onto AddSP's outputs, pinning the stack
 * Proj to ESP. */
4396 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4398 ir_node *pred = get_Proj_pred(node);
4399 ir_node *new_pred = be_transform_node(pred);
4400 dbg_info *dbgi = get_irn_dbg_info(node);
4401 long proj = get_Proj_proj(node);
4403 if (proj == pn_be_SubSP_sp) {
4404 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4405 pn_ia32_AddSP_stack);
4406 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4408 } else if (proj == pn_be_SubSP_M) {
4409 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4412 panic("No idea how to transform proj->SubSP");
4416 * Transform and renumber the Projs from a Load.
/* Transform and renumber the Projs of a Load. The Load itself may have been
 * turned into ia32_Load, a Conv with folded load, an xLoad (SSE) or a vfld
 * (x87) — or swallowed entirely by source address mode, in which case only
 * a memory Proj remains.
 * NOTE(review): several lines are elided in this excerpt. */
4418 static ir_node *gen_Proj_Load(ir_node *node)
4421 ir_node *pred = get_Proj_pred(node);
4422 dbg_info *dbgi = get_irn_dbg_info(node);
4423 long proj = get_Proj_proj(node);
4425 /* loads might be part of source address mode matches, so we don't
4426 * transform the ProjMs yet (with the exception of loads whose result is
4429 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4432 /* this is needed, because sometimes we have loops that are only
4433 reachable through the ProjM */
4434 be_enqueue_preds(node);
4435 /* do it in 2 steps, to silence firm verifier */
4436 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4437 set_Proj_proj(res, pn_ia32_mem);
4441 /* renumber the proj */
4442 new_pred = be_transform_node(pred);
4443 if (is_ia32_Load(new_pred)) {
4444 switch ((pn_Load)proj) {
4446 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4448 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4449 case pn_Load_X_except:
4450 /* This Load might raise an exception. Mark it. */
4451 set_ia32_exc_label(new_pred, 1);
4452 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4453 case pn_Load_X_regular:
4454 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* load was folded into a Conv via source address mode */
4456 } else if (is_ia32_Conv_I2I(new_pred) ||
4457 is_ia32_Conv_I2I8Bit(new_pred)) {
4458 set_irn_mode(new_pred, mode_T);
4459 switch ((pn_Load)proj) {
4461 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4463 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4464 case pn_Load_X_except:
4465 /* This Load might raise an exception. Mark it. */
4466 set_ia32_exc_label(new_pred, 1);
4467 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4468 case pn_Load_X_regular:
4469 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4471 } else if (is_ia32_xLoad(new_pred)) {
4472 switch ((pn_Load)proj) {
4474 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4476 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4477 case pn_Load_X_except:
4478 /* This Load might raise an exception. Mark it. */
4479 set_ia32_exc_label(new_pred, 1);
4480 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4481 case pn_Load_X_regular:
4482 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4484 } else if (is_ia32_vfld(new_pred)) {
4485 switch ((pn_Load)proj) {
4487 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4489 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4490 case pn_Load_X_except:
4491 /* This Load might raise an exception. Mark it. */
4492 set_ia32_exc_label(new_pred, 1);
4493 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4494 case pn_Load_X_regular:
4495 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4498 /* can happen for ProJMs when source address mode happened for the
4501 /* however it should not be the result proj, as that would mean the
4502 load had multiple users and should not have been used for
4504 if (proj != pn_Load_M) {
4505 panic("internal error: transformed node not a Load");
4507 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4510 panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs of a Store, dispatching on what the
 * Store became: ia32 Store/Store8Bit, vfist/vfisttp/vfst (x87), xStore
 * (SSE), a Sync (from gen_float_const_Store), or a destination-AM node. */
4513 static ir_node *gen_Proj_Store(ir_node *node)
4515 ir_node *pred = get_Proj_pred(node);
4516 ir_node *new_pred = be_transform_node(pred);
4517 dbg_info *dbgi = get_irn_dbg_info(node);
4518 long pn = get_Proj_proj(node);
4520 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4521 switch ((pn_Store)pn) {
4523 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4524 case pn_Store_X_except:
4525 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4526 case pn_Store_X_regular:
4527 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4529 } else if (is_ia32_vfist(new_pred)) {
4530 switch ((pn_Store)pn) {
4532 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4533 case pn_Store_X_except:
4534 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4535 case pn_Store_X_regular:
4536 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4538 } else if (is_ia32_vfisttp(new_pred)) {
4539 switch ((pn_Store)pn) {
4541 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4542 case pn_Store_X_except:
4543 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4544 case pn_Store_X_regular:
4545 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4547 } else if (is_ia32_vfst(new_pred)) {
4548 switch ((pn_Store)pn) {
4550 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4551 case pn_Store_X_except:
4552 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4553 case pn_Store_X_regular:
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4556 } else if (is_ia32_xStore(new_pred)) {
4557 switch ((pn_Store)pn) {
4559 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4560 case pn_Store_X_except:
4561 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4562 case pn_Store_X_regular:
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4565 } else if (is_Sync(new_pred)) {
4566 /* hack for the case that gen_float_const_Store produced a Sync */
4567 if (pn == pn_Store_M) {
4570 panic("exception control flow not implemented yet");
4571 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4572 /* destination address mode */
4573 if (pn == pn_Store_M) {
4576 panic("exception control flow for destination AM not implemented yet");
4579 panic("No idea how to transform Proj(Store) %+F", node);
4583 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber the Projs of a Div, which may have become an
 * integer Div/IDiv, an SSE xDiv, or an x87 vfdiv. */
4585 static ir_node *gen_Proj_Div(ir_node *node)
4587 ir_node *pred = get_Proj_pred(node);
4588 ir_node *new_pred = be_transform_node(pred);
4589 dbg_info *dbgi = get_irn_dbg_info(node);
4590 long proj = get_Proj_proj(node);
/* Div and IDiv share the same output numbering */
4592 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4593 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4595 switch ((pn_Div)proj) {
4597 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4598 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4599 } else if (is_ia32_xDiv(new_pred)) {
4600 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4601 } else if (is_ia32_vfdiv(new_pred)) {
4602 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4604 panic("Div transformed to unexpected thing %+F", new_pred);
4607 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4608 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4609 } else if (is_ia32_xDiv(new_pred)) {
4610 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4611 } else if (is_ia32_vfdiv(new_pred)) {
4612 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4614 panic("Div transformed to unexpected thing %+F", new_pred);
4616 case pn_Div_X_except:
4617 set_ia32_exc_label(new_pred, 1);
4618 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4619 case pn_Div_X_regular:
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4623 panic("No idea how to transform proj->Div");
4627 * Transform and renumber the Projs from a Div or Mod instruction.
/* Transform and renumber the Projs of a Mod. Mod is always lowered to an
 * integer Div/IDiv (the hardware instruction produces both quotient and
 * remainder); the result Proj maps to the mod_res output. */
4629 static ir_node *gen_Proj_Mod(ir_node *node)
4631 ir_node *pred = get_Proj_pred(node);
4632 ir_node *new_pred = be_transform_node(pred);
4633 dbg_info *dbgi = get_irn_dbg_info(node);
4634 long proj = get_Proj_proj(node);
4636 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4637 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4638 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4640 switch ((pn_Mod)proj) {
4642 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4644 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4645 case pn_Mod_X_except:
4646 set_ia32_exc_label(new_pred, 1);
4647 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4648 case pn_Mod_X_regular:
4649 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4651 panic("No idea how to transform proj->Mod");
4655 * Transform and renumber the Projs from a CopyB.
/* Transform and renumber the Projs of a CopyB, which became either
 * CopyB_i (known size, immediate count) or CopyB (variable size). */
4657 static ir_node *gen_Proj_CopyB(ir_node *node)
4659 ir_node *pred = get_Proj_pred(node);
4660 ir_node *new_pred = be_transform_node(pred);
4661 dbg_info *dbgi = get_irn_dbg_info(node);
4662 long proj = get_Proj_proj(node);
4664 switch ((pn_CopyB)proj) {
4666 if (is_ia32_CopyB_i(new_pred)) {
4667 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4668 } else if (is_ia32_CopyB(new_pred)) {
4669 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4672 case pn_CopyB_X_regular:
4673 if (is_ia32_CopyB_i(new_pred)) {
4674 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4675 } else if (is_ia32_CopyB(new_pred)) {
4676 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4679 case pn_CopyB_X_except:
4680 if (is_ia32_CopyB_i(new_pred)) {
4681 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4682 } else if (is_ia32_CopyB(new_pred)) {
4683 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4688 panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32 Call. The call target may be folded via
 * address mode or immediate; register parameters are sorted into their
 * fixed eax/ecx/edx slots; float-returning calls flag the irg for the x87
 * simulator, and under SSE2 the call is remembered for post-processing.
 * NOTE(review): several lines are elided in this excerpt. */
4691 static ir_node *gen_be_Call(ir_node *node)
4693 dbg_info *const dbgi = get_irn_dbg_info(node);
4694 ir_node *const src_block = get_nodes_block(node);
4695 ir_node *const block = be_transform_node(src_block);
4696 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4697 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4698 ir_node *const sp = be_transform_node(src_sp);
4699 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4700 ia32_address_mode_t am;
4701 ia32_address_t *const addr = &am.addr;
4706 ir_node * eax = noreg_GP;
4707 ir_node * ecx = noreg_GP;
4708 ir_node * edx = noreg_GP;
4709 unsigned const pop = be_Call_get_pop(node);
4710 ir_type *const call_tp = be_Call_get_type(node);
4711 int old_no_pic_adjust;
4712 int throws_exception = ir_throws_exception(node);
4714 /* Run the x87 simulator if the call returns a float value */
4715 if (get_method_n_ress(call_tp) > 0) {
4716 ir_type *const res_type = get_method_res_type(call_tp, 0);
4717 ir_mode *const res_mode = get_type_mode(res_type);
4719 if (res_mode != NULL && mode_is_float(res_mode)) {
4720 ir_graph *irg = current_ir_graph;
4721 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4722 irg_data->do_x87_sim = 1;
4726 /* We do not want be_Call direct calls */
4727 assert(be_Call_get_entity(node) == NULL);
4729 /* special case for PIC trampoline calls */
4730 old_no_pic_adjust = ia32_no_pic_adjust;
4731 ia32_no_pic_adjust = be_options.pic;
4733 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4734 match_am | match_immediate | match_upconv);
/* restore the flag saved above */
4736 ia32_no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk the remaining register args backwards */
4738 i = get_irn_arity(node) - 1;
4739 fpcw = be_transform_node(get_irn_n(node, i--));
4740 for (; i >= n_be_Call_first_arg; --i) {
4741 arch_register_req_t const *const req
4742 = arch_get_irn_register_req_in(node, i);
4743 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4745 assert(req->type == arch_register_req_type_limited);
4746 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* each register parameter is constrained to exactly one GP reg */
4748 switch (*req->limited) {
4749 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4750 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4751 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4752 default: panic("Invalid GP register for register parameter");
4756 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4757 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4758 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4759 ir_set_throws_exception(call, throws_exception);
4760 set_am_attributes(call, &am);
4761 call = fix_mem_proj(call, &am);
4763 if (get_irn_pinned(node) == op_pin_state_pinned)
4764 set_irn_pinned(call, op_pin_state_pinned);
4766 SET_IA32_ORIG_NODE(call, node);
4768 if (ia32_cg_config.use_sse2) {
4769 /* remember this call for post-processing */
4770 ARR_APP1(ir_node *, call_list, call);
4771 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4778 * Transform Builtin trap
4780 static ir_node *gen_trap(ir_node *node)
4782 dbg_info *dbgi = get_irn_dbg_info(node);
4783 ir_node *block = be_transform_node(get_nodes_block(node));
4784 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4786 return new_bd_ia32_UD2(dbgi, block, mem);
4790 * Transform Builtin debugbreak
4792 static ir_node *gen_debugbreak(ir_node *node)
4794 dbg_info *dbgi = get_irn_dbg_info(node);
4795 ir_node *block = be_transform_node(get_nodes_block(node));
4796 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4798 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform Builtin return_address.
 *
 * Param 0 is the (constant) number of frames to walk up, param 1 the
 * frame pointer to start from. For value > 0 a ClimbFrame node walks up
 * the frame chain first; the return address is then loaded relative to
 * the selected frame via a frame entity.
 */
static ir_node *gen_return_address(ir_node *node)
{
	ir_node   *param = get_Builtin_param(node, 0);
	ir_node   *frame = get_Builtin_param(node, 1);
	dbg_info  *dbgi  = get_irn_dbg_info(node);
	ir_tarval *tv    = get_Const_tarval(param);
	ir_graph  *irg   = get_irn_irg(node);
	unsigned long value = get_tarval_long(tv);

	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr   = be_transform_node(frame);
	ir_node *load;

	/* climb 'value' frames up the frame chain first */
	if (value > 0) {
		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
	}

	/* load the return address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all ia32 load flavours share the same result proj number, so
		 * marking the load rematerializable is safe here */
		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
	}

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
}
/**
 * Transform Builtin frame_address.
 *
 * Analogous to gen_return_address, but loads the (saved) frame address
 * of the selected frame instead of the return address.
 */
static ir_node *gen_frame_address(ir_node *node)
{
	ir_node   *param = get_Builtin_param(node, 0);
	ir_node   *frame = get_Builtin_param(node, 1);
	dbg_info  *dbgi  = get_irn_dbg_info(node);
	ir_tarval *tv    = get_Const_tarval(param);
	ir_graph  *irg   = get_irn_irg(node);
	unsigned long value = get_tarval_long(tv);

	ir_node   *block = be_transform_node(get_nodes_block(node));
	ir_node   *ptr   = be_transform_node(frame);
	ir_node   *load;
	ir_entity *ent;

	/* climb 'value' frames up the frame chain first */
	if (value > 0) {
		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
	}

	/* load the frame address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	ent = ia32_get_frame_address_entity(irg);
	if (ent != NULL) {
		set_ia32_am_offs_int(load, 0);
		set_ia32_use_frame(load);
		set_ia32_frame_ent(load, ent);
	} else {
		/* will fail anyway, but gcc does this: */
		set_ia32_am_offs_int(load, 0);
	}

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all ia32 load flavours share the same result proj number, so
		 * marking the load rematerializable is safe here */
		assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
				&& (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
				&& (int)pn_ia32_Load_res == (int)pn_ia32_res);
		arch_add_irn_flags(load, arch_irn_flags_rematerializable);
	}

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
}
/**
 * Transform Builtin prefetch.
 *
 * Param 0 is the address, param 1 the read/write flag, param 2 (SSE only)
 * the locality hint. If neither SSE nor 3DNow! prefetch is available, the
 * builtin degenerates to routing the memory edge through.
 */
static ir_node *gen_prefetch(ir_node *node)
{
	dbg_info  *dbgi;
	ir_node   *ptr, *block, *mem, *base, *idx;
	ir_node   *param, *new_node;
	long       rw, locality;
	ir_tarval *tv;
	ia32_address_t addr;

	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
		/* no prefetch at all, route memory */
		return be_transform_node(get_Builtin_mem(node));
	}

	param = get_Builtin_param(node, 1);
	tv    = get_Const_tarval(param);
	rw    = get_tarval_long(tv);

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ptr = get_Builtin_param(node, 0);
	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
	base = addr.base;
	idx  = addr.index;

	if (base == NULL) {
		base = noreg_GP;
	} else {
		base = be_transform_node(base);
	}

	if (idx == NULL) {
		idx = noreg_GP;
	} else {
		idx = be_transform_node(idx);
	}

	dbgi  = get_irn_dbg_info(node);
	block = be_transform_node(get_nodes_block(node));
	mem   = be_transform_node(get_Builtin_mem(node));

	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
		/* we have 3DNow!, this was already checked above */
		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
	} else if (ia32_cg_config.use_sse_prefetch) {
		/* note: rw == 1 is IGNORED in that case */
		param    = get_Builtin_param(node, 2);
		tv       = get_Const_tarval(param);
		locality = get_tarval_long(tv);

		/* SSE style prefetch: map locality hint to prefetchNTA/2/1/0 */
		switch (locality) {
		case 0:
			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
			break;
		case 1:
			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
			break;
		case 2:
			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
			break;
		default:
			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
			break;
		}
	} else {
		assert(ia32_cg_config.use_3dnow_prefetch);
		/* 3DNow! style prefetch */
		new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
	}

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode_Bu);
	set_address(new_node, &addr);

	SET_IA32_ORIG_NODE(new_node, node);

	return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
}
/**
 * Transform a bsf-like builtin (unary op whose source may be folded into an
 * address mode). 'func' constructs the concrete ia32 node (e.g. Bsf, Bsr).
 */
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
{
	ir_node  *param = get_Builtin_param(node, 0);
	dbg_info *dbgi  = get_irn_dbg_info(node);

	ir_node *block     = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ia32_address_mode_t  am;
	ia32_address_t      *addr = &am.addr;
	ir_node             *cnt;

	/* try to fold the operand into an address mode */
	match_arguments(&am, block, NULL, param, NULL, match_am);

	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	return fix_mem_proj(cnt, &am);
}
/**
 * Transform builtin ffs (find first set).
 *
 * Computed as: r = bsf(x); if ZF (x == 0) force r to -1 by or-ing with
 * -1 (neg of the zero-extended sete result); finally add 1 via Lea, so
 * ffs(0) == 0 and otherwise ffs(x) == index of lowest set bit + 1.
 */
static ir_node *gen_ffs(ir_node *node)
{
	ir_node  *bsf   = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node  *real  = skip_Proj(bsf);
	dbg_info *dbgi  = get_irn_dbg_info(real);
	ir_node  *block = get_nodes_block(real);
	ir_node  *flag, *set, *conv, *neg, *orn, *add;

	/* make the Bsf mode_T so we can also use its flags output */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
	}

	flag = new_r_Proj(real, mode_b, pn_ia32_flags);

	/* sete: 1 iff the input was zero */
	set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
	SET_IA32_ORIG_NODE(set, node);

	/* zero-extend the 8bit sete result to 32bit */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg: 0 -> 0, 1 -> 0xFFFFFFFF */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	/* or: result stays bsf value, or becomes -1 for zero input */
	orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_ls_mode(orn, mode_Iu);
	set_ia32_commutative(orn);

	/* add 1 (Lea with offset) */
	add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
	add_ia32_am_offs_int(add, 1);
	return add;
}
5047 * Transform builtin clz.
5049 static ir_node *gen_clz(ir_node *node)
5051 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5052 ir_node *real = skip_Proj(bsr);
5053 dbg_info *dbgi = get_irn_dbg_info(real);
5054 ir_node *block = get_nodes_block(real);
5055 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5057 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/**
 * Transform builtin ctz (count trailing zeros): maps directly onto bsf.
 */
static ir_node *gen_ctz(ir_node *node)
{
	return gen_unop_AM(node, new_bd_ia32_Bsf);
}
/**
 * Transform builtin parity.
 *
 * x86's parity flag only reflects the lowest byte, so the 32bit value is
 * first folded: xor the upper half onto the lower half, then xor the two
 * bytes of the remaining 16bit value (XorHighLow). setnp then yields the
 * parity of the whole word, which is zero-extended to 32bit.
 */
static ir_node *gen_parity(ir_node *node)
{
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_node  *param     = get_Builtin_param(node, 0);
	ir_node  *new_param = be_transform_node(param);
	ir_node  *new_node;

	/* the x86 parity bit is stupid: it only looks at the lowest byte,
	 * so we have to do complicated xoring first.
	 * (we should also better lower this before the backend so we still have a
	 * chance for CSE, constant folding and other goodies for some of these
	 * operations)
	 */
	ir_node *count = ia32_create_Immediate(NULL, 0, 16);
	ir_node *shr   = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
	ir_node *xorn  = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
	                                 shr, new_param);
	ir_node *xor2  = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
	ir_node *flags;

	set_ia32_ls_mode(xorn, mode_Iu);
	set_ia32_commutative(xorn);

	set_irn_mode(xor2, mode_T);
	flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);

	/* setnp */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	/* conv to 32bit */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
	return new_node;
}
/**
 * Transform builtin popcount.
 *
 * Uses the popcnt instruction when available (SSE4.2/SSE4a); otherwise
 * emits the classic divide-and-conquer bit-count sequence (the basic
 * version from Hacker's Delight).
 */
static ir_node *gen_popcount(ir_node *node)
{
	ir_node  *param = get_Builtin_param(node, 0);
	dbg_info *dbgi  = get_irn_dbg_info(node);

	ir_node *block     = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ir_node *new_param;
	ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;

	/* check for SSE4.2 or SSE4a and use the popcnt instruction */
	if (ia32_cg_config.use_popcnt) {
		ia32_address_mode_t  am;
		ia32_address_t      *addr = &am.addr;
		ir_node             *cnt;

		match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);

		cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
		set_am_attributes(cnt, &am);
		set_ia32_ls_mode(cnt, get_irn_mode(param));

		SET_IA32_ORIG_NODE(cnt, node);
		return fix_mem_proj(cnt, &am);
	}

	new_param = be_transform_node(param);

	/* do the standard popcount algo */
	/* TODO: This is stupid, we should transform this before the backend,
	 * to get CSE, localopts, etc. for the operations
	 * TODO: This is also not the optimal algorithm (it is just the starting
	 * example in hackers delight, they optimize it more on the following page)
	 * But I'm too lazy to fix this now, as the code should get lowered before
	 * the backend anyway.
	 */

	/* m1 = x & 0x55555555 */
	imm = ia32_create_Immediate(NULL, 0, 0x55555555);
	m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);

	/* s1 = x >> 1 */
	simm = ia32_create_Immediate(NULL, 0, 1);
	s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);

	/* m2 = s1 & 0x55555555 */
	m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);

	/* m3 = m1 + m2 (pairwise 2bit sums, Lea used as add) */
	m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);

	/* m4 = m3 & 0x33333333 */
	imm = ia32_create_Immediate(NULL, 0, 0x33333333);
	m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);

	/* s2 = m3 >> 2 */
	simm = ia32_create_Immediate(NULL, 0, 2);
	s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);

	/* m5 = s2 & 0x33333333 */
	m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);

	/* m6 = m4 + m5 (4bit sums) */
	m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);

	/* m7 = m6 & 0x0F0F0F0F */
	imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
	m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);

	/* s3 = m6 >> 4 */
	simm = ia32_create_Immediate(NULL, 0, 4);
	s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);

	/* m8 = s3 & 0x0F0F0F0F */
	m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);

	/* m9 = m7 + m8 (byte sums) */
	m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);

	/* m10 = m9 & 0x00FF00FF */
	imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
	m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);

	/* s4 = m9 >> 8 */
	simm = ia32_create_Immediate(NULL, 0, 8);
	s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);

	/* m11 = s4 & 0x00FF00FF */
	m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);

	/* m12 = m10 + m11 (16bit sums) */
	m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);

	/* m13 = m12 & 0x0000FFFF */
	imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
	m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);

	/* s5 = m12 >> 16 */
	simm = ia32_create_Immediate(NULL, 0, 16);
	s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);

	/* res = m13 + s5 */
	return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
}
/**
 * Transform builtin byte swap.
 *
 * 32bit: bswap when available, otherwise a 3-rol sequence (rol16 of each
 * half around a rol16 of the whole word). 16bit: always Bswap16.
 */
static ir_node *gen_bswap(ir_node *node)
{
	ir_node  *param = be_transform_node(get_Builtin_param(node, 0));
	dbg_info *dbgi  = get_irn_dbg_info(node);

	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_mode  *mode      = get_irn_mode(param);
	unsigned  size      = get_mode_size_bits(mode);

	switch (size) {
	case 32:
		if (ia32_cg_config.use_bswap) {
			/* swap available */
			return new_bd_ia32_Bswap(dbgi, new_block, param);
		} else {
			/* no bswap: rol ax,8; rol eax,16; rol ax,8 */
			ir_node *i8   = ia32_create_Immediate(NULL, 0, 8);
			ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
			ir_node *i16  = ia32_create_Immediate(NULL, 0, 16);
			ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
			ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
			set_ia32_ls_mode(rol1, mode_Hu);
			set_ia32_ls_mode(rol2, mode_Iu);
			set_ia32_ls_mode(rol3, mode_Hu);
			return rol3;
		}

	case 16:
		/* swap16 always available */
		return new_bd_ia32_Bswap16(dbgi, new_block, param);

	default:
		panic("Invalid bswap size (%d)", size);
	}
}
/**
 * Transform builtin outport (write a value to an I/O port).
 * Param 0 is the port (may become an immediate), param 1 the value.
 */
static ir_node *gen_outport(ir_node *node)
{
	ir_node  *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node  *oldv  = get_Builtin_param(node, 1);
	ir_mode  *mode  = get_irn_mode(oldv);
	ir_node  *value = be_transform_node(oldv);
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_node  *mem   = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi  = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	/* the load/store mode encodes the access width of the out instruction */
	set_ia32_ls_mode(res, mode);
	return res;
}
/**
 * Transform builtin inport (read a value from an I/O port).
 * The access width is taken from the builtin's result type.
 */
static ir_node *gen_inport(ir_node *node)
{
	ir_type  *tp    = get_Builtin_type(node);
	ir_type  *rstp  = get_method_res_type(tp, 0);
	ir_mode  *mode  = get_type_mode(rstp);
	ir_node  *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_node  *mem   = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi  = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	set_ia32_ls_mode(res, mode);

	/* check for missing Result Proj */
	return res;
}
/**
 * Transform a builtin inner trampoline.
 *
 * Writes the 10-byte trampoline code sequence to memory at 'ptr':
 *   mov ecx, <env>   (opcode 0xB9 + 4 byte immediate)
 *   jmp rel <callee> (opcode 0xE9 + 4 byte pc-relative displacement)
 * and returns a Tuple of (last store, trampoline address).
 */
static ir_node *gen_inner_trampoline(ir_node *node)
{
	ir_node  *ptr       = get_Builtin_param(node, 0);
	ir_node  *callee    = get_Builtin_param(node, 1);
	ir_node  *env       = be_transform_node(get_Builtin_param(node, 2));
	ir_node  *mem       = get_Builtin_mem(node);
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_node  *val;
	ir_node  *store;
	ir_node  *rel;
	ir_node  *trampoline;
	ir_node  *in[2];
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
	} else {
		addr.base = be_transform_node(addr.base);
	}

	if (addr.index == NULL) {
		addr.index = noreg_GP;
	} else {
		addr.index = be_transform_node(addr.index);
	}
	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	val   = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	addr.offset += 1;

	/* the 4 byte <env> immediate */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);
	addr.offset += 4;

	/* jmp rel <callee> */
	val   = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	addr.offset += 1;

	trampoline = be_transform_node(ptr);

	/* the callee is typically an immediate; -10 accounts for the
	 * displacement being relative to the end of the 10-byte trampoline */
	if (is_SymConst(callee)) {
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
	} else {
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
		add_ia32_am_offs_int(rel, -10);
	}
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

	/* store the relative displacement */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	in[0] = store;
	in[1] = trampoline;

	return new_r_Tuple(new_block, 2, in);
}
/**
 * Transform Builtin node: dispatch to the per-kind transformer.
 */
static ir_node *gen_Builtin(ir_node *node)
{
	ir_builtin_kind kind = get_Builtin_kind(node);

	switch (kind) {
	case ir_bk_trap:
		return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
	case ir_bk_ffs:
		return gen_ffs(node);
	case ir_bk_clz:
		return gen_clz(node);
	case ir_bk_ctz:
		return gen_ctz(node);
	case ir_bk_parity:
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
	case ir_bk_bswap:
		return gen_bswap(node);
	case ir_bk_outport:
		return gen_outport(node);
	case ir_bk_inport:
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	}
	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
}
/**
 * Transform Proj(Builtin) node: map generic Builtin proj numbers to the
 * outputs of the node produced by gen_Builtin.
 */
static ir_node *gen_Proj_Builtin(ir_node *proj)
{
	ir_node         *node     = get_Proj_pred(proj);
	ir_node         *new_node = be_transform_node(node);
	ir_builtin_kind  kind     = get_Builtin_kind(node);

	switch (kind) {
	/* builtins whose transformed node IS the result value */
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_ffs:
	case ir_bk_clz:
	case ir_bk_ctz:
	case ir_bk_parity:
	case ir_bk_popcount:
	case ir_bk_bswap:
		assert(get_Proj_proj(proj) == pn_Builtin_max+1);
		return new_node;
	/* builtins whose transformed node represents the memory result */
	case ir_bk_trap:
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
	case ir_bk_outport:
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return new_node;
	case ir_bk_inport:
		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
			return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
		} else {
			assert(get_Proj_proj(proj) == pn_Builtin_M);
			return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
		}
	case ir_bk_inner_trampoline:
		/* gen_inner_trampoline returned a Tuple (mem, trampoline address) */
		if (get_Proj_proj(proj) == pn_Builtin_max+1) {
			return get_Tuple_pred(new_node, 1);
		} else {
			assert(get_Proj_proj(proj) == pn_Builtin_M);
			return get_Tuple_pred(new_node, 0);
		}
	}
	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
}
/**
 * Transform be_IncSP: duplicate the node, but mark it as modifying the
 * flags (stack pointer arithmetic clobbers the eflags register).
 */
static ir_node *gen_be_IncSP(ir_node *node)
{
	ir_node *res = be_duplicate_node(node);
	arch_add_irn_flags(res, arch_irn_flags_modify_flags);
	return res;
}
/**
 * Transform the Projs from a be_Call.
 *
 * Maps be_Call proj numbers onto the ia32_Call outputs; result projs
 * (which only carry a limited register requirement) are matched against
 * the new call's outputs by register class and limited register set.
 */
static ir_node *gen_Proj_be_Call(ir_node *node)
{
	ir_node  *call     = get_Proj_pred(node);
	ir_node  *new_call = be_transform_node(call);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);
	ir_mode  *mode     = get_irn_mode(node);
	ir_node  *res;

	if (proj == pn_be_Call_M) {
		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
	}
	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class(node);
		mode = cls->mode;
	}

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M) {
		proj = pn_ia32_Call_M;
	} else if (proj == pn_be_Call_X_except) {
		proj = pn_ia32_Call_X_except;
	} else if (proj == pn_be_Call_X_regular) {
		proj = pn_ia32_Call_X_regular;
	} else {
		/* result proj: find the new call output with the same limited
		 * register requirement */
		arch_register_req_t const *const req    = arch_get_irn_register_req(node);
		int                        const n_outs = arch_get_irn_n_outs(new_call);
		int                              i;

		assert(proj >= pn_be_Call_first_res);
		assert(req->type & arch_register_req_type_limited);

		for (i = 0; i < n_outs; ++i) {
			arch_register_req_t const *const new_req
				= arch_get_irn_register_req_out(new_call, i);

			if (!(new_req->type & arch_register_req_type_limited) ||
			    new_req->cls != req->cls ||
			    *new_req->limited != *req->limited)
				continue;

			proj = i;
			break;
		}
		assert(i < n_outs);
	}

	res = new_rd_Proj(dbgi, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	switch (proj) {
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
		break;

	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
		break;
	}

	return res;
}
/**
 * Transform a Proj from an ASM node: the memory output is the last output
 * of the transformed node; data outputs keep their position but are
 * normalized to the backend's register class modes.
 */
static ir_node *gen_Proj_ASM(ir_node *node)
{
	ir_mode *mode     = get_irn_mode(node);
	ir_node *pred     = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	long     pos      = get_Proj_proj(node);

	if (mode == mode_M) {
		/* memory proj is always the last output */
		pos = arch_get_irn_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
		mode = mode_Iu;
	} else if (mode_is_float(mode)) {
		mode = mode_E;
	} else {
		panic("unexpected proj mode at ASM");
	}

	return new_r_Proj(new_pred, mode, pos);
}
/**
 * Transform and potentially renumber Proj nodes: dispatch on the opcode
 * of the predecessor; Projs without a special handler are duplicated,
 * with gp-register values normalized to mode_Iu.
 */
static ir_node *gen_Proj(ir_node *node)
{
	ir_node *pred = get_Proj_pred(node);
	long     proj;

	switch (get_irn_opcode(pred)) {
	case iro_Load:
		return gen_Proj_Load(node);
	case iro_Store:
		return gen_Proj_Store(node);
	case iro_ASM:
		return gen_Proj_ASM(node);
	case iro_Builtin:
		return gen_Proj_Builtin(node);
	case iro_Div:
		return gen_Proj_Div(node);
	case iro_Mod:
		return gen_Proj_Mod(node);
	case iro_CopyB:
		return gen_Proj_CopyB(node);
	case beo_SubSP:
		return gen_Proj_be_SubSP(node);
	case beo_AddSP:
		return gen_Proj_be_AddSP(node);
	case beo_Call:
		return gen_Proj_be_Call(node);
	case iro_Start:
		proj = get_Proj_proj(node);
		switch (proj) {
		case pn_Start_X_initial_exec: {
			ir_node  *block     = get_nodes_block(pred);
			ir_node  *new_block = be_transform_node(block);
			dbg_info *dbgi      = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, new_block);
			return jump;
		}
		}
		break;
	default:
		break;
	}

	if (is_ia32_l_FloattoLL(pred)) {
		return gen_Proj_l_FloattoLL(node);
	} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		ir_mode *mode = get_irn_mode(node);
		if (ia32_mode_needs_gp_reg(mode)) {
			ir_node *new_pred = be_transform_node(pred);
			ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
			                               get_Proj_proj(node));
			/* keep the node number for debugging purposes */
			new_proj->node_nr = node->node_nr;
			return new_proj;
		}
	}

	return be_duplicate_node(node);
}
/**
 * Enters all transform functions into the generic pointers of the
 * respective opcodes, so be_transform_graph can dispatch per node kind.
 */
static void register_transformers(void)
{
	/* first clear the generic function pointer for all ops */
	be_start_transform_setup();

	be_set_transform_function(op_Add, gen_Add);
	be_set_transform_function(op_And, gen_And);
	be_set_transform_function(op_ASM, ia32_gen_ASM);
	be_set_transform_function(op_be_AddSP, gen_be_AddSP);
	be_set_transform_function(op_be_Call, gen_be_Call);
	be_set_transform_function(op_be_Copy, gen_be_Copy);
	be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
	be_set_transform_function(op_be_IncSP, gen_be_IncSP);
	be_set_transform_function(op_be_Return, gen_be_Return);
	be_set_transform_function(op_be_SubSP, gen_be_SubSP);
	be_set_transform_function(op_Builtin, gen_Builtin);
	be_set_transform_function(op_Cmp, gen_Cmp);
	be_set_transform_function(op_Cond, gen_Cond);
	be_set_transform_function(op_Const, gen_Const);
	be_set_transform_function(op_Conv, gen_Conv);
	be_set_transform_function(op_CopyB, ia32_gen_CopyB);
	be_set_transform_function(op_Div, gen_Div);
	be_set_transform_function(op_Eor, gen_Eor);
	be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
	be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
	be_set_transform_function(op_ia32_Leave, be_duplicate_node);
	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
	be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
	be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
	be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
	be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
	be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
	be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
	be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
	be_set_transform_function(op_ia32_Push, be_duplicate_node);
	be_set_transform_function(op_IJmp, gen_IJmp);
	be_set_transform_function(op_Jmp, gen_Jmp);
	be_set_transform_function(op_Load, gen_Load);
	be_set_transform_function(op_Minus, gen_Minus);
	be_set_transform_function(op_Mod, gen_Mod);
	be_set_transform_function(op_Mul, gen_Mul);
	be_set_transform_function(op_Mulh, gen_Mulh);
	be_set_transform_function(op_Mux, gen_Mux);
	be_set_transform_function(op_Not, gen_Not);
	be_set_transform_function(op_Or, gen_Or);
	be_set_transform_function(op_Phi, gen_Phi);
	be_set_transform_function(op_Proj, gen_Proj);
	be_set_transform_function(op_Rotl, gen_Rotl);
	be_set_transform_function(op_Shl, gen_Shl);
	be_set_transform_function(op_Shr, gen_Shr);
	be_set_transform_function(op_Shrs, gen_Shrs);
	be_set_transform_function(op_Store, gen_Store);
	be_set_transform_function(op_Sub, gen_Sub);
	be_set_transform_function(op_Switch, gen_Switch);
	be_set_transform_function(op_SymConst, gen_SymConst);
	be_set_transform_function(op_Unknown, ia32_gen_Unknown);

	be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
}
5701 * Pre-transform all unknown and noreg nodes.
5703 static void ia32_pretransform_node(void)
5705 ir_graph *irg = current_ir_graph;
5706 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5708 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5709 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5710 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5711 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5712 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5714 nomem = get_irg_no_mem(irg);
5715 noreg_GP = ia32_new_NoReg_gp(irg);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires that the results are in st0, copy them
 * to a xmm register.
 *
 * For each float result of each recorded call: xStore users are rewritten
 * into vfst (storing straight from st0); all other users are rerouted to
 * an xmm value produced by a vfst-to-stack / xLoad-from-stack pair.
 */
static void postprocess_fp_call_results(void)
{
	size_t i, n;

	for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
		ir_node *call = call_list[i];
		ir_type *mtp  = call_types[i];
		int      j;

		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			ir_mode *res_mode;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */
				continue;
			}
			res_mode = get_type_mode(res_tp);
			if (! mode_is_float(res_mode)) {
				/* no floating point return */
				continue;
			}

			res     = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
			new_res = NULL;

			/* now patch the users */
			foreach_out_edge_safe(res, edge) {
				ir_node *succ = get_edge_src_irn(edge);

				/* ignore Keeps */
				if (be_is_Keep(succ))
					continue;

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db    = get_irn_dbg_info(succ);
					ir_node  *block = get_nodes_block(succ);
					ir_node  *base  = get_irn_n(succ, n_ia32_xStore_base);
					ir_node  *idx   = get_irn_n(succ, n_ia32_xStore_index);
					ir_node  *mem   = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node  *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode  *mode  = get_ia32_ls_mode(succ);

					ir_node  *st    = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
					//ir_node  *mem   = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);

					/* the store proj numbers must line up, so the users of
					 * the old xStore projs stay valid after the exchange */
					assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
					assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
					assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);

					exchange(succ, st);
					continue;
				}

				/* lazily create the st0 -> stack -> xmm transfer once */
				if (new_res == NULL) {
					dbg_info *db       = get_irn_dbg_info(call);
					ir_node  *block    = get_nodes_block(call);
					ir_node  *frame    = get_irg_frame(current_ir_graph);
					ir_node  *old_mem  = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node  *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
					ir_node  *vfst, *xld, *new_mem;
					ir_node  *vfst_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
					                        res, res_mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
					                        res_mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);

					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem);
						kill_node(old_mem);
					}
				}
				set_irn_n(succ, get_edge_src_pos(edge), new_res);
			}
		}
	}
}
/* do the transformation */
void ia32_transform_graph(ir_graph *irg)
{
	int cse_last;

	register_transformers();
	initial_fpcw       = NULL;
	ia32_no_pic_adjust = 0;

	old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);

	/* heights are needed by the address mode matcher */
	be_timer_push(T_HEIGHTS);
	ia32_heights = heights_new(irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(irg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();
	set_opt_cse(0);

	call_list  = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(irg, ia32_pretransform_node);

	/* in SSE mode, float call results must be moved from st0 to xmm regs */
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the previous CSE setting */
	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(ia32_heights);
	ia32_heights = NULL;
}
5860 void ia32_init_transform(void)
5862 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");