2 * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
55 #include "betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_dbg_stat.h"
64 #include "ia32_optimize.h"
65 #include "ia32_address_mode.h"
66 #include "ia32_architecture.h"
68 #include "gen_ia32_regalloc_if.h"
/* NOTE(review): elided excerpt — the numbers at the start of each line are the
 * original file's line numbers; gaps mean lines are missing from this view. */
70 /* define this to construct SSE constants instead of load them */
71 #undef CONSTRUCT_SSE_CONST
/* Convenience aliases for the backend register-class modes. */
73 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
74 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
76 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached pre-/post-transform nodes for the initial x87 control word
 * (consumed lazily by get_fpcw() below). */
78 static ir_node *old_initial_fpcw = NULL;
79 static ir_node *initial_fpcw = NULL;
80 int ia32_no_pic_adjust;
/* Constructor-function signatures used by the generic gen_* helpers to build
 * concrete ia32 nodes (binop, binop+flags, shift, dest-AM, unop variants). */
82 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
83 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
86 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
87 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
90 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
91 ir_node *op1, ir_node *op2);
93 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
94 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
96 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
97 ir_node *base, ir_node *index, ir_node *mem);
99 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
105 static ir_node *create_immediate_or_transform(ir_node *node,
106 char immediate_constraint_type);
108 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
109 dbg_info *dbgi, ir_node *block,
110 ir_node *op, ir_node *orig_node);
112 /* its enough to have those once */
113 static ir_node *nomem, *noreg_GP;
115 /** a list to postprocess all calls */
116 static ir_node **call_list;
117 static ir_type **call_types;
119 /** Return non-zero is a node represents the 0 constant. */
120 static bool is_Const_0(ir_node *node)
122 return is_Const(node) && is_Const_null(node);
125 /** Return non-zero is a node represents the 1 constant. */
126 static bool is_Const_1(ir_node *node)
128 return is_Const(node) && is_Const_one(node);
131 /** Return non-zero is a node represents the -1 constant. */
132 static bool is_Const_Minus_1(ir_node *node)
134 return is_Const(node) && is_Const_all_one(node);
/* NOTE(review): elided excerpt — interior lines (return statements, braces)
 * are missing between the embedded original line numbers. */
138 * returns true if constant can be created with a simple float command
140 static bool is_simple_x87_Const(ir_node *node)
142 ir_tarval *tv = get_Const_tarval(node);
/* fldz/fld1 can materialize 0.0 and 1.0 directly on x87. */
143 if (tarval_is_null(tv) || tarval_is_one(tv))
146 /* TODO: match all the other float constants */
/* NOTE(review): elided excerpt — interior lines are missing between the
 * embedded original line numbers. */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_sse_Const(ir_node *node)
155 ir_tarval *tv = get_Const_tarval(node);
156 ir_mode *mode = get_tarval_mode(tv);
161 if (tarval_is_null(tv)
162 #ifdef CONSTRUCT_SSE_CONST
167 #ifdef CONSTRUCT_SSE_CONST
/* For doubles, inspect the low 32 bits byte-by-byte (endian-safe via
 * get_tarval_sub_bits). */
168 if (mode == mode_D) {
169 unsigned val = get_tarval_sub_bits(tv, 0) |
170 (get_tarval_sub_bits(tv, 1) << 8) |
171 (get_tarval_sub_bits(tv, 2) << 16) |
172 (get_tarval_sub_bits(tv, 3) << 24);
174 /* lower 32bit are zero, really a 32bit constant */
177 #endif /* CONSTRUCT_SSE_CONST */
178 /* TODO: match all the other float constants */
/* NOTE(review): elided excerpt — the non-PIC return path is missing here. */
183 * return NoREG or pic_base in case of PIC.
184 * This is necessary as base address for newly created symbols
186 static ir_node *get_symconst_base(void)
188 ir_graph *irg = current_ir_graph;
/* Under PIC, symbol addresses must be relative to the arch's PIC base. */
190 if (be_options.pic) {
191 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
192 return arch_env->impl->get_pic_base(irg);
/* NOTE(review): heavily elided excerpt — many interior lines (declarations,
 * else branches, returns) are missing between the embedded line numbers. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
207 ir_tarval *tv = get_Const_tarval(node);
209 assert(is_Const(node));
/* Float constants: materialize via SSE (xZero/xMovd/xLoad) or x87
 * (vfldz/vfld1/vfld) depending on the code-generation config. */
211 if (mode_is_float(mode)) {
212 ir_graph *irg = get_irn_irg(node);
213 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
214 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
219 if (ia32_cg_config.use_sse2) {
220 if (tarval_is_null(tv)) {
221 load = new_bd_ia32_xZero(dbgi, block);
222 set_ia32_ls_mode(load, mode);
224 #ifdef CONSTRUCT_SSE_CONST
/* Build 1.0 without a memory load: all-ones, then shift left/right to
 * leave only the exponent pattern of 1.0 (cnst differs for F vs D). */
225 } else if (tarval_is_one(tv)) {
226 int cnst = mode == mode_F ? 26 : 55;
227 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
228 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
229 ir_node *pslld, *psrld;
231 load = new_bd_ia32_xAllOnes(dbgi, block);
232 set_ia32_ls_mode(load, mode);
233 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
234 set_ia32_ls_mode(pslld, mode);
235 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
236 set_ia32_ls_mode(psrld, mode);
238 #endif /* CONSTRUCT_SSE_CONST */
239 } else if (mode == mode_F) {
240 /* we can place any 32bit constant by using a movd gp, sse */
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
245 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
246 load = new_bd_ia32_xMovd(dbgi, block, cnst);
247 set_ia32_ls_mode(load, mode);
251 #ifdef CONSTRUCT_SSE_CONST
252 if (mode == mode_D) {
253 unsigned val = get_tarval_sub_bits(tv, 0) |
254 (get_tarval_sub_bits(tv, 1) << 8) |
255 (get_tarval_sub_bits(tv, 2) << 16) |
256 (get_tarval_sub_bits(tv, 3) << 24);
258 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
259 ir_node *cnst, *psllq;
261 /* fine, lower 32bit are zero, produce 32bit value */
262 val = get_tarval_sub_bits(tv, 4) |
263 (get_tarval_sub_bits(tv, 5) << 8) |
264 (get_tarval_sub_bits(tv, 6) << 16) |
265 (get_tarval_sub_bits(tv, 7) << 24);
266 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
267 load = new_bd_ia32_xMovd(dbgi, block, cnst);
268 set_ia32_ls_mode(load, mode);
269 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
270 set_ia32_ls_mode(psllq, mode);
275 #endif /* CONSTRUCT_SSE_CONST */
/* General SSE case: load the constant from a float-const entity. */
276 floatent = ia32_create_float_const_entity(isa, tv, NULL);
278 base = get_symconst_base();
279 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
281 set_ia32_op_type(load, ia32_AddrModeS);
282 set_ia32_am_sc(load, floatent);
283 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
284 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise load from an entity. */
287 if (tarval_is_null(tv)) {
288 load = new_bd_ia32_vfldz(dbgi, block);
290 set_ia32_ls_mode(load, mode);
291 } else if (tarval_is_one(tv)) {
292 load = new_bd_ia32_vfld1(dbgi, block);
294 set_ia32_ls_mode(load, mode);
299 floatent = ia32_create_float_const_entity(isa, tv, NULL);
300 /* create_float_const_ent is smart and sometimes creates
302 ls_mode = get_type_mode(get_entity_type(floatent));
303 base = get_symconst_base();
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 } else { /* non-float mode */
/* Integer constants are normalized to mode_Iu and emitted as ia32 Const. */
321 tv = tarval_convert_to(tv, mode_Iu);
323 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
325 panic("couldn't convert constant tarval (%+F)", node);
327 val = get_tarval_long(tv);
329 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
330 SET_IA32_ORIG_NODE(cnst, node);
/* NOTE(review): elided excerpt — interior lines are missing between the
 * embedded original line numbers. */
337 * Transforms a SymConst.
339 static ir_node *gen_SymConst(ir_node *node)
341 ir_node *old_block = get_nodes_block(node);
342 ir_node *block = be_transform_node(old_block);
343 dbg_info *dbgi = get_irn_dbg_info(node);
344 ir_mode *mode = get_irn_mode(node);
/* Float symconsts become loads from the entity's address. */
347 if (mode_is_float(mode)) {
348 if (ia32_cg_config.use_sse2)
349 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
351 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
352 set_ia32_am_sc(cnst, get_SymConst_entity(node));
353 set_ia32_use_frame(cnst);
/* Only address-of-entity symconsts are supported by this backend. */
357 if (get_SymConst_kind(node) != symconst_addr_ent) {
358 panic("backend only support symconst_addr_ent (at %+F)", node);
360 entity = get_SymConst_entity(node);
/* Thread-local entities need an LdTls base plus a Lea with the entity. */
361 if (get_entity_owner(entity) == get_tls_type()) {
362 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
363 ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
364 set_ia32_am_sc(lea, entity);
367 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
371 SET_IA32_ORIG_NODE(cnst, node);
/* Build a fixed-layout 2-element array type of tp (used for the float-pair
 * entities below).
 * NOTE(review): elided excerpt — at least the return statement is missing. */
376 static ir_type *make_array_type(ir_type *tp)
378 unsigned alignment = get_type_alignment_bytes(tp);
379 unsigned size = get_type_size_bytes(tp);
380 ir_type *res = new_type_array(1, tp);
381 set_type_alignment_bytes(res, alignment);
382 set_array_bounds_int(res, 0, 0, 2);
383 if (alignment > size)
385 set_type_size_bytes(res, 2 * size);
386 set_type_state(res, layout_fixed);
/* NOTE(review): elided excerpt — the caching checks and return are missing. */
391 * Create a float[2] array type for the given atomic type.
393 * @param tp the atomic type
395 static ir_type *ia32_create_float_array(ir_type *tp)
397 ir_mode *mode = get_type_mode(tp);
/* One statically cached array type per element mode (F, D, extended). */
400 if (mode == mode_F) {
401 static ir_type *float_F;
405 arr = float_F = make_array_type(tp);
406 } else if (mode == mode_D) {
407 static ir_type *float_D;
411 arr = float_D = make_array_type(tp);
413 static ir_type *float_E;
417 arr = float_E = make_array_type(tp);
/* NOTE(review): elided excerpt — interior lines are missing between the
 * embedded original line numbers. */
422 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
423 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* Table of well-known bit patterns: sign masks, abs masks and the ULL bias,
 * keyed by ia32_known_const_t; 'mode' selects Iu/Lu/F below. */
425 static const struct {
427 const char *cnst_str;
429 } names [ia32_known_const_max] = {
430 { "C_sfp_sign", "0x80000000", 0 },
431 { "C_dfp_sign", "0x8000000000000000", 1 },
432 { "C_sfp_abs", "0x7FFFFFFF", 0 },
433 { "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
434 { "C_ull_bias", "0x10000000000000000", 2 }
436 static ir_entity *ent_cache[ia32_known_const_max];
438 ir_entity *ent = ent_cache[kct];
441 ir_graph *irg = current_ir_graph;
442 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
443 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
444 const char *cnst_str = names[kct].cnst_str;
445 ident *name = new_id_from_str(names[kct].name);
448 switch (names[kct].mode) {
449 case 0: mode = mode_Iu; break;
450 case 1: mode = mode_Lu; break;
451 case 2: mode = mode_F; break;
452 default: panic("internal compiler error");
454 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
/* ULLBIAS gets a private float[2] entity {0.0, bias}; the others use the
 * shared float-const entity mechanism. */
456 if (kct == ia32_ULLBIAS) {
457 ir_type *type = ia32_get_prim_type(mode_F);
458 ir_type *atype = ia32_create_float_array(type);
459 ir_initializer_t *initializer;
461 ent = new_entity(get_glob_type(), name, atype);
463 set_entity_ld_ident(ent, name);
464 set_entity_visibility(ent, ir_visibility_private);
465 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
467 initializer = create_initializer_compound(2);
468 set_initializer_compound_value(initializer, 0,
469 create_initializer_tarval(get_mode_null(mode)));
470 set_initializer_compound_value(initializer, 1,
471 create_initializer_tarval(tv));
472 set_entity_initializer(ent, initializer);
474 ent = ia32_create_float_const_entity(isa, tv, name);
476 /* cache the entry */
477 ent_cache[kct] = ent;
480 return ent_cache[kct];
/* NOTE(review): elided excerpt — interior lines (returns, early checks) are
 * missing between the embedded original line numbers. */
484 * return true if the node is a Proj(Load) and could be used in source address
485 * mode for another node. Will return only true if the @p other node is not
486 * dependent on the memory of the Load (for binary operations use the other
487 * input here, for unary operations use NULL).
489 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
490 ir_node *other, ir_node *other2,
497 /* float constants are always available */
498 if (is_Const(node)) {
499 mode = get_irn_mode(node);
500 if (mode_is_float(mode)) {
501 ir_tarval *tv = get_Const_tarval(node);
502 if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
504 if (ia32_cg_config.use_sse2) {
505 if (is_simple_sse_Const(node))
508 if (is_simple_x87_Const(node))
511 if (get_irn_n_edges(node) > 1)
/* Must be the result Proj of a Load residing in the same block. */
520 load = get_Proj_pred(node);
521 pn = get_Proj_proj(node);
522 if (!is_Load(load) || pn != pn_Load_res)
524 if (get_nodes_block(load) != block)
526 mode = get_irn_mode(node);
527 /* we can't fold mode_E AM */
528 if (mode == ia32_mode_E)
530 /* we only use address mode if we're the only user of the load */
531 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
533 /* in some edge cases with address mode we might reach the load normally
534 * and through some AM sequence, if it is already materialized then we
535 * can't create an AM node from it */
536 if (be_is_transformed(node))
539 /* don't do AM if other node inputs depend on the load (via mem-proj) */
540 if (other != NULL && ia32_prevents_AM(block, load, other))
543 if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/* Aggregate describing a matched ia32 addressing/operand mode.
 * NOTE(review): elided excerpt — several members are missing here. */
549 typedef struct ia32_address_mode_t ia32_address_mode_t;
550 struct ia32_address_mode_t {
555 ia32_op_type_t op_type;
559 unsigned commutative : 1;
560 unsigned ins_permuted : 1;
/* Fill addr from a pointer + memory pair: run the address-mode matcher, then
 * transform base/index (defaulting to noreg_GP) and the memory input. */
563 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
565 /* construct load address */
566 memset(addr, 0, sizeof(addr[0]));
567 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
569 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
570 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
571 addr->mem = be_transform_node(mem);
/* NOTE(review): elided excerpt — interior lines are missing between the
 * embedded original line numbers. */
574 static void build_address(ia32_address_mode_t *am, ir_node *node,
575 ia32_create_am_flags_t flags)
577 ia32_address_t *addr = &am->addr;
583 /* floating point immediates */
/* Const case: place the float constant into an entity and address it
 * symbolically (base = symconst base, no index). */
584 if (is_Const(node)) {
585 ir_graph *irg = get_irn_irg(node);
586 const arch_env_t *arch_env = be_get_irg_arch_env(irg);
587 ia32_isa_t *isa = (ia32_isa_t*) arch_env;
588 ir_tarval *tv = get_Const_tarval(node);
589 ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
590 addr->base = get_symconst_base();
591 addr->index = noreg_GP;
593 addr->symconst_ent = entity;
594 addr->tls_segment = false;
596 am->ls_mode = get_type_mode(get_entity_type(entity));
597 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take pointer/memory from the Load and remember its
 * mem Proj so fix_mem_proj() can re-attach it later. */
601 load = get_Proj_pred(node);
602 ptr = get_Load_ptr(load);
603 mem = get_Load_mem(load);
604 new_mem = be_transform_node(mem);
605 am->pinned = get_irn_pinned(load);
606 am->ls_mode = get_Load_mode(load);
607 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
610 /* construct load address */
611 ia32_create_address_mode(addr, ptr, flags);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the matched address components (scale, symconst, offset, tls, frame)
 * onto an ia32 node's attributes.
 * NOTE(review): elided excerpt — the guard before the frame lines is missing. */
618 static void set_address(ir_node *node, const ia32_address_t *addr)
620 set_ia32_am_scale(node, addr->scale);
621 set_ia32_am_sc(node, addr->symconst_ent);
622 set_ia32_am_offs_int(node, addr->offset);
623 set_ia32_am_tls_segment(node, addr->tls_segment);
624 if (addr->symconst_sign)
625 set_ia32_am_sc_sign(node);
627 set_ia32_use_frame(node);
628 set_ia32_frame_ent(node, addr->frame_entity);
/* NOTE(review): elided excerpt — the commutative guard line is missing
 * before the final call. */
632 * Apply attributes of a given address mode to a node.
634 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
636 set_address(node, &am->addr);
638 set_ia32_op_type(node, am->op_type);
639 set_ia32_ls_mode(node, am->ls_mode);
640 if (am->pinned == op_pin_state_pinned) {
641 /* beware: some nodes are already pinned and did not allow to change the state */
642 if (get_irn_pinned(node) != op_pin_state_pinned)
643 set_irn_pinned(node, op_pin_state_pinned);
646 set_ia32_commutative(node);
/* NOTE(review): elided excerpt — the is_Conv() pre-check and the return
 * keyword before the final expression are missing. */
650 * Check, if a given node is a Down-Conv, ie. a integer Conv
651 * from a mode with a mode with more bits to a mode with lesser bits.
652 * Moreover, we return only true if the node has not more than 1 user.
654 * @param node the node
655 * @return non-zero if node is a Down-Conv
657 static int is_downconv(const ir_node *node)
665 src_mode = get_irn_mode(get_Conv_op(node));
666 dest_mode = get_irn_mode(node);
/* Both modes must live in GP registers and the destination must be no
 * wider than the source. */
668 ia32_mode_needs_gp_reg(src_mode) &&
669 ia32_mode_needs_gp_reg(dest_mode) &&
670 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
/* NOTE(review): elided excerpt — the loop-exit break and final return are
 * missing here. */
673 /** Skip all Down-Conv's on a given node and return the resulting node. */
674 ir_node *ia32_skip_downconv(ir_node *node)
676 while (is_downconv(node)) {
677 /* we only want to skip the conv when we're the only user
678 * (because this test is used in the context of address-mode selection
679 * and we don't want to use address mode for multiple users) */
680 if (get_irn_n_edges(node) > 1)
683 node = get_Conv_op(node);
/* True if node is a Conv between float modes that does not widen.
 * NOTE(review): elided excerpt — the leading is_Conv() check and the final
 * return of the skip function are missing. */
689 static bool is_float_downconv(const ir_node *node)
693 ir_node *pred = get_Conv_op(node);
694 ir_mode *pred_mode = get_irn_mode(pred);
695 ir_mode *mode = get_irn_mode(node);
696 return mode_is_float(pred_mode)
697 && get_mode_size_bits(mode) <= get_mode_size_bits(pred_mode);
/* Strip a chain of float down-convs and return the underlying value. */
700 static ir_node *ia32_skip_float_downconv(ir_node *node)
702 while (is_float_downconv(node)) {
703 node = get_Conv_op(node);
/* True if node is an integer Conv between GP modes of the same bit width
 * (i.e. only signedness changes) with at most one user.
 * NOTE(review): elided excerpt — the is_Conv() check, intermediate returns
 * and the skip function's return are missing. */
708 static bool is_sameconv(ir_node *node)
716 /* we only want to skip the conv when we're the only user
717 * (because this test is used in the context of address-mode selection
718 * and we don't want to use address mode for multiple users) */
719 if (get_irn_n_edges(node) > 1)
722 src_mode = get_irn_mode(get_Conv_op(node));
723 dest_mode = get_irn_mode(node);
725 ia32_mode_needs_gp_reg(src_mode) &&
726 ia32_mode_needs_gp_reg(dest_mode) &&
727 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
730 /** Skip all signedness convs */
731 static ir_node *ia32_skip_sameconv(ir_node *node)
733 while (is_sameconv(node)) {
734 node = get_Conv_op(node);
740 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
742 ir_mode *mode = get_irn_mode(node);
743 ir_node *block = get_nodes_block(node);
744 dbg_info *dbgi = get_irn_dbg_info(node);
745 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
748 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
750 ir_mode *mode = get_irn_mode(node);
751 ir_node *block = get_nodes_block(node);
752 dbg_info *dbgi = get_irn_dbg_info(node);
753 /* normalize to an unsigned mode */
754 switch (get_mode_size_bits(mode)) {
755 case 8: mode = mode_Bu; break;
756 case 16: mode = mode_Hu; break;
758 panic("ia32: invalid mode in zest: %+F", node);
760 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
763 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
765 ir_mode *mode = get_irn_mode(node);
766 if (mode_is_signed(mode)) {
767 return transform_sext(node, orig_node);
769 return transform_zext(node, orig_node);
/* NOTE(review): heavily elided excerpt — many interior lines (else branches,
 * assignments, closing braces) are missing between the embedded numbers. */
774 * matches operands of a node into ia32 addressing/operand modes. This covers
775 * usage of source address mode, immediates, operations with non 32-bit modes,
777 * The resulting data is filled into the @p am struct. block is the block
778 * of the node whose arguments are matched. op1, op2 are the first and second
779 * input that are matched (op1 may be NULL). other_op is another unrelated
780 * input that is not matched! but which is needed sometimes to check if AM
781 * for op1/op2 is legal.
782 * @p flags describes the supported modes of the operation in detail.
784 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
785 ir_node *op1, ir_node *op2, ir_node *other_op,
788 ia32_address_t *addr = &am->addr;
789 ir_mode *mode = get_irn_mode(op2);
790 int mode_bits = get_mode_size_bits(mode);
791 ir_node *new_op1, *new_op2;
793 unsigned commutative;
794 int use_am_and_immediates;
797 memset(am, 0, sizeof(am[0]));
/* Decode the match flags once up front. */
799 commutative = (flags & match_commutative) != 0;
800 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
801 use_am = (flags & match_am) != 0;
802 use_immediate = (flags & match_immediate) != 0;
803 assert(!use_am_and_immediates || use_immediate);
806 assert(!commutative || op1 != NULL);
807 assert(use_am || !(flags & match_8bit_am));
808 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit AM is only usable when the matching flag allows it. */
810 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
811 (mode_bits == 16 && !(flags & match_16bit_am))) {
815 /* we can simply skip downconvs for mode neutral nodes: the upper bits
816 * can be random for these operations */
817 if (flags & match_mode_neutral) {
818 op2 = ia32_skip_downconv(op2);
820 op1 = ia32_skip_downconv(op1);
823 op2 = ia32_skip_sameconv(op2);
825 op1 = ia32_skip_sameconv(op1);
829 /* match immediates. firm nodes are normalized: constants are always on the
832 if (!(flags & match_try_am) && use_immediate) {
833 new_op2 = ia32_try_create_Immediate(op2, 0);
/* Try source address mode on op2 first ... */
836 if (new_op2 == NULL &&
837 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
838 build_address(am, op2, ia32_create_am_normal);
839 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
840 if (mode_is_float(mode)) {
841 new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
845 am->op_type = ia32_AddrModeS;
/* ... otherwise, for commutative ops, try it on op1 (operands permuted). */
846 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
848 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
850 build_address(am, op1, ia32_create_am_normal);
852 if (mode_is_float(mode)) {
853 noreg = ia32_new_NoReg_vfp(current_ir_graph);
858 if (new_op2 != NULL) {
861 new_op1 = be_transform_node(op2);
863 am->ins_permuted = true;
865 am->op_type = ia32_AddrModeS;
/* No AM matched: plain register/immediate operands. */
867 am->op_type = ia32_Normal;
869 if (flags & match_try_am) {
875 mode = get_irn_mode(op2);
/* Sub-32-bit operands are widened per the requested extension kind. */
876 if (get_mode_size_bits(mode) != 32
877 && (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
878 if (flags & match_upconv) {
879 new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
881 new_op2 = transform_upconv(op2, op2);
882 } else if (flags & match_zero_ext) {
883 new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
885 new_op2 = transform_zext(op2, op2);
887 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
889 new_op2 = be_transform_node(op2);
890 assert(flags & match_mode_neutral);
894 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
896 new_op2 = be_transform_node(op2);
/* Default any unset address components. */
900 if (addr->base == NULL)
901 addr->base = noreg_GP;
902 if (addr->index == NULL)
903 addr->index = noreg_GP;
904 if (addr->mem == NULL)
907 am->new_op1 = new_op1;
908 am->new_op2 = new_op2;
909 am->commutative = commutative;
/* NOTE(review): elided excerpt — the early-return and final return lines are
 * missing between the embedded line numbers. */
913 * "Fixes" a node that uses address mode by turning it into mode_T
914 * and returning a pn_ia32_res Proj.
916 * @param node the node
917 * @param am its address mode
919 * @return a Proj(pn_ia32_res) if a memory address mode is used,
922 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
927 if (am->mem_proj == NULL)
930 /* we have to create a mode_T so the old MemProj can attach to us */
931 mode = get_irn_mode(node);
932 load = get_Proj_pred(am->mem_proj);
/* Redirect future lookups of the old Load to the new AM node. */
934 be_set_transformed_node(load, node);
936 if (mode != mode_T) {
937 set_irn_mode(node, mode_T);
938 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/* NOTE(review): elided excerpt — the final return and closing brace are
 * missing. */
945 * Construct a standard binary operation, set AM and immediate if required.
947 * @param node The original node for which the binop is created
948 * @param op1 The first operand
949 * @param op2 The second operand
950 * @param func The node constructor function
951 * @return The constructed ia32 node.
953 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
954 construct_binop_func *func, match_flags_t flags)
957 ir_node *block, *new_block, *new_node;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 block = get_nodes_block(node);
962 match_arguments(&am, block, op1, op2, NULL, flags);
964 dbgi = get_irn_dbg_info(node);
965 new_block = be_transform_node(block);
966 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): elided excerpt — the enum keyword line and closing brace of
 * the enumeration are missing here. The COMPILETIME_ASSERTs pin the generic
 * binop input positions to the concrete Adc/Sbb node layouts. */
981 * Generic names for the inputs of an ia32 binary op.
984 n_ia32_l_binop_left, /**< ia32 left input */
985 n_ia32_l_binop_right, /**< ia32 right input */
986 n_ia32_l_binop_eflags /**< ia32 eflags input */
988 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
989 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
990 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
991 COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
992 COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
993 COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): elided excerpt — the final return and closing brace are
 * missing. */
996 * Construct a binary operation which also consumes the eflags.
998 * @param node The node to transform
999 * @param func The node constructor function
1000 * @param flags The match flags
1001 * @return The constructor ia32 node
1003 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1004 match_flags_t flags)
1006 ir_node *src_block = get_nodes_block(node);
1007 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1008 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1009 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1011 ir_node *block, *new_node, *new_eflags;
1012 ia32_address_mode_t am;
1013 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op so AM matching respects its dependency. */
1015 match_arguments(&am, src_block, op1, op2, eflags, flags);
1017 dbgi = get_irn_dbg_info(node);
1018 block = be_transform_node(src_block);
1019 new_eflags = be_transform_node(eflags);
1020 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1021 am.new_op1, am.new_op2, new_eflags);
1022 set_am_attributes(new_node, &am);
1023 /* we can't use source address mode anymore when using immediates */
1024 if (!(flags & match_am_and_immediates) &&
1025 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1026 set_ia32_am_support(new_node, ia32_am_none);
1027 SET_IA32_ORIG_NODE(new_node, node);
1029 new_node = fix_mem_proj(new_node, &am);
1034 static ir_node *get_fpcw(void)
1036 if (initial_fpcw != NULL)
1037 return initial_fpcw;
1039 initial_fpcw = be_transform_node(old_initial_fpcw);
1040 return initial_fpcw;
/* Strip float Convs that do not widen the value (and have a single user),
 * returning the underlying node.
 * NOTE(review): elided excerpt — the loop's break/advance and the final
 * return are missing. */
1043 static ir_node *skip_float_upconv(ir_node *node)
1045 ir_mode *mode = get_irn_mode(node);
1046 assert(mode_is_float(mode));
1048 while (is_Conv(node)) {
1049 ir_node *pred = get_Conv_op(node);
1050 ir_mode *pred_mode = get_irn_mode(pred);
1053 * suboptimal, but without this check the address mode matcher
1054 * can incorrectly think that something has only 1 user
1056 if (get_irn_n_edges(node) > 1)
1059 if (!mode_is_float(pred_mode)
1060 || get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1068 static void check_x87_floatmode(ir_mode *mode)
1070 if (mode != ia32_mode_E) {
1071 panic("ia32: x87 only supports x86 extended float mode");
/* NOTE(review): elided excerpt — declarations, the final return and closing
 * brace are missing between the embedded line numbers. */
1076 * Construct a standard binary operation, set AM and immediate if required.
1078 * @param op1 The first operand
1079 * @param op2 The second operand
1080 * @param func The node constructor function
1081 * @return The constructed ia32 node.
1083 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1084 construct_binop_float_func *func)
1090 ia32_address_mode_t am;
1091 ia32_address_t *addr = &am.addr;
1092 ia32_x87_attr_t *attr;
1093 /* All operations are considered commutative, because there are reverse
1095 match_flags_t flags = match_commutative | match_am;
/* Div nodes carry their mode in the resmode attribute. */
1097 = is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
1098 check_x87_floatmode(mode);
1100 op1 = skip_float_upconv(op1);
1101 op2 = skip_float_upconv(op2);
1103 block = get_nodes_block(node);
1104 match_arguments(&am, block, op1, op2, NULL, flags);
1106 dbgi = get_irn_dbg_info(node);
1107 new_block = be_transform_node(block);
/* x87 binops additionally consume the fp control word. */
1108 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1109 am.new_op1, am.new_op2, get_fpcw());
1110 set_am_attributes(new_node, &am);
1112 attr = get_ia32_x87_attr(new_node);
1113 attr->attr.data.ins_permuted = am.ins_permuted;
1115 SET_IA32_ORIG_NODE(new_node, node);
1117 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): heavily elided excerpt — declarations, else branches and the
 * final return are missing between the embedded line numbers. */
1123 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1125 * @param op1 The first operand
1126 * @param op2 The second operand
1127 * @param func The node constructor function
1128 * @return The constructed ia32 node.
1130 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1131 construct_shift_func *func,
1132 match_flags_t flags)
1134 ir_mode *mode = get_irn_mode(node);
1136 assert(! mode_is_float(mode));
1137 assert(flags & match_immediate);
1138 assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);
/* x86 shift semantics assume a 32-bit modulo shift. */
1140 if (get_mode_modulo_shift(mode) != 32) {
1141 /* TODO: implement special cases for non-modulo shifts */
1142 panic("modulo shift!=32 not supported by ia32 backend");
/* Prepare the value operand according to the requested extension kind. */
1147 if (flags & match_mode_neutral) {
1148 op1 = ia32_skip_downconv(op1);
1149 new_op1 = be_transform_node(op1);
1151 op1 = ia32_skip_sameconv(op1);
1152 if (get_mode_size_bits(mode) != 32) {
1153 if (flags & match_upconv) {
1154 new_op1 = transform_upconv(op1, node);
1155 } else if (flags & match_zero_ext) {
1156 new_op1 = transform_zext(op1, node);
1158 /* match_mode_neutral not handled here because it makes no
1159 * sense for shift operations */
1160 panic("ia32 code selection failed for %+F", node);
1163 new_op1 = be_transform_node(op1);
1167 /* the shift amount can be any mode that is bigger than 5 bits, since all
1168 * other bits are ignored anyway */
1169 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1170 ir_node *const op = get_Conv_op(op2);
1171 if (mode_is_float(get_irn_mode(op)))
1174 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1176 new_op2 = create_immediate_or_transform(op2, 0);
1178 dbg_info *dbgi = get_irn_dbg_info(node);
1179 ir_node *block = get_nodes_block(node);
1180 ir_node *new_block = be_transform_node(block);
1181 ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
1182 SET_IA32_ORIG_NODE(new_node, node);
1184 /* lowered shift instruction may have a dependency operand, handle it here */
1185 if (get_irn_arity(node) == 3) {
1186 /* we have a dependency */
1187 ir_node* dep = get_irn_n(node, 2);
1188 if (get_irn_n_edges(dep) > 1) {
1189 /* ... which has at least one user other than 'node' */
1190 ir_node *new_dep = be_transform_node(dep);
1191 add_irn_dep(new_node, new_dep);
/* NOTE(review): elided excerpt — the final return and closing brace are
 * missing. */
1200 * Construct a standard unary operation, set AM and immediate if required.
1202 * @param op The operand
1203 * @param func The node constructor function
1204 * @return The constructed ia32 node.
1206 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1207 match_flags_t flags)
1210 ir_node *block, *new_block, *new_op, *new_node;
1212 assert(flags == 0 || flags == match_mode_neutral);
/* Mode-neutral unops can ignore upper bits, so downconvs may be skipped. */
1213 if (flags & match_mode_neutral) {
1214 op = ia32_skip_downconv(op);
1217 new_op = be_transform_node(op);
1218 dbgi = get_irn_dbg_info(node);
1219 block = get_nodes_block(node);
1220 new_block = be_transform_node(block);
1221 new_node = func(dbgi, new_block, new_op);
1223 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a prepared ia32_address_t (base + index,
 * symconst/offset attached via set_address). */
1228 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1229 ia32_address_t *addr)
/* transform base and index into the target graph */
1239 base = be_transform_node(base);
1246 idx = be_transform_node(idx);
1249 /* segment overrides are ineffective for Leas :-( so we have to patch
1251 if (addr->tls_segment) {
/* materialize the TLS base pointer and fold it in via an extra Lea level */
1252 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1253 assert(addr->symconst_ent != NULL);
1254 if (base == noreg_GP)
1257 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
/* the TLS segment is now folded into the base; clear the flag */
1258 addr->tls_segment = false;
1261 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1262 set_address(res, addr);
1268 * Returns non-zero if a given address mode has a symbolic or
1269 * numerical offset != 0.
1271 static int am_has_immediates(const ia32_address_t *addr)
/* numeric offset, symbolic constant or any frame reference counts as an
 * immediate part of the address */
1273 return addr->offset != 0 || addr->symconst_ent != NULL
1274 || addr->frame_entity || addr->use_frame;
/* constructor signature shared by the double-wide shift nodes (ShlD/ShrD) */
1277 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1278 ir_node *high, ir_node *low,
1282 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1283 * op1 - target to be shifted
1284 * op2 - contains bits to be shifted into target
1286 * Only op3 can be an immediate.
1288 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1289 ir_node *high, ir_node *low, ir_node *count,
1290 new_shiftd_func func)
1292 ir_node *new_block = be_transform_node(block);
1293 ir_node *new_high = be_transform_node(high);
1294 ir_node *new_low = be_transform_node(low);
1298 /* the shift amount can be any mode that is bigger than 5 bits, since all
1299 * other bits are ignored anyway */
/* strip single-user integer Convs around the shift amount — x86 shift
 * count semantics make them irrelevant */
1300 while (is_Conv(count) &&
1301 get_irn_n_edges(count) == 1 &&
1302 mode_is_int(get_irn_mode(count))) {
1303 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1304 count = get_Conv_op(count);
/* the count may become an immediate operand if it is constant */
1306 new_count = create_immediate_or_transform(count, 0);
1308 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1313 * test whether 2 values result in 'x' and '32-x' when interpreted as a shift
1316 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1318 if (is_Const(value1) && is_Const(value2)) {
1319 ir_tarval *tv1 = get_Const_tarval(value1);
1320 ir_tarval *tv2 = get_Const_tarval(value2);
1321 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1322 long v1 = get_tarval_long(tv1);
1323 long v2 = get_tarval_long(tv2);
/* match exactly the pair (x, 32-x); requiring v1 <= v2 keeps the
 * amounts in the sane 0..32 range */
1324 return v1 <= v2 && v2 == 32-v1;
/* Try to recognize an Or/Add of complementary Shl/Shr halves as a 64-bit
 * double-wide shift and emit ShlD/ShrD via gen_64bit_shifts().
 * Presumably returns NULL when no pattern matches (fallthrough return not
 * visible here — confirm against full source). */
1330 static ir_node *match_64bit_shift(ir_node *node)
1332 ir_node *op1 = get_binop_left(node);
1333 ir_node *op2 = get_binop_right(node);
1334 assert(is_Or(node) || is_Add(node));
1342 /* match ShlD operation */
1343 if (is_Shl(op1) && is_Shr(op2)) {
1344 ir_node *shl_right = get_Shl_right(op1);
1345 ir_node *shl_left = get_Shl_left(op1);
1346 ir_node *shr_right = get_Shr_right(op2);
1347 ir_node *shr_left = get_Shr_left(op2);
1348 /* constant ShlD operation */
/* Shl(a,x) | Shr(b,32-x) => ShlD */
1349 if (is_complementary_shifts(shl_right, shr_right)) {
1350 dbg_info *dbgi = get_irn_dbg_info(node);
1351 ir_node *block = get_nodes_block(node);
1352 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1355 /* constant ShrD operation */
/* Shr(a,x) | Shl(b,32-x) => ShrD */
1356 if (is_complementary_shifts(shr_right, shl_right)) {
1357 dbg_info *dbgi = get_irn_dbg_info(node);
1358 ir_node *block = get_nodes_block(node);
1359 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1362 /* lower_dw produces the following for ShlD:
1363 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1364 if (is_Shr(shr_left) && is_Not(shr_right)
1365 && is_Const_1(get_Shr_right(shr_left))
1366 && get_Not_op(shr_right) == shl_right) {
1367 dbg_info *dbgi = get_irn_dbg_info(node);
1368 ir_node *block = get_nodes_block(node);
1369 ir_node *val_h = get_Shr_left(shr_left);
1370 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1373 /* lower_dw produces the following for ShrD:
1374 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1375 if (is_Shl(shl_left) && is_Not(shl_right)
1376 && is_Const_1(get_Shl_right(shl_left))
1377 && get_Not_op(shl_right) == shr_right) {
1378 dbg_info *dbgi = get_irn_dbg_info(node);
1379 ir_node *block = get_nodes_block(node);
1380 ir_node *val_h = get_Shl_left(shl_left);
1381 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1390 * Creates an ia32 Add.
1392 * @return the created ia32 Add node
1394 static ir_node *gen_Add(ir_node *node)
1396 ir_mode *mode = get_irn_mode(node);
1397 ir_node *op1 = get_Add_left(node);
1398 ir_node *op2 = get_Add_right(node);
1400 ir_node *block, *new_block, *new_node, *add_immediate_op;
1401 ia32_address_t addr;
1402 ia32_address_mode_t am;
/* an Add may actually be a lowered 64-bit double shift — check first */
1404 new_node = match_64bit_shift(node);
1405 if (new_node != NULL)
/* float adds go to SSE2 or the x87 stack, depending on configuration */
1408 if (mode_is_float(mode)) {
1409 if (ia32_cg_config.use_sse2)
1410 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1411 match_commutative | match_am);
1413 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1416 ia32_mark_non_am(node);
1420 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1421 * 1. Add with immediate -> Lea
1422 * 2. Add with possible source address mode -> Add
1423 * 3. Otherwise -> Lea
1425 memset(&addr, 0, sizeof(addr));
/* force address-mode creation so the Add folds into an addressing expr */
1426 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1427 add_immediate_op = NULL;
1429 dbgi = get_irn_dbg_info(node);
1430 block = get_nodes_block(node);
1431 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> emit a Const */
1434 if (addr.base == NULL && addr.index == NULL) {
1435 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1436 addr.symconst_sign, 0, addr.offset);
1437 SET_IA32_ORIG_NODE(new_node, node);
1440 /* add with immediate? */
1441 if (addr.index == NULL) {
1442 add_immediate_op = addr.base;
1443 } else if (addr.base == NULL && addr.scale == 0) {
1444 add_immediate_op = addr.index;
1447 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1448 if (!am_has_immediates(&addr)) {
1449 #ifdef DEBUG_libfirm
1450 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1453 return be_transform_node(add_immediate_op);
/* case 1: reg + immediate -> Lea */
1456 new_node = create_lea_from_address(dbgi, new_block, &addr);
1457 SET_IA32_ORIG_NODE(new_node, node);
1461 /* test if we can use source address mode */
1462 match_arguments(&am, block, op1, op2, NULL, match_commutative
1463 | match_mode_neutral | match_am | match_immediate | match_try_am);
1465 /* construct an Add with source address mode */
1466 if (am.op_type == ia32_AddrModeS) {
1467 ia32_address_t *am_addr = &am.addr;
1468 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1469 am_addr->index, am_addr->mem, am.new_op1,
1471 set_am_attributes(new_node, &am);
1472 SET_IA32_ORIG_NODE(new_node, node);
1474 new_node = fix_mem_proj(new_node, &am);
1479 /* otherwise construct a lea */
1480 new_node = create_lea_from_address(dbgi, new_block, &addr);
1481 SET_IA32_ORIG_NODE(new_node, node);
1486 * Creates an ia32 Mul.
1488 * @return the created ia32 Mul node
1490 static ir_node *gen_Mul(ir_node *node)
1492 ir_node *op1 = get_Mul_left(node);
1493 ir_node *op2 = get_Mul_right(node);
1494 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul when available, otherwise x87 vfmul */
1496 if (mode_is_float(mode)) {
1497 if (ia32_cg_config.use_sse2)
1498 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1499 match_commutative | match_am);
1501 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul supports source AM and immediates */
1503 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1504 match_commutative | match_am | match_mode_neutral |
1505 match_immediate | match_am_and_immediates);
1509 * Creates an ia32 Mulh.
1510 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1511 * this result while Mul returns the lower 32 bit.
1513 * @return the created ia32 Mulh node
1515 static ir_node *gen_Mulh(ir_node *node)
1517 dbg_info *dbgi = get_irn_dbg_info(node);
1518 ir_node *op1 = get_Mulh_left(node);
1519 ir_node *op2 = get_Mulh_right(node);
1520 ir_mode *mode = get_irn_mode(node);
1522 ir_node *proj_res_high;
/* only 32-bit high multiply is implemented on this backend */
1524 if (get_mode_size_bits(mode) != 32) {
1525 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; pick the high result Proj */
1528 if (mode_is_signed(mode)) {
1529 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1530 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1532 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1533 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1535 return proj_res_high;
1539 * Creates an ia32 And.
1541 * @return The created ia32 And node
1543 static ir_node *gen_And(ir_node *node)
1545 ir_node *op1 = get_And_left(node);
1546 ir_node *op2 = get_And_right(node);
1547 assert(! mode_is_float(get_irn_mode(node)));
1549 /* is it a zero extension? */
1550 if (is_Const(op2)) {
1551 ir_tarval *tv = get_Const_tarval(op2);
1552 long v = get_tarval_long(tv);
/* x & 0xFF / x & 0xFFFF is a zero-extending conversion, emit as Conv */
1554 if (v == 0xFF || v == 0xFFFF) {
1555 dbg_info *dbgi = get_irn_dbg_info(node);
1556 ir_node *block = get_nodes_block(node);
1563 assert(v == 0xFFFF);
1566 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate support */
1571 return gen_binop(node, op1, op2, new_bd_ia32_And,
1572 match_commutative | match_mode_neutral | match_am | match_immediate);
1576 * Creates an ia32 Or.
1578 * @return The created ia32 Or node
1580 static ir_node *gen_Or(ir_node *node)
1582 ir_node *op1 = get_Or_left(node);
1583 ir_node *op2 = get_Or_right(node);
/* an Or may combine the halves of a lowered 64-bit shift — check first */
1586 res = match_64bit_shift(node);
1590 assert (! mode_is_float(get_irn_mode(node)));
1591 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1592 | match_mode_neutral | match_am | match_immediate);
1598 * Creates an ia32 Eor.
1600 * @return The created ia32 Eor node
1602 static ir_node *gen_Eor(ir_node *node)
1604 ir_node *op1 = get_Eor_left(node);
1605 ir_node *op2 = get_Eor_right(node);
/* boolean/float Eors must have been lowered before */
1607 assert(! mode_is_float(get_irn_mode(node)));
1608 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1609 | match_mode_neutral | match_am | match_immediate);
1614 * Creates an ia32 Sub.
1616 * @return The created ia32 Sub node
1618 static ir_node *gen_Sub(ir_node *node)
1620 ir_node *op1 = get_Sub_left(node);
1621 ir_node *op2 = get_Sub_right(node);
1622 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE2 xSub or x87 vfsub */
1624 if (mode_is_float(mode)) {
1625 if (ia32_cg_config.use_sse2)
1626 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1628 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalized to Add(x, -C) earlier */
1631 if (is_Const(op2)) {
1632 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1636 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1637 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a load via address mode:
 * combine the node's original memory (src_mem) with the folded load's memory
 * (am_mem) while avoiding cycles through the folded load itself. */
1640 static ir_node *transform_AM_mem(ir_node *const block,
1641 ir_node *const src_val,
1642 ir_node *const src_mem,
1643 ir_node *const am_mem)
/* no AM memory -> just transform the original memory */
1645 if (is_NoMem(am_mem)) {
1646 return be_transform_node(src_mem);
1647 } else if (is_Proj(src_val) &&
1649 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1650 /* avoid memory loop */
1652 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* rebuild the Sync without the predecessor that belongs to the
 * folded load, then append am_mem */
1653 ir_node *const ptr_pred = get_Proj_pred(src_val);
1654 int const arity = get_Sync_n_preds(src_mem);
1659 NEW_ARR_A(ir_node*, ins, arity + 1);
1661 /* NOTE: This sometimes produces dead-code because the old sync in
1662 * src_mem might not be used anymore, we should detect this case
1663 * and kill the sync... */
1664 for (i = arity - 1; i >= 0; --i) {
1665 ir_node *const pred = get_Sync_pred(src_mem, i);
1667 /* avoid memory loop */
1668 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1671 ins[n++] = be_transform_node(pred);
1674 if (n==1 && ins[0] == am_mem) {
1676 /* creating a new Sync and relying on CSE may fail,
1677 * if am_mem is a ProjM, which does not yet verify. */
1681 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed original memory and am_mem */
1685 ins[0] = be_transform_node(src_mem);
1687 return new_r_Sync(block, 2, ins);
1692 * Create a 32bit to 64bit signed extension.
1694 * @param dbgi debug info
1695 * @param block the block where node nodes should be placed
1696 * @param val the value to extend
1697 * @param orig the original node
1699 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1700 ir_node *val, const ir_node *orig)
/* short form: cltd (cdq) sign-extends eax into edx; ProduceVal models the
 * implicit eax input for the register allocator */
1705 if (ia32_cg_config.use_short_sex_eax) {
1706 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1707 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* generic form: arithmetic shift right by 31 replicates the sign bit */
1709 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1710 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1712 SET_IA32_ORIG_NODE(res, orig);
1717 * Generates an ia32 Div with additional infrastructure for the
1718 * register allocator if needed.
/* Shared transform for firm Div and Mod nodes (both map to the x86
 * div/idiv instruction which produces quotient and remainder). */
1720 static ir_node *create_Div(ir_node *node)
1722 dbg_info *dbgi = get_irn_dbg_info(node);
1723 ir_node *block = get_nodes_block(node);
1724 ir_node *new_block = be_transform_node(block);
1725 int throws_exception = ir_throws_exception(node);
1732 ir_node *sign_extension;
1733 ia32_address_mode_t am;
1734 ia32_address_t *addr = &am.addr;
1736 /* the upper bits have random contents for smaller modes */
/* pick operands/memory/mode depending on whether this is a Div or a Mod */
1737 switch (get_irn_opcode(node)) {
1739 op1 = get_Div_left(node);
1740 op2 = get_Div_right(node);
1741 mem = get_Div_mem(node);
1742 mode = get_Div_resmode(node);
1745 op1 = get_Mod_left(node);
1746 op2 = get_Mod_right(node);
1747 mem = get_Mod_mem(node);
1748 mode = get_Mod_resmode(node);
1751 panic("invalid divmod node %+F", node);
/* match_upconv: smaller operands must be widened to full 32 bit */
1754 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1756 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1757 is the memory of the consumed address. We can have only the second op as address
1758 in Div nodes, so check only op2. */
1759 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs edx = sign-extension of eax; unsigned needs edx = 0 */
1761 if (mode_is_signed(mode)) {
1762 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1763 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1764 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1766 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1768 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1769 addr->index, new_mem, am.new_op2,
1770 am.new_op1, sign_extension);
/* division may trap (divide by zero) — preserve the exception property */
1772 ir_set_throws_exception(new_node, throws_exception);
1774 set_irn_pinned(new_node, get_irn_pinned(node));
1776 set_am_attributes(new_node, &am);
1777 SET_IA32_ORIG_NODE(new_node, node);
1779 new_node = fix_mem_proj(new_node, &am);
1785 * Generates an ia32 Mod.
1787 static ir_node *gen_Mod(ir_node *node)
/* Div and Mod share one instruction; create_Div handles both opcodes */
1789 return create_Div(node);
1793 * Generates an ia32 Div.
1795 static ir_node *gen_Div(ir_node *node)
1797 ir_mode *mode = get_Div_resmode(node);
/* float division: SSE2 xDiv or x87 vfdiv; integers share create_Div */
1798 if (mode_is_float(mode)) {
1799 ir_node *op1 = get_Div_left(node);
1800 ir_node *op2 = get_Div_right(node);
1802 if (ia32_cg_config.use_sse2) {
1803 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1805 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1809 return create_Div(node);
1813 * Creates an ia32 Shl.
1815 * @return The created ia32 Shl node
1817 static ir_node *gen_Shl(ir_node *node)
1819 ir_node *left = get_Shl_left(node);
1820 ir_node *right = get_Shl_right(node);
/* left shift never needs the upper bits of the operand -> mode neutral */
1822 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1823 match_mode_neutral | match_immediate);
1827 * Creates an ia32 Shr.
1829 * @return The created ia32 Shr node
1831 static ir_node *gen_Shr(ir_node *node)
1833 ir_node *left = get_Shr_left(node);
1834 ir_node *right = get_Shr_right(node);
/* logical right shift needs a zero-extended operand for smaller modes */
1836 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1837 match_immediate | match_zero_ext);
1841 * Creates an ia32 Sar.
1843 * @return The created ia32 Shrs node
1845 static ir_node *gen_Shrs(ir_node *node)
1847 ir_node *left = get_Shrs_left(node);
1848 ir_node *right = get_Shrs_right(node);
/* special case: Shrs(x, 31) is a pure sign extension of a 32-bit value */
1850 if (is_Const(right)) {
1851 ir_tarval *tv = get_Const_tarval(right);
1852 long val = get_tarval_long(tv);
1854 /* this is a sign extension */
1855 dbg_info *dbgi = get_irn_dbg_info(node);
1856 ir_node *block = be_transform_node(get_nodes_block(node));
1857 ir_node *new_op = be_transform_node(left);
1859 return create_sex_32_64(dbgi, block, new_op, node);
1863 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16,24} is a sign-extending conversion
 * from the narrower mode — emit it as an I2I Conv instead */
1864 if (is_Const(right) && is_Shl(left)) {
1865 ir_node *shl_left = get_Shl_left(left);
1866 ir_node *shl_right = get_Shl_right(left);
1867 if (is_Const(shl_right)) {
1868 ir_tarval *tv1 = get_Const_tarval(right);
1869 ir_tarval *tv2 = get_Const_tarval(shl_right);
1870 if (tv1 == tv2 && tarval_is_long(tv1)) {
1871 long val = get_tarval_long(tv1);
1872 if (val == 16 || val == 24) {
1873 dbg_info *dbgi = get_irn_dbg_info(node);
1874 ir_node *block = get_nodes_block(node);
1884 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift; operand must be sign-extended (upconv) */
1893 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1894 match_immediate | match_upconv);
1900 * Creates an ia32 Rol.
1902 * @param op1 The first operator
1903 * @param op2 The second operator
1904 * @return The created ia32 RotL node
1906 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1908 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1914 * Creates an ia32 Ror.
1915 * NOTE: There is no RotR with immediate because this would always be a RotL
1916 * "imm-mode_size_bits" which can be pre-calculated.
1918 * @param op1 The first operator
1919 * @param op2 The second operator
1920 * @return The created ia32 RotR node
1922 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1924 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1930 * Creates an ia32 RotR or RotL (depending on the found pattern).
1932 * @return The created ia32 RotL or RotR node
1934 static ir_node *gen_Rotl(ir_node *node)
1936 ir_node *op1 = get_Rotl_left(node);
1937 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -y) is the same as Ror(x, y) — use the right-rotate form */
1939 if (is_Minus(op2)) {
1940 return gen_Ror(node, op1, get_Minus_op(op2));
1943 return gen_Rol(node, op1, op2);
1949 * Transforms a Minus node.
1951 * @return The created ia32 Minus node
1953 static ir_node *gen_Minus(ir_node *node)
1955 ir_node *op = get_Minus_op(node);
1956 ir_node *block = be_transform_node(get_nodes_block(node));
1957 dbg_info *dbgi = get_irn_dbg_info(node);
1958 ir_mode *mode = get_irn_mode(node);
1963 if (mode_is_float(mode)) {
1964 ir_node *new_op = be_transform_node(op);
/* SSE2 has no negate — flip the sign bit by XORing with a constant
 * sign mask loaded from memory via address mode */
1965 if (ia32_cg_config.use_sse2) {
1966 /* TODO: non-optimal... if we have many xXors, then we should
1967 * rather create a load for the const and use that instead of
1968 * several AM nodes... */
1969 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1971 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1972 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the single or double precision sign mask constant */
1974 size = get_mode_size_bits(mode);
1975 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1977 set_ia32_am_sc(new_node, ent);
1978 set_ia32_op_type(new_node, ia32_AddrModeS);
1979 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction (fchs) */
1981 check_x87_floatmode(mode);
1982 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation */
1985 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1988 SET_IA32_ORIG_NODE(new_node, node);
1994 * Transforms a Not node.
1996 * @return The created ia32 Not node
1998 static ir_node *gen_Not(ir_node *node)
2000 ir_node *op = get_Not_op(node);
2002 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
2003 assert (! mode_is_float(get_irn_mode(node)));
2005 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build float |op| (and optionally -|op| when negate is set).
 * SSE2: And with a sign-clearing mask constant; x87: fabs (+ fchs). */
2008 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
2009 bool negate, ir_node *node)
2011 ir_node *new_block = be_transform_node(block);
2012 ir_mode *mode = get_irn_mode(op);
2013 ir_node *new_op = be_transform_node(op);
2018 assert(mode_is_float(mode));
2020 if (ia32_cg_config.use_sse2) {
/* clear the sign bit by ANDing with the SABS/DABS mask constant,
 * addressed via source address mode */
2021 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2022 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2023 noreg_GP, nomem, new_op, noreg_fp);
2025 size = get_mode_size_bits(mode);
2026 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2028 set_ia32_am_sc(new_node, ent);
2030 SET_IA32_ORIG_NODE(new_node, node);
2032 set_ia32_op_type(new_node, ia32_AddrModeS);
2033 set_ia32_ls_mode(new_node, mode);
2035 /* TODO, implement -Abs case */
/* x87 path: fabs, then fchs when the negated absolute value is wanted */
2038 check_x87_floatmode(mode);
2039 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
2040 SET_IA32_ORIG_NODE(new_node, node);
2042 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
2043 SET_IA32_ORIG_NODE(new_node, node);
2051 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
2053 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2055 dbg_info *dbgi = get_irn_dbg_info(cmp);
2056 ir_node *block = get_nodes_block(cmp);
2057 ir_node *new_block = be_transform_node(block);
2058 ir_node *op1 = be_transform_node(x);
2059 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of op1 into the carry flag */
2061 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map a firm ir_relation to the ia32 condition code used by Jcc/Setcc.
 * Three regimes: float compares (parity-aware codes), signed integer
 * compares, and unsigned integer compares.
 * overflow_possible selects between full signed codes (l/ge) and the
 * cheaper sign-flag tests when overflow cannot occur. */
2064 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2066 bool overflow_possible)
2068 if (mode_is_float(mode)) {
2070 case ir_relation_equal: return ia32_cc_float_equal;
2071 case ir_relation_less: return ia32_cc_float_below;
2072 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2073 case ir_relation_greater: return ia32_cc_float_above;
2074 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2075 case ir_relation_less_greater: return ia32_cc_not_equal;
2076 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2077 case ir_relation_unordered: return ia32_cc_parity;
2078 case ir_relation_unordered_equal: return ia32_cc_equal;
2079 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2080 case ir_relation_unordered_less_equal:
2081 return ia32_cc_float_unordered_below_equal;
2082 case ir_relation_unordered_greater:
2083 return ia32_cc_float_unordered_above;
2084 case ir_relation_unordered_greater_equal:
2085 return ia32_cc_float_unordered_above_equal;
2086 case ir_relation_unordered_less_greater:
2087 return ia32_cc_float_not_equal;
2088 case ir_relation_false:
2089 case ir_relation_true:
2090 /* should we introduce a jump always/jump never? */
2093 panic("Unexpected float pnc");
2094 } else if (mode_is_signed(mode)) {
2096 case ir_relation_unordered_equal:
2097 case ir_relation_equal: return ia32_cc_equal;
2098 case ir_relation_unordered_less:
2099 case ir_relation_less:
/* without possible overflow, "less than 0"-style tests reduce
 * to a plain sign-flag check */
2100 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2101 case ir_relation_unordered_less_equal:
2102 case ir_relation_less_equal: return ia32_cc_less_equal;
2103 case ir_relation_unordered_greater:
2104 case ir_relation_greater: return ia32_cc_greater;
2105 case ir_relation_unordered_greater_equal:
2106 case ir_relation_greater_equal:
2107 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2108 case ir_relation_unordered_less_greater:
2109 case ir_relation_less_greater: return ia32_cc_not_equal;
2110 case ir_relation_less_equal_greater:
2111 case ir_relation_unordered:
2112 case ir_relation_false:
2113 case ir_relation_true:
2114 /* introduce jump always/jump never? */
2117 panic("Unexpected pnc");
/* unsigned integer compares use the below/above condition codes */
2120 case ir_relation_unordered_equal:
2121 case ir_relation_equal: return ia32_cc_equal;
2122 case ir_relation_unordered_less:
2123 case ir_relation_less: return ia32_cc_below;
2124 case ir_relation_unordered_less_equal:
2125 case ir_relation_less_equal: return ia32_cc_below_equal;
2126 case ir_relation_unordered_greater:
2127 case ir_relation_greater: return ia32_cc_above;
2128 case ir_relation_unordered_greater_equal:
2129 case ir_relation_greater_equal: return ia32_cc_above_equal;
2130 case ir_relation_unordered_less_greater:
2131 case ir_relation_less_greater: return ia32_cc_not_equal;
2132 case ir_relation_less_equal_greater:
2133 case ir_relation_unordered:
2134 case ir_relation_false:
2135 case ir_relation_true:
2136 /* introduce jump always/jump never? */
2139 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and return the condition
 * code (via cc_out) that a consumer (Jcc/Setcc/CMov) must test. */
2143 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2145 /* must have a Cmp as input */
2146 ir_relation relation = get_Cmp_relation(cmp);
2147 ir_node *l = get_Cmp_left(cmp);
2148 ir_node *r = get_Cmp_right(cmp);
2149 ir_mode *mode = get_irn_mode(l);
2150 bool overflow_possible;
2153 /* check for bit-test */
/* (x & (1 << n)) ==/!= 0 can be done with a single bt instruction */
2154 if (ia32_cg_config.use_bt
2155 && (relation == ir_relation_equal
2156 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2157 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2159 ir_node *la = get_And_left(l);
2160 ir_node *ra = get_And_right(l);
2167 ir_node *c = get_Shl_left(la);
2168 if (is_Const_1(c) && is_Const_0(r)) {
2169 /* (1 << n) & ra) */
2170 ir_node *n = get_Shl_right(la);
2171 flags = gen_bt(cmp, ra, n);
2172 /* the bit is copied into the CF flag */
2173 if (relation & ir_relation_equal)
2174 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2176 *cc_out = ia32_cc_below; /* test for CF=1 */
2182 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2183 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2184 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2185 * a predecessor node). So add the < bit.
2186 * (Note that we do not want to produce <=> (which can happen for
2187 * unoptimized code), because no x86 flag can represent that */
2188 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2189 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparing against 0 cannot overflow, enabling cheaper sign-flag codes */
2191 overflow_possible = true;
2192 if (is_Const(r) && is_Const_null(r))
2193 overflow_possible = false;
2195 /* just do a normal transformation of the Cmp */
2196 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2197 flags = be_transform_node(cmp);
2202 * Transforms a Load.
2204 * @return the created ia32 Load node
2206 static ir_node *gen_Load(ir_node *node)
2208 ir_node *old_block = get_nodes_block(node);
2209 ir_node *block = be_transform_node(old_block);
2210 ir_node *ptr = get_Load_ptr(node);
2211 ir_node *mem = get_Load_mem(node);
2212 ir_node *new_mem = be_transform_node(mem);
2213 dbg_info *dbgi = get_irn_dbg_info(node);
2214 ir_mode *mode = get_Load_mode(node);
2215 int throws_exception = ir_throws_exception(node);
2219 ia32_address_t addr;
2221 /* construct load address */
2222 memset(&addr, 0, sizeof(addr));
2223 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2230 base = be_transform_node(base);
2236 idx = be_transform_node(idx);
/* float loads: SSE2 xLoad or x87 vfld */
2239 if (mode_is_float(mode)) {
2240 if (ia32_cg_config.use_sse2) {
2241 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2244 new_node = new_bd_ia32_vfld(dbgi, block, base, idx, new_mem,
2248 assert(mode != mode_b);
2250 /* create a conv node with address mode for smaller modes */
2251 if (get_mode_size_bits(mode) < 32) {
2252 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2253 new_mem, noreg_GP, mode);
2255 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
/* loads may fault — preserve the exception property */
2258 ir_set_throws_exception(new_node, throws_exception);
2260 set_irn_pinned(new_node, get_irn_pinned(node));
2261 set_ia32_op_type(new_node, ia32_AddrModeS);
2262 set_ia32_ls_mode(new_node, mode);
2263 set_address(new_node, &addr);
/* a floating (unpinned) load may be rematerialized by the spiller */
2265 if (get_irn_pinned(node) == op_pin_state_floats) {
2266 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
2267 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
2268 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2269 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2272 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether a load feeding this store can be folded into a
 * destination-address-mode instruction (read-modify-write). */
2277 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2278 ir_node *ptr, ir_node *other)
2285 /* we only use address mode if we're the only user of the load */
2286 if (get_irn_n_edges(node) > 1)
2289 load = get_Proj_pred(node);
/* load must live in the same block as the store */
2292 if (get_nodes_block(load) != block)
2295 /* store should have the same pointer as the load */
2296 if (get_Load_ptr(load) != ptr)
2299 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2300 if (other != NULL &&
2301 get_nodes_block(other) == block &&
2302 heights_reachable_in_block(ia32_heights, other, load)) {
/* reject if something between load and store would break the fold */
2306 if (ia32_prevents_AM(block, load, mem))
2308 /* Store should be attached to the load via mem */
2309 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a binary destination-address-mode node (op [mem], reg/imm) for a
 * Store(binop(Load(p), x), p) pattern. Returns the new node, or presumably
 * NULL when the pattern does not apply (early-out lines not visible here). */
2314 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2315 ir_node *mem, ir_node *ptr, ir_mode *mode,
2316 construct_binop_dest_func *func,
2317 construct_binop_dest_func *func8bit,
2318 match_flags_t flags)
2320 ir_node *src_block = get_nodes_block(node);
2328 ia32_address_mode_t am;
2329 ia32_address_t *addr = &am.addr;
2330 memset(&am, 0, sizeof(am));
2332 assert(flags & match_immediate); /* there is no destam node without... */
2333 commutative = (flags & match_commutative) != 0;
/* try the load on either side (second side only for commutative ops) */
2335 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2336 build_address(&am, op1, ia32_create_am_double_use);
2337 new_op = create_immediate_or_transform(op2, 0);
2338 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2339 build_address(&am, op2, ia32_create_am_double_use);
2340 new_op = create_immediate_or_transform(op1, 0);
/* fill unset address parts with the no-register placeholder */
2345 if (addr->base == NULL)
2346 addr->base = noreg_GP;
2347 if (addr->index == NULL)
2348 addr->index = noreg_GP;
2349 if (addr->mem == NULL)
2352 dbgi = get_irn_dbg_info(node);
2353 block = be_transform_node(src_block);
2354 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit node constructor */
2356 if (get_mode_size_bits(mode) == 8) {
2357 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2359 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2361 set_address(new_node, addr);
2362 set_ia32_op_type(new_node, ia32_AddrModeD);
2363 set_ia32_ls_mode(new_node, mode);
2364 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's memory users to the new RMW node */
2366 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2367 mem_proj = be_transform_node(am.mem_proj);
2368 be_set_transformed_node(am.mem_proj, new_node);
2369 be_set_transformed_node(mem_proj, new_node);
/* Build a unary destination-address-mode node (op [mem]) for a
 * Store(unop(Load(p)), p) pattern; early-outs when folding is not legal. */
2374 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2375 ir_node *ptr, ir_mode *mode,
2376 construct_unop_dest_func *func)
2378 ir_node *src_block = get_nodes_block(node);
2384 ia32_address_mode_t am;
2385 ia32_address_t *addr = &am.addr;
2387 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2390 memset(&am, 0, sizeof(am));
2391 build_address(&am, op, ia32_create_am_double_use);
2393 dbgi = get_irn_dbg_info(node);
2394 block = be_transform_node(src_block);
2395 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2396 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2397 set_address(new_node, addr);
2398 set_ia32_op_type(new_node, ia32_AddrModeD);
2399 set_ia32_ls_mode(new_node, mode);
2400 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the folded load's memory users to the new RMW node */
2402 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2403 mem_proj = be_transform_node(am.mem_proj);
2404 be_set_transformed_node(am.mem_proj, new_node);
2405 be_set_transformed_node(mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) into a single SetccMem (setcc to
 * memory); only possible for 8-bit stores of 0/1 Mux values. */
2410 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2412 ir_mode *mode = get_irn_mode(node);
2413 ir_node *mux_true = get_Mux_true(node);
2414 ir_node *mux_false = get_Mux_false(node);
2422 ia32_condition_code_t cc;
2423 ia32_address_t addr;
/* setcc only writes a single byte */
2425 if (get_mode_size_bits(mode) != 8)
/* Mux values must be the 1/0 (or 0/1, negated condition) pair */
2428 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2430 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2436 cond = get_Mux_sel(node);
2437 flags = get_flags_node(cond, &cc);
2438 /* we can't handle the float special cases with SetM */
2439 if (cc & ia32_cc_additional_float_cases)
/* 0/1 ordering inverted: negate the condition instead of swapping */
2442 cc = ia32_negate_condition_code(cc);
2444 build_address_ptr(&addr, ptr, mem);
2446 dbgi = get_irn_dbg_info(node);
2447 block = get_nodes_block(node);
2448 new_block = be_transform_node(block);
2449 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2450 addr.index, addr.mem, flags, cc);
2451 set_address(new_node, &addr);
2452 set_ia32_op_type(new_node, ia32_AddrModeD);
2453 set_ia32_ls_mode(new_node, mode);
2454 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform Store(op(Load(ptr))) into a single destination-address-mode
 * instruction (e.g. AddMem, SubMem, NotMem, ...), avoiding the separate
 * load/store pair.
 *
 * @param node  the Store node
 * @return the created destination-AM node, or NULL if no pattern matched
 */
2459 static ir_node *try_create_dest_am(ir_node *node)
2461 ir_node *val = get_Store_value(node);
2462 ir_node *mem = get_Store_mem(node);
2463 ir_node *ptr = get_Store_ptr(node);
2464 ir_mode *mode = get_irn_mode(val);
2465 unsigned bits = get_mode_size_bits(mode);
2470 /* handle only GP modes for now... */
2471 if (!ia32_mode_needs_gp_reg(mode))
2475 /* store must be the only user of the val node */
2476 if (get_irn_n_edges(val) > 1)
2478 /* skip pointless convs */
2480 ir_node *conv_op = get_Conv_op(val);
2481 ir_mode *pred_mode = get_irn_mode(conv_op);
2482 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a conv that does not lose bits relative to the stored width is a no-op */
2484 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2492 /* value must be in the same block */
2493 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the stored operation */
2496 switch (get_irn_opcode(val)) {
2498 op1 = get_Add_left(val);
2499 op2 = get_Add_right(val);
/* prefer inc/dec over add/sub with immediate 1 when profitable */
2500 if (ia32_cg_config.use_incdec) {
2501 if (is_Const_1(op2)) {
2502 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2504 } else if (is_Const_Minus_1(op2)) {
2505 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2509 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2510 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2511 match_commutative | match_immediate);
2514 op1 = get_Sub_left(val);
2515 op2 = get_Sub_right(val);
/* Sub with constant right operand should have been normalized to Add */
2516 if (is_Const(op2)) {
2517 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2519 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2520 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2524 op1 = get_And_left(val);
2525 op2 = get_And_right(val);
2526 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2527 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2528 match_commutative | match_immediate);
2531 op1 = get_Or_left(val);
2532 op2 = get_Or_right(val);
2533 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2534 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2535 match_commutative | match_immediate);
2538 op1 = get_Eor_left(val);
2539 op2 = get_Eor_right(val);
2540 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2541 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2542 match_commutative | match_immediate);
2545 op1 = get_Shl_left(val);
2546 op2 = get_Shl_right(val);
2547 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2548 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2552 op1 = get_Shr_left(val);
2553 op2 = get_Shr_right(val);
2554 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2555 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2559 op1 = get_Shrs_left(val);
2560 op2 = get_Shrs_right(val);
2561 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2562 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2566 op1 = get_Rotl_left(val);
2567 op2 = get_Rotl_right(val);
2568 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2569 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2572 /* TODO: match ROR patterns... */
2574 new_node = try_create_SetMem(val, ptr, mem);
2578 op1 = get_Minus_op(val);
2579 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2582 /* should be lowered already */
2583 assert(mode != mode_b);
2584 op1 = get_Not_op(val);
2585 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must stay pinned after being merged into an RMW node */
2591 if (new_node != NULL) {
2592 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2593 get_irn_pinned(node) == op_pin_state_pinned) {
2594 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether @p mode is an integer mode that the x87 fild/fist family can
 * handle directly: signed with a size of 16 or 32 bits.
 */
2601 static bool possible_int_mode_for_fp(ir_mode *mode)
2605 if (!mode_is_signed(mode))
2607 size = get_mode_size_bits(mode);
2608 if (size != 16 && size != 32)
/**
 * Check whether @p node is a Conv from a float mode to an integer mode that
 * fist can store directly (see possible_int_mode_for_fp()).
 */
2613 static int is_float_to_int_conv(const ir_node *node)
2615 ir_mode *mode = get_irn_mode(node);
2619 if (!possible_int_mode_for_fp(mode))
2624 conv_op = get_Conv_op(node);
2625 conv_mode = get_irn_mode(conv_op);
2627 if (!mode_is_float(conv_mode))
2634 * Transform a Store(floatConst) into a sequence of
/* integer stores: the constant's raw bytes are written in 4- and 2-byte
 * chunks; the memory outputs are combined with a Sync when needed. */
2637 * @return the created ia32 Store node
2639 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2641 ir_mode *mode = get_irn_mode(cns);
2642 unsigned size = get_mode_size_bytes(mode);
2643 ir_tarval *tv = get_Const_tarval(cns);
2644 ir_node *block = get_nodes_block(node);
2645 ir_node *new_block = be_transform_node(block);
2646 ir_node *ptr = get_Store_ptr(node);
2647 ir_node *mem = get_Store_mem(node);
2648 dbg_info *dbgi = get_irn_dbg_info(node);
2651 int throws_exception = ir_throws_exception(node);
2653 ia32_address_t addr;
2655 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32-bit chunk from the tarval, little endian */
2662 val= get_tarval_sub_bits(tv, ofs) |
2663 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2664 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2665 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2668 } else if (size >= 2) {
/* remaining 16-bit chunk */
2669 val= get_tarval_sub_bits(tv, ofs) |
2670 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2674 panic("invalid size of Store float to mem (%+F)", node);
2676 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2678 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2679 addr.index, addr.mem, imm);
2680 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2682 ir_set_throws_exception(new_node, throws_exception);
2683 set_irn_pinned(new_node, get_irn_pinned(node));
2684 set_ia32_op_type(new_node, ia32_AddrModeD);
2685 set_ia32_ls_mode(new_node, mode);
2686 set_address(new_node, &addr);
2687 SET_IA32_ORIG_NODE(new_node, node);
/* advance to the next chunk until all bytes are stored */
2694 addr.offset += delta;
2695 } while (size != 0);
/* several partial stores: combine their memory effects with a Sync */
2698 return new_rd_Sync(dbgi, new_block, i, ins);
2700 return get_Proj_pred(ins[0]);
2705 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without touching the FPU control word, but always
 * pops the x87 stack; plain fist needs a temporary truncating fpcw. */
2707 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2708 ir_node *index, ir_node *mem, ir_node *val)
2710 if (ia32_cg_config.use_fisttp) {
2711 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2712 if other users exists */
2713 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2714 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive so the register allocator copies it if needed */
2715 be_new_Keep(block, 1, &value);
2719 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2722 ir_node *vfist = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2728 * Transforms a general (no special case) Store.
/* Tries destination address mode first, then emits the matching ia32 store
 * (xStore for SSE floats, vfst for x87 floats, vfist for float->int convs,
 * Store/Store8Bit for integers). */
2730 * @return the created ia32 Store node
2732 static ir_node *gen_general_Store(ir_node *node)
2734 ir_node *val = get_Store_value(node);
2735 ir_mode *mode = get_irn_mode(val);
2736 ir_node *block = get_nodes_block(node);
2737 ir_node *new_block = be_transform_node(block);
2738 ir_node *ptr = get_Store_ptr(node);
2739 ir_node *mem = get_Store_mem(node);
2740 dbg_info *dbgi = get_irn_dbg_info(node);
2741 int throws_exception = ir_throws_exception(node);
2744 ia32_address_t addr;
2746 /* check for destination address mode */
2747 new_node = try_create_dest_am(node);
2748 if (new_node != NULL)
2751 /* construct store address */
2752 memset(&addr, 0, sizeof(addr));
2753 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2755 if (addr.base == NULL) {
2756 addr.base = noreg_GP;
2758 addr.base = be_transform_node(addr.base);
2761 if (addr.index == NULL) {
2762 addr.index = noreg_GP;
2764 addr.index = be_transform_node(addr.index);
2766 addr.mem = be_transform_node(mem);
2768 if (mode_is_float(mode)) {
2769 if (ia32_cg_config.use_sse2) {
2770 new_val = be_transform_node(val);
2771 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2772 addr.index, addr.mem, new_val);
/* x87 can store in any precision, so a downconv before the store is moot */
2774 val = ia32_skip_float_downconv(val);
2775 new_val = be_transform_node(val);
2776 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2777 addr.index, addr.mem, new_val, mode);
/* float->int conversion feeding a store: use fist directly */
2779 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2780 val = get_Conv_op(val);
2781 new_val = be_transform_node(val);
2782 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2784 unsigned dest_bits = get_mode_size_bits(mode);
/* skip downconvs that don't narrow below the stored width */
2785 while (is_downconv(val)
2786 && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2787 val = get_Conv_op(val);
2789 new_val = create_immediate_or_transform(val, 0);
2790 assert(mode != mode_b);
2792 if (dest_bits == 8) {
2793 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2794 addr.index, addr.mem, new_val);
2796 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2797 addr.index, addr.mem, new_val);
2800 ir_set_throws_exception(new_node, throws_exception);
2802 set_irn_pinned(new_node, get_irn_pinned(node));
2803 set_ia32_op_type(new_node, ia32_AddrModeD);
2804 set_ia32_ls_mode(new_node, mode);
2806 set_address(new_node, &addr);
2807 SET_IA32_ORIG_NODE(new_node, node);
2813 * Transforms a Store.
2815 * @return the created ia32 Store node
2817 static ir_node *gen_Store(ir_node *node)
2819 ir_node *val = get_Store_value(node);
2820 ir_mode *mode = get_irn_mode(val);
2822 if (mode_is_float(mode) && is_Const(val)) {
2823 /* We can transform every floating const store
2824 into a sequence of integer stores.
2825 If the constant is already in a register,
2826 it would be better to use it, but we don't
2827 have this information here. */
2828 return gen_float_const_Store(node, val);
2830 return gen_general_Store(node);
2834 * Transforms a Switch.
/* Builds an indirect jump through a table of code addresses placed in a
 * private constant entity; the selector is zero-extended to 32 bit first. */
2836 * @return the created ia32 SwitchJmp node
2838 static ir_node *gen_Switch(ir_node *node)
2840 dbg_info *dbgi = get_irn_dbg_info(node);
2841 ir_graph *irg = get_irn_irg(node);
2842 ir_node *block = be_transform_node(get_nodes_block(node));
2843 ir_node *sel = get_Switch_selector(node);
2844 ir_node *new_sel = be_transform_node(sel);
2845 ir_mode *sel_mode = get_irn_mode(sel);
2846 const ir_switch_table *table = get_Switch_table(node);
2847 unsigned n_outs = get_Switch_n_outs(node);
2851 assert(get_mode_size_bits(sel_mode) <= 32);
2852 assert(!mode_is_float(sel_mode));
2853 sel = ia32_skip_sameconv(sel);
/* a sub-32-bit selector must be upconverted so the table index is valid */
2854 if (get_mode_size_bits(sel_mode) < 32)
2855 new_sel = transform_upconv(sel, node);
/* jump table lives in a private, constant entity */
2857 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2858 set_entity_visibility(entity, ir_visibility_private);
2859 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2861 table = ir_switch_table_duplicate(irg, table);
2863 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
/* scale 2 = index * 4: each table slot is one 32-bit address */
2864 set_ia32_am_scale(new_node, 2);
2865 set_ia32_am_sc(new_node, entity);
2866 set_ia32_op_type(new_node, ia32_AddrModeS);
2867 set_ia32_ls_mode(new_node, mode_Iu);
2868 SET_IA32_ORIG_NODE(new_node, node);
2869 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2870 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2876 * Transform a Cond node.
/* Materializes the comparison flags and emits a conditional jump (Jcc). */
2878 static ir_node *gen_Cond(ir_node *node)
2880 ir_node *block = get_nodes_block(node);
2881 ir_node *new_block = be_transform_node(block);
2882 dbg_info *dbgi = get_irn_dbg_info(node);
2883 ir_node *sel = get_Cond_selector(node);
2884 ir_node *flags = NULL;
2886 ia32_condition_code_t cc;
2888 /* we get flags from a Cmp */
2889 flags = get_flags_node(sel, &cc);
2891 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2892 SET_IA32_ORIG_NODE(new_node, node);
2898 * Transform a be_Copy.
2900 static ir_node *gen_be_Copy(ir_node *node)
2902 ir_node *new_node = be_duplicate_node(node);
2903 ir_mode *mode = get_irn_mode(new_node);
2905 if (ia32_mode_needs_gp_reg(mode)) {
2906 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node.
 *
 * With fucomi support the flags are produced directly; otherwise the FPU
 * status word is fetched (Ftst/FucomFnstsw) and moved into eflags via Sahf.
 */
2912 static ir_node *create_Fucom(ir_node *node)
2914 dbg_info *dbgi = get_irn_dbg_info(node);
2915 ir_node *block = get_nodes_block(node);
2916 ir_node *new_block = be_transform_node(block);
2917 ir_node *left = get_Cmp_left(node);
2918 ir_node *new_left = be_transform_node(left);
2919 ir_node *right = get_Cmp_right(node);
2920 ir_mode *cmp_mode = get_irn_mode(left);
2923 check_x87_floatmode(cmp_mode);
2925 if (ia32_cg_config.use_fucomi) {
2926 new_right = be_transform_node(right);
2927 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2929 set_ia32_commutative(new_node);
2930 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0.0 can use the cheaper ftst */
2932 if (is_Const_0(right)) {
2933 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2935 new_right = be_transform_node(right);
2936 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2937 set_ia32_commutative(new_node);
2940 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into the eflags register */
2942 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2943 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE Ucomi(s) compare for a Cmp node, with address-mode matching
 * for one of the operands.
 */
2949 static ir_node *create_Ucomi(ir_node *node)
2951 dbg_info *dbgi = get_irn_dbg_info(node);
2952 ir_node *src_block = get_nodes_block(node);
2953 ir_node *new_block = be_transform_node(src_block);
2954 ir_node *left = get_Cmp_left(node);
2955 ir_node *right = get_Cmp_right(node);
2957 ia32_address_mode_t am;
2958 ia32_address_t *addr = &am.addr;
2960 match_arguments(&am, src_block, left, right, NULL,
2961 match_commutative | match_am);
2963 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2964 addr->mem, am.new_op1, am.new_op2,
2966 set_am_attributes(new_node, &am);
2968 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the memory Proj of a folded load */
2970 new_node = fix_mem_proj(new_node, &am);
/**
 * Check whether a Mux's result has clean upper bits with respect to @p mode:
 * both Mux values must be upper-bits clean. Muxes that will be lowered to an
 * 8-bit Setcc (const/const, 8-bit mode) are excluded.
 */
2975 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2977 ir_node *mux_true = get_Mux_true(node);
2978 ir_node *mux_false = get_Mux_false(node);
2979 ir_mode *mux_mode = get_irn_mode(node);
2980 /* mux nodes which get transformed to the set instruction are not clean */
2981 if (is_Const(mux_true) && is_Const(mux_false)
2982 && get_mode_size_bits(mux_mode) == 8) {
2985 return be_upper_bits_clean(mux_true, mode)
2986 && be_upper_bits_clean(mux_false, mode);
2990 * Generate code for a Cmp.
/* Dispatches float compares to Ucomi (SSE) or Fucom (x87); integer compares
 * become Test (for "(x & y) ==/!= 0") or Cmp nodes with address-mode and
 * immediate matching. */
2992 static ir_node *gen_Cmp(ir_node *node)
2994 dbg_info *dbgi = get_irn_dbg_info(node);
2995 ir_node *block = get_nodes_block(node);
2996 ir_node *new_block = be_transform_node(block);
2997 ir_node *left = get_Cmp_left(node);
2998 ir_node *right = get_Cmp_right(node);
2999 ir_mode *cmp_mode = get_irn_mode(left);
3001 ia32_address_mode_t am;
3002 ia32_address_t *addr = &am.addr;
3004 if (mode_is_float(cmp_mode)) {
3005 if (ia32_cg_config.use_sse2) {
3006 return create_Ucomi(node);
3008 return create_Fucom(node);
3012 assert(ia32_mode_needs_gp_reg(cmp_mode));
3014 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3015 if (is_Const_0(right) &&
3017 get_irn_n_edges(left) == 1) {
3018 /* Test(and_left, and_right) */
3019 ir_node *and_left = get_And_left(left);
3020 ir_node *and_right = get_And_right(left);
3022 /* matze: code here used mode instead of cmd_mode, I think it is always
3023 * the same as cmp_mode, but I leave this here to see if this is really
3026 assert(get_irn_mode(and_left) == cmp_mode);
3028 match_arguments(&am, block, and_left, and_right, NULL,
3030 match_am | match_8bit_am | match_16bit_am |
3031 match_am_and_immediates | match_immediate);
3033 /* use 32bit compare mode if possible since the opcode is smaller */
3034 if (am.op_type == ia32_Normal &&
3035 be_upper_bits_clean(and_left, cmp_mode) &&
3036 be_upper_bits_clean(and_right, cmp_mode)) {
3037 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3040 if (get_mode_size_bits(cmp_mode) == 8) {
3041 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
3042 addr->index, addr->mem,
3043 am.new_op1, am.new_op2,
3046 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base,
3047 addr->index, addr->mem, am.new_op1,
3048 am.new_op2, am.ins_permuted);
3051 /* Cmp(left, right) */
3052 match_arguments(&am, block, left, right, NULL,
3054 match_am | match_8bit_am | match_16bit_am |
3055 match_am_and_immediates | match_immediate);
3056 /* use 32bit compare mode if possible since the opcode is smaller */
3057 if (am.op_type == ia32_Normal &&
3058 be_upper_bits_clean(left, cmp_mode) &&
3059 be_upper_bits_clean(right, cmp_mode)) {
3060 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3063 if (get_mode_size_bits(cmp_mode) == 8) {
3064 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
3065 addr->index, addr->mem, am.new_op1,
3066 am.new_op2, am.ins_permuted);
3068 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
3069 addr->mem, am.new_op1, am.new_op2,
3073 set_am_attributes(new_node, &am);
3074 set_ia32_ls_mode(new_node, cmp_mode);
3076 SET_IA32_ORIG_NODE(new_node, node);
3078 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMovcc for a GP-mode Mux given already-computed flags.
 *
 * @param node       the Mux node
 * @param flags      the untransformed flags-producing node (for AM matching)
 * @param new_flags  the transformed flags node used as CMov input
 * @param cc         the condition code selecting the true value
 */
3083 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3084 ia32_condition_code_t cc)
3086 dbg_info *dbgi = get_irn_dbg_info(node);
3087 ir_node *block = get_nodes_block(node);
3088 ir_node *new_block = be_transform_node(block);
3089 ir_node *val_true = get_Mux_true(node);
3090 ir_node *val_false = get_Mux_false(node);
3092 ia32_address_mode_t am;
3093 ia32_address_t *addr;
3095 assert(ia32_cg_config.use_cmov);
3096 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3100 match_arguments(&am, block, val_false, val_true, flags,
3101 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matcher may have swapped the operands; compensate in the condition */
3103 if (am.ins_permuted)
3104 cc = ia32_negate_condition_code(cc);
3106 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3107 addr->mem, am.new_op1, am.new_op2, new_flags,
3109 set_am_attributes(new_node, &am);
3111 SET_IA32_ORIG_NODE(new_node, node);
3113 new_node = fix_mem_proj(new_node, &am);
3119 * Creates a ia32 Setcc instruction.
/* Setcc only writes 8 bits; for wider result modes the value is
 * zero-extended with a Conv_I2I8Bit. */
3121 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3122 ir_node *flags, ia32_condition_code_t cc,
3125 ir_mode *mode = get_irn_mode(orig_node);
3128 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3129 SET_IA32_ORIG_NODE(new_node, orig_node);
3131 /* we might need to conv the result up */
3132 if (get_mode_size_bits(mode) > 8) {
3133 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3134 nomem, new_node, mode_Bu);
3135 SET_IA32_ORIG_NODE(new_node, orig_node);
3142 * Create instruction for an unsigned Difference or Zero.
/* Computes max(a - b, 0) branch-free: sub sets the carry flag on borrow,
 * Sbb0 turns the carry into an all-ones/all-zeros mask, which is inverted
 * and ANDed with the subtraction result. */
3144 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3146 ir_mode *mode = get_irn_mode(psi);
3156 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3157 match_mode_neutral | match_am | match_immediate | match_two_users);
3159 block = get_nodes_block(new_node);
3161 if (is_Proj(new_node)) {
3162 sub = get_Proj_pred(new_node);
/* the Sub needs both its result and its flags output */
3165 set_irn_mode(sub, mode_T);
3166 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3168 assert(is_ia32_Sub(sub));
3169 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3171 dbgi = get_irn_dbg_info(psi);
3172 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3173 set_ia32_ls_mode(sbb, mode_Iu);
3174 notn = new_bd_ia32_Not(dbgi, block, sbb);
3176 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3177 set_ia32_ls_mode(new_node, mode_Iu);
3178 set_ia32_commutative(new_node);
3183 * Create an const array of two float consts.
3185 * @param c0 the first constant
3186 * @param c1 the second constant
3187 * @param new_mode IN/OUT for the mode of the constants, if NULL
3188 * smallest possible mode will be used
/* Returns a private, constant global entity holding { c0, c1 }, demoting
 * both tarvals to the smallest float mode that represents them losslessly. */
3190 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3193 ir_mode *mode = *new_mode;
3195 ir_initializer_t *initializer;
3196 ir_tarval *tv0 = get_Const_tarval(c0);
3197 ir_tarval *tv1 = get_Const_tarval(c1);
3200 /* detect the best mode for the constants */
3201 mode = get_tarval_mode(tv0);
3203 if (mode != mode_F) {
/* try single precision first, then double precision */
3204 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3205 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3207 tv0 = tarval_convert_to(tv0, mode);
3208 tv1 = tarval_convert_to(tv1, mode);
3209 } else if (mode != mode_D) {
3210 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3211 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3213 tv0 = tarval_convert_to(tv0, mode);
3214 tv1 = tarval_convert_to(tv1, mode);
3221 tp = ia32_get_prim_type(mode);
3222 tp = ia32_create_float_array(tp);
3224 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3226 set_entity_ld_ident(ent, get_entity_ident(ent));
3227 set_entity_visibility(ent, ir_visibility_private);
3228 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3230 initializer = create_initializer_compound(2);
3232 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3233 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3235 set_entity_initializer(ent, initializer);
3242 * Possible transformations for creating a Setcc.
3244 enum setcc_transform_insn {
/* Recipe for lowering Mux(cc, const_t, const_f): condition code plus a
 * sequence of post-processing steps (add/lea/shift/neg/...). */
3256 typedef struct setcc_transform {
/* the (possibly negated) condition code the Setcc should test */
3258 ia32_condition_code_t cc;
/* transformation steps applied to the 0/1 Setcc result */
3260 enum setcc_transform_insn transform;
3264 } setcc_transform_t;
3267 * Setcc can only handle 0 and 1 result.
3268 * Find a transformation that creates 0 and 1 from
/* the requested constants t (true value) and f (false value): normalize so
 * f == 0 and t > f, then express t as a small step sequence (set, add, lea,
 * shl, neg, not, and). Result is written to @p res. */
3271 static void find_const_transform(ia32_condition_code_t cc,
3272 ir_tarval *t, ir_tarval *f,
3273 setcc_transform_t *res)
/* swap t/f (negating the condition) so that t becomes the non-zero /
 * larger value */
3279 if (tarval_is_null(t)) {
3283 cc = ia32_negate_condition_code(cc);
3284 } else if (tarval_cmp(t, f) == ir_relation_less) {
3285 // now, t is the bigger one
3289 cc = ia32_negate_condition_code(cc);
/* normalize f to zero: compute with t-f and add f back at the end */
3293 if (! tarval_is_null(f)) {
3294 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3297 res->steps[step].transform = SETCC_TR_ADD;
3299 if (t == tarval_bad)
3300 panic("constant subtract failed");
3301 if (! tarval_is_long(f))
3302 panic("tarval is not long");
3304 res->steps[step].val = get_tarval_long(f);
3306 f = tarval_sub(f, f, NULL);
3307 assert(tarval_is_null(f));
/* t == 1: plain Setcc suffices */
3310 if (tarval_is_one(t)) {
3311 res->steps[step].transform = SETCC_TR_SET;
3312 res->num_steps = ++step;
/* t == -1: Setcc then negate */
3316 if (tarval_is_minus_one(t)) {
3317 res->steps[step].transform = SETCC_TR_NEG;
3319 res->steps[step].transform = SETCC_TR_SET;
3320 res->num_steps = ++step;
3323 if (tarval_is_long(t)) {
3324 long v = get_tarval_long(t);
3326 res->steps[step].val = 0;
/* small multiples of the 0/1 result map onto a single lea:
 * 9*a = (a << 3) + a, 8*a = a << 3, 5*a, 4*a, 3*a, 2*a ... */
3329 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3331 res->steps[step].transform = SETCC_TR_LEAxx;
3332 res->steps[step].scale = 3; /* (a << 3) + a */
3335 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3337 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3338 res->steps[step].scale = 3; /* (a << 3) */
3341 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3343 res->steps[step].transform = SETCC_TR_LEAxx;
3344 res->steps[step].scale = 2; /* (a << 2) + a */
3347 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3349 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3350 res->steps[step].scale = 2; /* (a << 2) */
3353 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3355 res->steps[step].transform = SETCC_TR_LEAxx;
3356 res->steps[step].scale = 1; /* (a << 1) + a */
3359 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3361 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3362 res->steps[step].scale = 1; /* (a << 1) */
3365 res->num_steps = step;
/* general case: mask (for multi-bit t) after negating, or shift the
 * single set bit into position */
3368 if (! tarval_is_single_bit(t)) {
3369 res->steps[step].transform = SETCC_TR_AND;
3370 res->steps[step].val = v;
3372 res->steps[step].transform = SETCC_TR_NEG;
3374 int val = get_tarval_lowest_bit(t);
3377 res->steps[step].transform = SETCC_TR_SHL;
3378 res->steps[step].scale = val;
3382 res->steps[step].transform = SETCC_TR_SET;
3383 res->num_steps = ++step;
3386 panic("tarval is not long");
3390 * Transforms a Mux node into some code sequence.
/* Handles, in order: float abs patterns, SSE min/max patterns, float
 * const/const Muxes (Setcc + load from a two-element constant array),
 * unsigned difference-or-zero, integer const/const Muxes (Setcc plus a
 * step sequence from find_const_transform()), and finally CMov. */
3392 * @return The transformed node.
3394 static ir_node *gen_Mux(ir_node *node)
3396 dbg_info *dbgi = get_irn_dbg_info(node);
3397 ir_node *block = get_nodes_block(node);
3398 ir_node *new_block = be_transform_node(block);
3399 ir_node *mux_true = get_Mux_true(node);
3400 ir_node *mux_false = get_Mux_false(node);
3401 ir_node *sel = get_Mux_sel(node);
3402 ir_mode *mode = get_irn_mode(node);
3406 ia32_condition_code_t cc;
3408 assert(get_irn_mode(sel) == mode_b);
/* abs(x) pattern: only float abs is lowered here, integer abs is left */
3410 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
3412 if (ia32_mode_needs_gp_reg(mode)) {
3413 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3416 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3417 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3421 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3422 if (mode_is_float(mode)) {
3423 ir_node *cmp_left = get_Cmp_left(sel);
3424 ir_node *cmp_right = get_Cmp_right(sel);
3425 ir_relation relation = get_Cmp_relation(sel);
3427 if (ia32_cg_config.use_sse2) {
3428 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3429 if (cmp_left == mux_true && cmp_right == mux_false) {
3430 /* Mux(a <= b, a, b) => MIN */
3431 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3432 match_commutative | match_am | match_two_users);
3433 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3434 /* Mux(a <= b, b, a) => MAX */
3435 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3436 match_commutative | match_am | match_two_users);
3438 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3439 if (cmp_left == mux_true && cmp_right == mux_false) {
3440 /* Mux(a >= b, a, b) => MAX */
3441 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3442 match_commutative | match_am | match_two_users);
3443 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3444 /* Mux(a >= b, b, a) => MIN */
3445 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3446 match_commutative | match_am | match_two_users);
/* float Mux of two constants: index a two-element constant array with
 * the 0/1 Setcc result and load the selected value */
3451 if (is_Const(mux_true) && is_Const(mux_false)) {
3452 ia32_address_mode_t am;
3457 flags = get_flags_node(sel, &cc);
3458 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3460 if (ia32_cg_config.use_sse2) {
3461 /* cannot load from different mode on SSE */
3464 /* x87 can load any mode */
3468 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* AM scale is limited to shifts; element sizes of 4/8 map to scale
 * 2/3, the 10/12-byte x87 long double needs an extra Lea */
3470 if (new_mode == mode_F) {
3472 } else if (new_mode == mode_D) {
3474 } else if (new_mode == ia32_mode_E) {
3475 /* arg, shift 16 NOT supported */
3477 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3479 panic("Unsupported constant size");
3482 am.ls_mode = new_mode;
3483 am.addr.base = get_symconst_base();
3484 am.addr.index = new_node;
3485 am.addr.mem = nomem;
3487 am.addr.scale = scale;
3488 am.addr.use_frame = 0;
3489 am.addr.tls_segment = false;
3490 am.addr.frame_entity = NULL;
3491 am.addr.symconst_sign = 0;
3492 am.mem_proj = am.addr.mem;
3493 am.op_type = ia32_AddrModeS;
3496 am.pinned = op_pin_state_floats;
3498 am.ins_permuted = false;
3500 if (ia32_cg_config.use_sse2)
3501 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3503 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3504 set_am_attributes(load, &am);
3506 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3508 panic("cannot transform floating point Mux");
3511 assert(ia32_mode_needs_gp_reg(mode));
3514 ir_node *cmp_left = get_Cmp_left(sel);
3515 ir_node *cmp_right = get_Cmp_right(sel);
3516 ir_relation relation = get_Cmp_relation(sel);
3517 ir_node *val_true = mux_true;
3518 ir_node *val_false = mux_false;
/* normalize: put the zero constant on the false side */
3520 if (is_Const(val_true) && is_Const_null(val_true)) {
3521 ir_node *tmp = val_false;
3522 val_false = val_true;
3524 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) => unsigned difference-or-zero */
3526 if (is_Const_0(val_false) && is_Sub(val_true)) {
3527 if ((relation & ir_relation_greater)
3528 && get_Sub_left(val_true) == cmp_left
3529 && get_Sub_right(val_true) == cmp_right) {
3530 return create_doz(node, cmp_left, cmp_right);
3532 if ((relation & ir_relation_less)
3533 && get_Sub_left(val_true) == cmp_right
3534 && get_Sub_right(val_true) == cmp_left) {
3535 return create_doz(node, cmp_right, cmp_left);
3540 flags = get_flags_node(sel, &cc);
3542 if (is_Const(mux_true) && is_Const(mux_false)) {
3543 /* both are const, good */
3544 ir_tarval *tv_true = get_Const_tarval(mux_true);
3545 ir_tarval *tv_false = get_Const_tarval(mux_false);
3546 setcc_transform_t res;
3549 find_const_transform(cc, tv_true, tv_false, &res);
/* apply the step recipe back to front, starting from the Setcc */
3551 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3554 switch (res.steps[step].transform) {
3556 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3557 add_ia32_am_offs_int(new_node, res.steps[step].val);
3559 case SETCC_TR_ADDxx:
3560 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3563 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3564 set_ia32_am_scale(new_node, res.steps[step].scale);
3565 set_ia32_am_offs_int(new_node, res.steps[step].val);
3567 case SETCC_TR_LEAxx:
3568 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3569 set_ia32_am_scale(new_node, res.steps[step].scale);
3570 set_ia32_am_offs_int(new_node, res.steps[step].val);
3573 imm = ia32_immediate_from_long(res.steps[step].scale);
3574 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3577 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3580 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3583 imm = ia32_immediate_from_long(res.steps[step].val);
3584 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3587 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3590 panic("unknown setcc transform");
/* general case: conditional move */
3594 new_node = create_CMov(node, sel, flags, cc);
3601 * Create a conversion from x87 state register to general purpose.
/* Stores the x87 value to the frame with fist(tp) and reloads it as an
 * integer, since there is no direct fpu->gp register move. */
3603 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3605 ir_node *block = be_transform_node(get_nodes_block(node));
3606 ir_node *op = get_Conv_op(node);
3607 ir_node *new_op = be_transform_node(op);
3608 ir_graph *irg = current_ir_graph;
3609 dbg_info *dbgi = get_irn_dbg_info(node);
3610 ir_mode *mode = get_irn_mode(node);
3611 ir_node *frame = get_irg_frame(irg);
3612 ir_node *fist, *load, *mem;
3614 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3615 set_irn_pinned(fist, op_pin_state_floats);
3616 set_ia32_use_frame(fist);
3617 set_ia32_op_type(fist, ia32_AddrModeD);
/* the memory Proj number must be the same for both fist variants */
3619 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
3620 mem = new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
3622 assert(get_mode_size_bits(mode) <= 32);
3623 /* exception we can only store signed 32 bit integers, so for unsigned
3624 we store a 64bit (signed) integer and load the lower bits */
3625 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3626 set_ia32_ls_mode(fist, mode_Ls);
3628 set_ia32_ls_mode(fist, mode_Is);
3630 SET_IA32_ORIG_NODE(fist, node);
/* reload the (low 32 bits of the) stored integer from the frame slot */
3633 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3635 set_irn_pinned(load, op_pin_state_floats);
3636 set_ia32_use_frame(load);
3637 set_ia32_op_type(load, ia32_AddrModeS);
3638 set_ia32_ls_mode(load, mode_Is);
/* frame entity must be big enough for what the fist actually wrote */
3639 if (get_ia32_ls_mode(fist) == mode_Ls) {
3640 ia32_attr_t *attr = get_ia32_attr(load);
3641 attr->data.need_64bit_stackent = 1;
3643 ia32_attr_t *attr = get_ia32_attr(load);
3644 attr->data.need_32bit_stackent = 1;
3646 SET_IA32_ORIG_NODE(load, node);
3648 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3652 * Creates a x87 Conv by placing a Store and a Load
/* The x87 stack always computes in full precision; rounding to @p tgt_mode
 * is forced by a store/reload round trip through a frame slot. */
3654 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3656 ir_node *block = get_nodes_block(node);
3657 ir_graph *irg = get_Block_irg(block);
3658 dbg_info *dbgi = get_irn_dbg_info(node);
3659 ir_node *frame = get_irg_frame(irg);
3661 ir_node *store, *load;
3664 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3665 set_ia32_use_frame(store);
3666 set_ia32_op_type(store, ia32_AddrModeD);
3667 SET_IA32_ORIG_NODE(store, node);
3669 store_mem = new_r_Proj(store, mode_M, pn_ia32_vfst_M);
3671 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3672 set_ia32_use_frame(load);
3673 set_ia32_op_type(load, ia32_AddrModeS);
3674 SET_IA32_ORIG_NODE(load, node);
3676 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_vfld_res);
/* Creates an ia32 integer-to-integer Conv node, choosing the 8bit variant
 * when the source mode is 8 bits wide (needs byte-addressable registers). */
3680 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3681 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3683 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3685 func = get_mode_size_bits(mode) == 8 ?
3686 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3687 return func(dbgi, block, base, index, mem, val, mode);
/* Converts a general-purpose integer value to an x87 floating point value.
 * Tries fild with source address mode first; otherwise spills the GP value
 * to the frame and filds it from there. 32bit unsigned values get a 64bit
 * spill (zero high word) because fild only reads signed integers. */
3691 * Create a conversion from general purpose to x87 register
3693 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3695 ir_node *src_block = get_nodes_block(node);
3696 ir_node *block = be_transform_node(src_block);
3697 ir_graph *irg = get_Block_irg(block);
3698 dbg_info *dbgi = get_irn_dbg_info(node);
3699 ir_node *op = get_Conv_op(node);
3700 ir_node *new_op = NULL;
3702 ir_mode *store_mode;
3708 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3709 if (possible_int_mode_for_fp(src_mode)) {
3710 ia32_address_mode_t am;
3712 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3713 if (am.op_type == ia32_AddrModeS) {
3714 ia32_address_t *addr = &am.addr;
3716 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3717 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3719 set_am_attributes(fild, &am);
3720 SET_IA32_ORIG_NODE(fild, node);
3722 fix_mem_proj(fild, &am);
/* no source AM possible: transform the operand and go through the frame */
3727 if (new_op == NULL) {
3728 new_op = be_transform_node(op);
3731 mode = get_irn_mode(op);
3733 /* first convert to 32 bit signed if necessary */
3734 if (get_mode_size_bits(src_mode) < 32) {
3735 if (!be_upper_bits_clean(op, src_mode)) {
3736 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3737 SET_IA32_ORIG_NODE(new_op, node);
3742 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can load it */
3745 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3747 set_ia32_use_frame(store);
3748 set_ia32_op_type(store, ia32_AddrModeD);
3749 set_ia32_ls_mode(store, mode_Iu);
3751 store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3753 /* exception for 32bit unsigned, do a 64bit spill+load */
3754 if (!mode_is_signed(mode)) {
/* write a zero high word at offset 4 so the fild sees a non-negative
 * 64bit signed value */
3757 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3759 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3760 noreg_GP, nomem, zero_const);
3761 ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3763 set_ia32_use_frame(zero_store);
3764 set_ia32_op_type(zero_store, ia32_AddrModeD);
3765 add_ia32_am_offs_int(zero_store, 4);
3766 set_ia32_ls_mode(zero_store, mode_Iu);
3768 in[0] = zero_store_mem;
/* the fild must see both stores: merge their memory with a Sync */
3771 store_mem = new_rd_Sync(dbgi, block, 2, in);
3772 store_mode = mode_Ls;
3774 store_mode = mode_Is;
3778 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3780 set_ia32_use_frame(fild);
3781 set_ia32_op_type(fild, ia32_AddrModeS);
3782 set_ia32_ls_mode(fild, store_mode);
3784 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* Widening integer-to-integer conversion (src_mode strictly smaller than
 * tgt_mode, see the assert). Skips the Conv entirely when the upper bits
 * of the operand are already known to be clean. */
3790 * Create a conversion from one integer mode into another one
3792 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3793 dbg_info *dbgi, ir_node *block, ir_node *op,
3796 ir_node *new_block = be_transform_node(block);
3798 ia32_address_mode_t am;
3799 ia32_address_t *addr = &am.addr;
3802 assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3804 #ifdef DEBUG_libfirm
/* a Conv fed by a constant should have been folded before the backend */
3806 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3811 if (be_upper_bits_clean(op, src_mode)) {
3812 return be_transform_node(op);
3815 match_arguments(&am, block, NULL, op, NULL,
3816 match_am | match_8bit_am | match_16bit_am);
3818 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3819 addr->mem, am.new_op2, src_mode);
3820 set_am_attributes(new_node, &am);
3821 /* match_arguments assume that out-mode = in-mode, this isn't true here
3823 set_ia32_ls_mode(new_node, src_mode);
3824 SET_IA32_ORIG_NODE(new_node, node);
3825 new_node = fix_mem_proj(new_node, &am);
/* Dispatcher for all Conv transformations: float<->float, float<->int and
 * int<->int, each with separate SSE2 and x87 code paths. */
3830 * Transforms a Conv node.
3832 * @return The created ia32 Conv node
3834 static ir_node *gen_Conv(ir_node *node)
3836 ir_node *block = get_nodes_block(node);
3837 ir_node *new_block = be_transform_node(block);
3838 ir_node *op = get_Conv_op(node);
3839 ir_node *new_op = NULL;
3840 dbg_info *dbgi = get_irn_dbg_info(node);
3841 ir_mode *src_mode = get_irn_mode(op);
3842 ir_mode *tgt_mode = get_irn_mode(node);
3843 int src_bits = get_mode_size_bits(src_mode);
3844 int tgt_bits = get_mode_size_bits(tgt_mode);
3845 ir_node *res = NULL;
/* 64bit integer operations must have been lowered before this phase */
3847 assert(!mode_is_int(src_mode) || src_bits <= 32);
3848 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3850 /* modeB -> X should already be lowered by the lower_mode_b pass */
3851 if (src_mode == mode_b) {
3852 panic("ConvB not lowered %+F", node);
3855 if (src_mode == tgt_mode) {
3856 /* this should be optimized already, but who knows... */
3857 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3858 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3859 return be_transform_node(op);
3862 if (mode_is_float(src_mode)) {
3863 new_op = be_transform_node(op);
3864 /* we convert from float ... */
3865 if (mode_is_float(tgt_mode)) {
3867 if (ia32_cg_config.use_sse2) {
3868 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3869 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3871 set_ia32_ls_mode(res, tgt_mode);
/* x87: widening needs no code; narrowing needs a store/load round trip */
3873 if (src_bits < tgt_bits) {
3874 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3877 res = gen_x87_conv(tgt_mode, new_op);
3878 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3884 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3885 if (ia32_cg_config.use_sse2) {
3886 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3888 set_ia32_ls_mode(res, src_mode);
3890 return gen_x87_fp_to_gp(node);
3894 /* we convert from int ... */
3895 if (mode_is_float(tgt_mode)) {
3897 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3898 if (ia32_cg_config.use_sse2) {
3899 new_op = be_transform_node(op);
3900 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3902 set_ia32_ls_mode(res, tgt_mode);
/* x87: an extra float-conv is only needed when the integer does not fit
 * the target's mantissa losslessly */
3904 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3905 unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3906 res = gen_x87_gp_to_fp(node, src_mode);
3908 /* we need a float-conv, if the int mode has more bits than the
3910 if (float_mantissa < int_mantissa) {
3911 res = gen_x87_conv(tgt_mode, res);
3912 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3916 } else if (tgt_mode == mode_b) {
3917 /* mode_b lowering already took care that we only have 0/1 values */
3918 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3919 src_mode, tgt_mode));
3920 return be_transform_node(op);
/* int -> int: narrowing is a no-op on 32bit registers, widening needs
 * an explicit I2I conversion */
3923 if (src_bits >= tgt_bits) {
3924 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3925 src_mode, tgt_mode));
3926 return be_transform_node(op);
3929 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode node as an ia32 Immediate (subject to the given asm
 * constraint type); falls back to the normal transformation otherwise. */
3937 static ir_node *create_immediate_or_transform(ir_node *node,
3938 char immediate_constraint_type)
3940 ir_node *new_node = ia32_try_create_Immediate(node, immediate_constraint_type);
3941 if (new_node == NULL) {
3942 new_node = be_transform_node(node);
/* Frame address computation becomes a Lea relative to the (transformed)
 * frame pointer; the concrete offset is filled in by the frame entity
 * later (set_ia32_frame_ent/set_ia32_use_frame). */
3948 * Transforms a FrameAddr into an ia32 Add.
3950 static ir_node *gen_be_FrameAddr(ir_node *node)
3952 ir_node *block = be_transform_node(get_nodes_block(node));
3953 ir_node *op = be_get_FrameAddr_frame(node);
3954 ir_node *new_op = be_transform_node(op);
3955 dbg_info *dbgi = get_irn_dbg_info(node);
3958 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3959 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3960 set_ia32_use_frame(new_node);
3962 SET_IA32_ORIG_NODE(new_node, node);
/* Return transformation. Only special for SSE2 functions returning a float:
 * the ABI requires the value on the x87 stack, so the xmm result is stored
 * to the frame and reloaded with vfld before the Return. All other cases
 * just duplicate the node. */
3968 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3970 static ir_node *gen_be_Return(ir_node *node)
3972 ir_graph *irg = current_ir_graph;
3973 ir_node *ret_val = get_irn_n(node, n_be_Return_val);
3974 ir_node *ret_mem = get_irn_n(node, n_be_Return_mem);
3975 ir_node *new_ret_val = be_transform_node(ret_val);
3976 ir_node *new_ret_mem = be_transform_node(ret_mem);
3977 ir_entity *ent = get_irg_entity(irg);
3978 ir_type *tp = get_entity_type(ent);
3979 dbg_info *dbgi = get_irn_dbg_info(node);
3980 ir_node *block = be_transform_node(get_nodes_block(node));
3994 assert(ret_val != NULL);
3995 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3996 return be_duplicate_node(node);
3999 res_type = get_method_res_type(tp, 0);
4001 if (! is_Primitive_type(res_type)) {
4002 return be_duplicate_node(node);
4005 mode = get_type_mode(res_type);
4006 if (! mode_is_float(mode)) {
4007 return be_duplicate_node(node);
4010 assert(get_method_n_ress(tp) == 1);
4012 frame = get_irg_frame(irg);
4014 /* store xmm0 onto stack */
4015 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
4016 new_ret_mem, new_ret_val);
4017 set_ia32_ls_mode(sse_store, mode);
4018 set_ia32_op_type(sse_store, ia32_AddrModeD);
4019 set_ia32_use_frame(sse_store);
4020 store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4022 /* load into x87 register */
4023 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store_mem, mode);
4024 set_ia32_op_type(fld, ia32_AddrModeS);
4025 set_ia32_use_frame(fld);
4027 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
4028 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
4030 /* create a new return */
4031 arity = get_irn_arity(node);
4032 in = ALLOCAN(ir_node*, arity);
4033 pop = be_Return_get_pop(node);
/* rebuild the Return inputs, substituting the fld result for the value
 * and the fld memory for the memory input */
4034 for (i = 0; i < arity; ++i) {
4035 ir_node *op = get_irn_n(node, i);
4036 if (op == ret_val) {
4038 } else if (op == ret_mem) {
4041 in[i] = be_transform_node(op);
4044 new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4045 copy_node_attr(irg, node, new_node);
/* be_AddSP grows the stack, which on ia32 (stack grows downwards) is a
 * SubSP instruction. The stack output is pinned to ESP. */
4051 * Transform a be_AddSP into an ia32_SubSP.
4053 static ir_node *gen_be_AddSP(ir_node *node)
4055 ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4056 ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
4058 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4059 match_am | match_immediate);
4060 assert(is_ia32_SubSP(new_node));
4061 arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4062 &ia32_registers[REG_ESP]);
/* be_SubSP shrinks the stack, i.e. an ia32 AddSP (mirror of gen_be_AddSP). */
4067 * Transform a be_SubSP into an ia32_AddSP
4069 static ir_node *gen_be_SubSP(ir_node *node)
4071 ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4072 ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
4074 ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4075 match_am | match_immediate);
4076 assert(is_ia32_AddSP(new_node));
4077 arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4078 &ia32_registers[REG_ESP]);
/* Transforms a Phi: picks the register class requirement from the Phi's
 * mode (gp for integers/pointers, xmm or vfp for floats depending on the
 * SSE2 configuration) and delegates to be_transform_phi. */
4082 static ir_node *gen_Phi(ir_node *node)
4084 ir_mode *mode = get_irn_mode(node);
4085 const arch_register_req_t *req;
4086 if (ia32_mode_needs_gp_reg(mode)) {
4087 /* we shouldn't have any 64bit stuff around anymore */
4088 assert(get_mode_size_bits(mode) <= 32);
4089 /* all integer operations are on 32bit registers now */
4091 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4092 } else if (mode_is_float(mode)) {
4093 if (ia32_cg_config.use_sse2) {
4095 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4098 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* memory and other modes carry no register requirement */
4101 req = arch_no_register_req;
4104 return be_transform_phi(node, req);
/* Transforms an unconditional Jmp 1:1 into an ia32 Jmp. */
4107 static ir_node *gen_Jmp(ir_node *node)
4109 ir_node *block = get_nodes_block(node);
4110 ir_node *new_block = be_transform_node(block);
4111 dbg_info *dbgi = get_irn_dbg_info(node);
4114 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4115 SET_IA32_ORIG_NODE(new_node, node);
/* Transforms an indirect jump; the target may be folded into an address
 * mode or an immediate via match_arguments. */
4123 static ir_node *gen_IJmp(ir_node *node)
4125 ir_node *block = get_nodes_block(node);
4126 ir_node *new_block = be_transform_node(block);
4127 dbg_info *dbgi = get_irn_dbg_info(node);
4128 ir_node *op = get_IJmp_target(node);
4130 ia32_address_mode_t am;
4131 ia32_address_t *addr = &am.addr;
4133 assert(get_irn_mode(op) == mode_P);
4135 match_arguments(&am, block, NULL, op, NULL,
4136 match_am | match_immediate | match_upconv);
4138 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4139 addr->mem, am.new_op2);
4140 set_am_attributes(new_node, &am);
4141 SET_IA32_ORIG_NODE(new_node, node);
4143 new_node = fix_mem_proj(new_node, &am);
/* Lowered 64bit Add helper: produces an ia32 Add in mode_T so that the
 * flags output (carry for the following Adc) is available. */
4148 static ir_node *gen_ia32_l_Add(ir_node *node)
4150 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4151 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4152 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4153 match_commutative | match_am | match_immediate |
4154 match_mode_neutral);
/* gen_binop may hand back a result Proj; we need the Add itself */
4156 if (is_Proj(lowered)) {
4157 lowered = get_Proj_pred(lowered);
4159 assert(is_ia32_Add(lowered));
4160 set_irn_mode(lowered, mode_T);
/* Lowered add-with-carry: consumes the flags produced by the l_Add. */
4166 static ir_node *gen_ia32_l_Adc(ir_node *node)
4168 return gen_binop_flags(node, new_bd_ia32_Adc,
4169 match_commutative | match_am | match_immediate |
4170 match_mode_neutral);
/* Lowered unsigned widening multiply (produces EDX:EAX pair). */
4174 * Transforms a l_MulS into a "real" MulS node.
4176 * @return the created ia32 Mul node
4178 static ir_node *gen_ia32_l_Mul(ir_node *node)
4180 ir_node *left = get_binop_left(node);
4181 ir_node *right = get_binop_right(node);
4183 return gen_binop(node, left, right, new_bd_ia32_Mul,
4184 match_commutative | match_am | match_mode_neutral);
/* Lowered signed widening multiply, one-operand IMul form. */
4188 * Transforms a l_IMulS into a "real" IMul1OPS node.
4190 * @return the created ia32 IMul1OP node
4192 static ir_node *gen_ia32_l_IMul(ir_node *node)
4194 ir_node *left = get_binop_left(node);
4195 ir_node *right = get_binop_right(node);
4197 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4198 match_commutative | match_am | match_mode_neutral);
/* Lowered 64bit Sub helper: ia32 Sub in mode_T so the borrow flag is
 * available for the following Sbb (mirror of gen_ia32_l_Add). */
4201 static ir_node *gen_ia32_l_Sub(ir_node *node)
4203 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4204 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4205 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4206 match_am | match_immediate | match_mode_neutral);
4208 if (is_Proj(lowered)) {
4209 lowered = get_Proj_pred(lowered);
4211 assert(is_ia32_Sub(lowered));
4212 set_irn_mode(lowered, mode_T);
/* Lowered subtract-with-borrow: consumes the flags produced by the l_Sub. */
4218 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4220 return gen_binop_flags(node, new_bd_ia32_Sbb,
4221 match_am | match_immediate | match_mode_neutral);
/* Converts a lowered 64bit integer (low/high word pair) to a float:
 * stores both words to the frame, filds the 64bit value, and for unsigned
 * inputs with the sign bit set adds a correction constant (ULLBIAS) since
 * fild interprets the value as signed. x87 only — panics for SSE2. */
4224 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4226 ir_node *src_block = get_nodes_block(node);
4227 ir_node *block = be_transform_node(src_block);
4228 ir_graph *irg = current_ir_graph;
4229 dbg_info *dbgi = get_irn_dbg_info(node);
4230 ir_node *frame = get_irg_frame(irg);
4231 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4232 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4233 ir_node *new_val_low = be_transform_node(val_low);
4234 ir_node *new_val_high = be_transform_node(val_high);
4236 ir_node *sync, *fild, *res;
4238 ir_node *store_high;
4242 if (ia32_cg_config.use_sse2) {
4243 panic("not implemented for SSE2");
/* spill low word at offset 0, high word at offset 4 */
4247 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4249 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4251 SET_IA32_ORIG_NODE(store_low, node);
4252 SET_IA32_ORIG_NODE(store_high, node);
4254 mem_low = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4255 mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4257 set_ia32_use_frame(store_low);
4258 set_ia32_use_frame(store_high);
4259 set_ia32_op_type(store_low, ia32_AddrModeD);
4260 set_ia32_op_type(store_high, ia32_AddrModeD);
4261 set_ia32_ls_mode(store_low, mode_Iu);
4262 set_ia32_ls_mode(store_high, mode_Is);
4263 add_ia32_am_offs_int(store_high, 4);
/* fild must observe both stores */
4267 sync = new_rd_Sync(dbgi, block, 2, in);
4270 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4272 set_ia32_use_frame(fild);
4273 set_ia32_op_type(fild, ia32_AddrModeS);
4274 set_ia32_ls_mode(fild, mode_Ls);
4276 SET_IA32_ORIG_NODE(fild, node);
4278 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4280 if (! mode_is_signed(get_irn_mode(val_high))) {
/* unsigned fixup: shift the high word right by 31 to index a 2-entry
 * constant table (0 or the bias 2^64) and vfadd the selected entry */
4281 ia32_address_mode_t am;
4283 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4286 am.addr.base = get_symconst_base();
4287 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4288 am.addr.mem = nomem;
4291 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4292 am.addr.tls_segment = false;
4293 am.addr.use_frame = 0;
4294 am.addr.frame_entity = NULL;
4295 am.addr.symconst_sign = 0;
4296 am.ls_mode = mode_F;
4297 am.mem_proj = nomem;
4298 am.op_type = ia32_AddrModeS;
4300 am.new_op2 = ia32_new_NoReg_vfp(current_ir_graph);
4301 am.pinned = op_pin_state_floats;
4303 am.ins_permuted = false;
4305 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4306 am.new_op1, am.new_op2, get_fpcw());
4307 set_am_attributes(fadd, &am);
4309 set_irn_mode(fadd, mode_T);
4310 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Converts a float to a lowered 64bit integer: a fist with mode_Ls writes
 * the 64bit result to the frame; only the memory is returned here, the
 * low/high words are picked up by gen_Proj_l_FloattoLL. */
4315 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4317 ir_node *src_block = get_nodes_block(node);
4318 ir_node *block = be_transform_node(src_block);
4319 ir_graph *irg = get_Block_irg(block);
4320 dbg_info *dbgi = get_irn_dbg_info(node);
4321 ir_node *frame = get_irg_frame(irg);
4322 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4323 ir_node *new_val = be_transform_node(val);
4326 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4327 SET_IA32_ORIG_NODE(fist, node);
4328 set_ia32_use_frame(fist);
4329 set_ia32_op_type(fist, ia32_AddrModeD);
4330 set_ia32_ls_mode(fist, mode_Ls);
/* vfist and vfisttp share the same memory proj number, so one Proj works
 * for whichever variant gen_vfist produced */
4332 assert((long)pn_ia32_vfist_M == (long) pn_ia32_vfisttp_M);
4333 return new_r_Proj(fist, mode_M, pn_ia32_vfist_M);
/* Reads one 32bit half of the 64bit fist result back from the frame slot:
 * offset 0 for the low word, offset 4 for the high word. */
4336 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4338 ir_node *block = be_transform_node(get_nodes_block(node));
4339 ir_graph *irg = get_Block_irg(block);
4340 ir_node *pred = get_Proj_pred(node);
4341 ir_node *new_pred = be_transform_node(pred);
4342 ir_node *frame = get_irg_frame(irg);
4343 dbg_info *dbgi = get_irn_dbg_info(node);
4344 long pn = get_Proj_proj(node);
4349 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4350 SET_IA32_ORIG_NODE(load, node);
4351 set_ia32_use_frame(load);
4352 set_ia32_op_type(load, ia32_AddrModeS);
4353 set_ia32_ls_mode(load, mode_Iu);
4354 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4355 * 32 bit from it with this particular load */
4356 attr = get_ia32_attr(load);
4357 attr->data.need_64bit_stackent = 1;
4359 if (pn == pn_ia32_l_FloattoLL_res_high) {
4360 add_ia32_am_offs_int(load, 4);
4362 assert(pn == pn_ia32_l_FloattoLL_res_low);
4365 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/* Renumbers be_AddSP Projs to their ia32_SubSP equivalents (be_AddSP was
 * transformed into SubSP by gen_be_AddSP); re-pins the sp output to ESP. */
4371 * Transform the Projs of an AddSP.
4373 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4375 ir_node *pred = get_Proj_pred(node);
4376 ir_node *new_pred = be_transform_node(pred);
4377 dbg_info *dbgi = get_irn_dbg_info(node);
4378 long proj = get_Proj_proj(node);
4380 if (proj == pn_be_AddSP_sp) {
4381 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4382 pn_ia32_SubSP_stack);
4383 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4385 } else if (proj == pn_be_AddSP_res) {
4386 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4387 pn_ia32_SubSP_addr);
4388 } else if (proj == pn_be_AddSP_M) {
4389 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4392 panic("No idea how to transform proj->AddSP");
/* Renumbers be_SubSP Projs to their ia32_AddSP equivalents (mirror of
 * gen_Proj_be_AddSP); re-pins the sp output to ESP. */
4396 * Transform the Projs of a SubSP.
4398 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4400 ir_node *pred = get_Proj_pred(node);
4401 ir_node *new_pred = be_transform_node(pred);
4402 dbg_info *dbgi = get_irn_dbg_info(node);
4403 long proj = get_Proj_proj(node);
4405 if (proj == pn_be_SubSP_sp) {
4406 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4407 pn_ia32_AddSP_stack);
4408 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4410 } else if (proj == pn_be_SubSP_M) {
4411 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4414 panic("No idea how to transform proj->SubSP");
/* Renumbers Load Projs to match whatever ia32 node the Load became:
 * ia32_Load, an I2I Conv (source AM folded the Load into a Conv), xLoad
 * (SSE) or vfld (x87). ProjM of multi-user Loads is deferred because the
 * Load may still be folded into another node's address mode. */
4418 * Transform and renumber the Projs from a Load.
4420 static ir_node *gen_Proj_Load(ir_node *node)
4423 ir_node *pred = get_Proj_pred(node);
4424 dbg_info *dbgi = get_irn_dbg_info(node);
4425 long proj = get_Proj_proj(node);
4427 /* loads might be part of source address mode matches, so we don't
4428 * transform the ProjMs yet (with the exception of loads whose result is
4431 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4434 /* this is needed, because sometimes we have loops that are only
4435 reachable through the ProjM */
4436 be_enqueue_preds(node);
4437 /* do it in 2 steps, to silence firm verifier */
4438 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4439 set_Proj_proj(res, pn_ia32_mem);
4443 /* renumber the proj */
4444 new_pred = be_transform_node(pred);
4445 if (is_ia32_Load(new_pred)) {
4446 switch ((pn_Load)proj) {
4448 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4450 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4451 case pn_Load_X_except:
4452 /* This Load might raise an exception. Mark it. */
4453 set_ia32_exc_label(new_pred, 1);
4454 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4455 case pn_Load_X_regular:
4456 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
4458 } else if (is_ia32_Conv_I2I(new_pred) ||
4459 is_ia32_Conv_I2I8Bit(new_pred)) {
4460 set_irn_mode(new_pred, mode_T);
4461 switch ((pn_Load)proj) {
4463 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4465 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4466 case pn_Load_X_except:
4467 /* This Load might raise an exception. Mark it. */
4468 set_ia32_exc_label(new_pred, 1);
4469 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4470 case pn_Load_X_regular:
4471 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
4473 } else if (is_ia32_xLoad(new_pred)) {
4474 switch ((pn_Load)proj) {
4476 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4478 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4479 case pn_Load_X_except:
4480 /* This Load might raise an exception. Mark it. */
4481 set_ia32_exc_label(new_pred, 1);
4482 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4483 case pn_Load_X_regular:
4484 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
4486 } else if (is_ia32_vfld(new_pred)) {
4487 switch ((pn_Load)proj) {
4489 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4491 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4492 case pn_Load_X_except:
4493 /* This Load might raise an exception. Mark it. */
4494 set_ia32_exc_label(new_pred, 1);
4495 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_except);
4496 case pn_Load_X_regular:
4497 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_regular);
4500 /* can happen for ProJMs when source address mode happened for the
4503 /* however it should not be the result proj, as that would mean the
4504 load had multiple users and should not have been used for
4506 if (proj != pn_Load_M) {
4507 panic("internal error: transformed node not a Load");
4509 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4512 panic("No idea how to transform Proj(Load) %+F", node);
/* Renumbers Store Projs for every node a Store can have become: ia32
 * Store/Store8Bit, vfist/vfisttp, vfst, xStore, a Sync created by
 * gen_float_const_Store, or a destination-AM node that absorbed the
 * Store. Exception flow is only implemented for the plain variants. */
4515 static ir_node *gen_Proj_Store(ir_node *node)
4517 ir_node *pred = get_Proj_pred(node);
4518 ir_node *new_pred = be_transform_node(pred);
4519 dbg_info *dbgi = get_irn_dbg_info(node);
4520 long pn = get_Proj_proj(node);
4522 if (is_ia32_Store(new_pred) || is_ia32_Store8Bit(new_pred)) {
4523 switch ((pn_Store)pn) {
4525 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4526 case pn_Store_X_except:
4527 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4528 case pn_Store_X_regular:
4529 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4531 } else if (is_ia32_vfist(new_pred)) {
4532 switch ((pn_Store)pn) {
4534 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfist_M);
4535 case pn_Store_X_except:
4536 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_except);
4537 case pn_Store_X_regular:
4538 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfist_X_regular);
4540 } else if (is_ia32_vfisttp(new_pred)) {
4541 switch ((pn_Store)pn) {
4543 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfisttp_M);
4544 case pn_Store_X_except:
4545 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_except);
4546 case pn_Store_X_regular:
4547 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfisttp_X_regular);
4549 } else if (is_ia32_vfst(new_pred)) {
4550 switch ((pn_Store)pn) {
4552 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfst_M);
4553 case pn_Store_X_except:
4554 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_except);
4555 case pn_Store_X_regular:
4556 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfst_X_regular);
4558 } else if (is_ia32_xStore(new_pred)) {
4559 switch ((pn_Store)pn) {
4561 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4562 case pn_Store_X_except:
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4564 case pn_Store_X_regular:
4565 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4567 } else if (is_Sync(new_pred)) {
4568 /* hack for the case that gen_float_const_Store produced a Sync */
4569 if (pn == pn_Store_M) {
4572 panic("exception control flow not implemented yet");
4573 } else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4574 /* destination address mode */
4575 if (pn == pn_Store_M) {
4578 panic("exception control flow for destination AM not implemented yet");
4581 panic("No idea how to transform Proj(Store) %+F", node);
/* Renumbers Div Projs; the Div may have become an integer Div/IDiv, an
 * SSE xDiv or an x87 vfdiv, with matching result mode per case. */
4585 * Transform and renumber the Projs from a Div or Mod instruction.
4587 static ir_node *gen_Proj_Div(ir_node *node)
4589 ir_node *pred = get_Proj_pred(node);
4590 ir_node *new_pred = be_transform_node(pred);
4591 dbg_info *dbgi = get_irn_dbg_info(node);
4592 long proj = get_Proj_proj(node);
/* Div and IDiv share proj numbers, so one set of constants suffices */
4594 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4595 assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4597 switch ((pn_Div)proj) {
4599 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4600 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4601 } else if (is_ia32_xDiv(new_pred)) {
4602 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4603 } else if (is_ia32_vfdiv(new_pred)) {
4604 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4606 panic("Div transformed to unexpected thing %+F", new_pred);
4609 if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4610 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4611 } else if (is_ia32_xDiv(new_pred)) {
4612 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4613 } else if (is_ia32_vfdiv(new_pred)) {
4614 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4616 panic("Div transformed to unexpected thing %+F", new_pred);
4618 case pn_Div_X_except:
4619 set_ia32_exc_label(new_pred, 1);
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4621 case pn_Div_X_regular:
4622 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4625 panic("No idea how to transform proj->Div");
/* Renumbers Mod Projs; Mod is always an integer Div/IDiv on ia32 (see the
 * assert), the remainder is the mod_res output. */
4629 * Transform and renumber the Projs from a Div or Mod instruction.
4631 static ir_node *gen_Proj_Mod(ir_node *node)
4633 ir_node *pred = get_Proj_pred(node);
4634 ir_node *new_pred = be_transform_node(pred);
4635 dbg_info *dbgi = get_irn_dbg_info(node);
4636 long proj = get_Proj_proj(node);
4638 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4639 assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4640 assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4642 switch ((pn_Mod)proj) {
4644 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4646 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4647 case pn_Mod_X_except:
4648 set_ia32_exc_label(new_pred, 1);
4649 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4650 case pn_Mod_X_regular:
4651 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4653 panic("No idea how to transform proj->Mod");
/* Renumbers CopyB Projs; the CopyB may have become the immediate-size
 * variant (CopyB_i) or the general variant, each with its own proj set. */
4657 * Transform and renumber the Projs from a CopyB.
4659 static ir_node *gen_Proj_CopyB(ir_node *node)
4661 ir_node *pred = get_Proj_pred(node);
4662 ir_node *new_pred = be_transform_node(pred);
4663 dbg_info *dbgi = get_irn_dbg_info(node);
4664 long proj = get_Proj_proj(node);
4666 switch ((pn_CopyB)proj) {
4668 if (is_ia32_CopyB_i(new_pred)) {
4669 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4670 } else if (is_ia32_CopyB(new_pred)) {
4671 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4674 case pn_CopyB_X_regular:
4675 if (is_ia32_CopyB_i(new_pred)) {
4676 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4677 } else if (is_ia32_CopyB(new_pred)) {
4678 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4681 case pn_CopyB_X_except:
4682 if (is_ia32_CopyB_i(new_pred)) {
4683 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4684 } else if (is_ia32_CopyB(new_pred)) {
4685 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4690 panic("No idea how to transform proj->CopyB");
/* Transforms a be_Call into an ia32 Call: matches the call target as
 * address mode / immediate, sorts the register parameters into their
 * fixed EAX/ECX/EDX slots, and records SSE2 float-returning calls for
 * the x87 simulator post-processing. */
4693 static ir_node *gen_be_Call(ir_node *node)
4695 dbg_info *const dbgi = get_irn_dbg_info(node);
4696 ir_node *const src_block = get_nodes_block(node);
4697 ir_node *const block = be_transform_node(src_block);
4698 ir_node *const src_mem = get_irn_n(node, n_be_Call_mem);
4699 ir_node *const src_sp = get_irn_n(node, n_be_Call_sp);
4700 ir_node *const sp = be_transform_node(src_sp);
4701 ir_node *const src_ptr = get_irn_n(node, n_be_Call_ptr);
4702 ia32_address_mode_t am;
4703 ia32_address_t *const addr = &am.addr;
4708 ir_node * eax = noreg_GP;
4709 ir_node * ecx = noreg_GP;
4710 ir_node * edx = noreg_GP;
4711 unsigned const pop = be_Call_get_pop(node);
4712 ir_type *const call_tp = be_Call_get_type(node);
4713 int old_no_pic_adjust;
4714 int throws_exception = ir_throws_exception(node);
4716 /* Run the x87 simulator if the call returns a float value */
4717 if (get_method_n_ress(call_tp) > 0) {
4718 ir_type *const res_type = get_method_res_type(call_tp, 0);
4719 ir_mode *const res_mode = get_type_mode(res_type);
4721 if (res_mode != NULL && mode_is_float(res_mode)) {
4722 ir_graph *irg = current_ir_graph;
4723 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
4724 irg_data->do_x87_sim = 1;
4728 /* We do not want be_Call direct calls */
4729 assert(be_Call_get_entity(node) == NULL);
4731 /* special case for PIC trampoline calls */
4732 old_no_pic_adjust = ia32_no_pic_adjust;
4733 ia32_no_pic_adjust = be_options.pic;
4735 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4736 match_am | match_immediate | match_upconv);
4738 ia32_no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; the remaining inputs are GP register params */
4740 i = get_irn_arity(node) - 1;
4741 fpcw = be_transform_node(get_irn_n(node, i--));
4742 for (; i >= n_be_Call_first_arg; --i) {
4743 arch_register_req_t const *const req
4744 = arch_get_irn_register_req_in(node, i);
4745 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4747 assert(req->type == arch_register_req_type_limited);
4748 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4750 switch (*req->limited) {
4751 case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4752 case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4753 case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4754 default: panic("Invalid GP register for register parameter");
4758 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4759 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4760 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4761 ir_set_throws_exception(call, throws_exception);
4762 set_am_attributes(call, &am);
4763 call = fix_mem_proj(call, &am);
4765 if (get_irn_pinned(node) == op_pin_state_pinned)
4766 set_irn_pinned(call, op_pin_state_pinned);
4768 SET_IA32_ORIG_NODE(call, node);
4770 if (ia32_cg_config.use_sse2) {
4771 /* remember this call for post-processing */
4772 ARR_APP1(ir_node *, call_list, call);
4773 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4780 * Transform Builtin trap
4782 static ir_node *gen_trap(ir_node *node)
/* A trap builtin becomes the ia32 UD2 node; the builtin's memory input
 * is threaded through so the trap stays ordered with memory operations. */
4784 dbg_info *dbgi = get_irn_dbg_info(node);
4785 ir_node *block = be_transform_node(get_nodes_block(node));
4786 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4788 return new_bd_ia32_UD2(dbgi, block, mem);
4792 * Transform Builtin debugbreak
4794 static ir_node *gen_debugbreak(ir_node *node)
/* A debugbreak builtin becomes the ia32 Breakpoint node, again keeping
 * the memory dependency of the builtin. */
4796 dbg_info *dbgi = get_irn_dbg_info(node);
4797 ir_node *block = be_transform_node(get_nodes_block(node));
4798 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4800 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4804 * Transform Builtin return_address
4806 static ir_node *gen_return_address(ir_node *node)
/* __builtin_return_address(level): param 0 is the constant frame level,
 * param 1 the current frame pointer supplied by the middle end. */
4808 ir_node *param = get_Builtin_param(node, 0);
4809 ir_node *frame = get_Builtin_param(node, 1);
4810 dbg_info *dbgi = get_irn_dbg_info(node);
4811 ir_tarval *tv = get_Const_tarval(param);
4812 ir_graph *irg = get_irn_irg(node);
4813 unsigned long value = get_tarval_long(tv);
4815 ir_node *block = be_transform_node(get_nodes_block(node));
4816 ir_node *ptr = be_transform_node(frame);
/* For non-zero levels climb up 'value' frames first; ClimbFrame takes
 * two scratch values created with ProduceVal. */
4820 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4821 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4822 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4825 /* load the return address from this frame */
4826 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4828 set_irn_pinned(load, get_irn_pinned(node));
4829 set_ia32_op_type(load, ia32_AddrModeS);
4830 set_ia32_ls_mode(load, mode_Iu);
/* address the return-address slot via the backend's frame entity */
4832 set_ia32_am_offs_int(load, 0);
4833 set_ia32_use_frame(load);
4834 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
/* Unpinned loads may be rematerialized; that is only safe when all load
 * variants agree on the result Proj number (checked by the asserts). */
4836 if (get_irn_pinned(node) == op_pin_state_floats) {
4837 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4838 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4839 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4840 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4843 SET_IA32_ORIG_NODE(load, node);
4844 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4848 * Transform Builtin frame_address
4850 static ir_node *gen_frame_address(ir_node *node)
/* __builtin_frame_address(level): param 0 is the constant frame level,
 * param 1 the current frame pointer. */
4852 ir_node *param = get_Builtin_param(node, 0);
4853 ir_node *frame = get_Builtin_param(node, 1);
4854 dbg_info *dbgi = get_irn_dbg_info(node);
4855 ir_tarval *tv = get_Const_tarval(param);
4856 ir_graph *irg = get_irn_irg(node);
4857 unsigned long value = get_tarval_long(tv);
4859 ir_node *block = be_transform_node(get_nodes_block(node));
4860 ir_node *ptr = be_transform_node(frame);
/* for non-zero levels climb up 'value' frames first */
4865 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4866 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4867 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4870 /* load the frame address from this frame */
4871 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4873 set_irn_pinned(load, get_irn_pinned(node));
4874 set_ia32_op_type(load, ia32_AddrModeS);
4875 set_ia32_ls_mode(load, mode_Iu);
4877 ent = ia32_get_frame_address_entity(irg);
4879 set_ia32_am_offs_int(load, 0);
4880 set_ia32_use_frame(load);
4881 set_ia32_frame_ent(load, ent);
4883 /* will fail anyway, but gcc does this: */
4884 set_ia32_am_offs_int(load, 0);
/* Unpinned loads may be rematerialized; safe only because all load
 * variants share the result Proj number (see asserts). */
4887 if (get_irn_pinned(node) == op_pin_state_floats) {
4888 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_vfld_res
4889 && (int)pn_ia32_vfld_res == (int)pn_ia32_Load_res
4890 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4891 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4894 SET_IA32_ORIG_NODE(load, node);
4895 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4899 * Transform Builtin prefetch
4901 static ir_node *gen_prefetch(ir_node *node)
/* __builtin_prefetch(ptr, rw, locality): pick the best available
 * prefetch instruction, or just route memory through when the target
 * supports none. */
4904 ir_node *ptr, *block, *mem, *base, *idx;
4905 ir_node *param, *new_node;
4908 ia32_address_t addr;
4910 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4911 /* no prefetch at all, route memory */
4912 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the constant read/write flag */
4915 param = get_Builtin_param(node, 1);
4916 tv = get_Const_tarval(param);
4917 rw = get_tarval_long(tv);
4919 /* construct load address */
4920 memset(&addr, 0, sizeof(addr));
4921 ptr = get_Builtin_param(node, 0);
4922 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4929 base = be_transform_node(base);
4935 idx = be_transform_node(idx);
4938 dbgi = get_irn_dbg_info(node);
4939 block = be_transform_node(get_nodes_block(node));
4940 mem = be_transform_node(get_Builtin_mem(node));
4942 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4943 /* we have 3DNow!, this was already checked above */
4944 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4945 } else if (ia32_cg_config.use_sse_prefetch) {
4946 /* note: rw == 1 is IGNORED in that case */
4947 param = get_Builtin_param(node, 2);
4948 tv = get_Const_tarval(param);
4949 locality = get_tarval_long(tv);
4951 /* SSE style prefetch */
/* lower locality = less temporal reuse; presumably the switch maps
 * 0 -> NTA, 1 -> T2, 2 -> T1, default -> T0 - TODO confirm the
 * case labels against the full source */
4954 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4957 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4960 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4963 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4967 assert(ia32_cg_config.use_3dnow_prefetch);
4968 /* 3DNow! style prefetch */
4969 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
/* address-mode-wise the prefetch behaves like a byte load */
4972 set_irn_pinned(new_node, get_irn_pinned(node));
4973 set_ia32_op_type(new_node, ia32_AddrModeS);
4974 set_ia32_ls_mode(new_node, mode_Bu);
4975 set_address(new_node, &addr);
4977 SET_IA32_ORIG_NODE(new_node, node);
4979 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4983 * Transform bsf like node
4985 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
/* Helper for bsf/bsr-style builtins: builds the node via 'func' and lets
 * the single operand be folded into an address mode (memory operand). */
4987 ir_node *param = get_Builtin_param(node, 0);
4988 dbg_info *dbgi = get_irn_dbg_info(node);
4990 ir_node *block = get_nodes_block(node);
4991 ir_node *new_block = be_transform_node(block);
4993 ia32_address_mode_t am;
4994 ia32_address_t *addr = &am.addr;
4997 match_arguments(&am, block, NULL, param, NULL, match_am);
4999 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5000 set_am_attributes(cnt, &am);
5001 set_ia32_ls_mode(cnt, get_irn_mode(param));
5003 SET_IA32_ORIG_NODE(cnt, node);
/* fix_mem_proj rewires a memory Proj if the address mode consumed memory */
5004 return fix_mem_proj(cnt, &am);
5008 * Transform builtin ffs.
5010 static ir_node *gen_ffs(ir_node *node)
/* ffs(x) = bsf(x) + 1, with ffs(0) = 0.
 * bsf leaves the result undefined and sets ZF for x == 0, so:
 *   set = (ZF ? 1 : 0)
 *   neg = -set            (all-ones mask exactly when x == 0)
 *   orn = bsf | neg       (-1 when x == 0, bsf otherwise)
 *   add = orn + 1         (0 when x == 0, bsf+1 otherwise) */
5012 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5013 ir_node *real = skip_Proj(bsf);
5014 dbg_info *dbgi = get_irn_dbg_info(real);
5015 ir_node *block = get_nodes_block(real);
5016 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make the bsf node multi-output so its flags result is accessible */
5019 if (get_irn_mode(real) != mode_T) {
5020 set_irn_mode(real, mode_T);
5021 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5024 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
5027 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5028 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the 8-bit setcc result to a full register */
5031 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5032 SET_IA32_ORIG_NODE(conv, node);
5035 neg = new_bd_ia32_Neg(dbgi, block, conv);
5038 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5039 set_ia32_ls_mode(orn, mode_Iu);
5040 set_ia32_commutative(orn);
/* add 1 via Lea (does not clobber the flags) */
5043 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5044 add_ia32_am_offs_int(add, 1);
5049 * Transform builtin clz.
5051 static ir_node *gen_clz(ir_node *node)
/* clz(x) = 31 - bsr(x) = bsr(x) ^ 31 for 32-bit x; like bsr itself the
 * result is undefined for x == 0 (which matches the builtin's contract). */
5053 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5054 ir_node *real = skip_Proj(bsr);
5055 dbg_info *dbgi = get_irn_dbg_info(real);
5056 ir_node *block = get_nodes_block(real);
5057 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5059 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5063 * Transform builtin ctz.
5065 static ir_node *gen_ctz(ir_node *node)
/* ctz maps directly to bsf (undefined for input 0, as the builtin allows) */
5067 return gen_unop_AM(node, new_bd_ia32_Bsf);
5071 * Transform builtin parity.
5073 static ir_node *gen_parity(ir_node *node)
/* __builtin_parity: 1 iff an odd number of bits is set in the operand. */
5075 dbg_info *dbgi = get_irn_dbg_info(node);
5076 ir_node *block = get_nodes_block(node);
5077 ir_node *new_block = be_transform_node(block);
5078 ir_node *param = get_Builtin_param(node, 0);
5079 ir_node *new_param = be_transform_node(param);
5082 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5083 * so we have to do complicated xoring first.
5084 * (we should also better lower this before the backend so we still have a
5085 * chance for CSE, constant folding and other goodies for some of these
/* fold the word down: xor upper and lower halfword, then xor the two
 * bytes of the remaining halfword so PF covers the whole value */
5088 ir_node *count = ia32_create_Immediate(NULL, 0, 16);
5089 ir_node *shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5090 ir_node *xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem,
5092 ir_node *xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5095 set_ia32_ls_mode(xorn, mode_Iu);
5096 set_ia32_commutative(xorn);
5098 set_irn_mode(xor2, mode_T);
5099 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* PF is set for EVEN parity, so "not parity" yields 1 for odd parity */
5102 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5103 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8-bit setcc result */
5106 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5107 nomem, new_node, mode_Bu);
5108 SET_IA32_ORIG_NODE(new_node, node);
5113 * Transform builtin popcount
5115 static ir_node *gen_popcount(ir_node *node)
/* Population count: use the popcnt instruction when available, otherwise
 * fall back to the classic parallel bit-summing network (see the TODO
 * comments below). Lea nodes serve as flags-free adds throughout. */
5117 ir_node *param = get_Builtin_param(node, 0);
5118 dbg_info *dbgi = get_irn_dbg_info(node);
5120 ir_node *block = get_nodes_block(node);
5121 ir_node *new_block = be_transform_node(block);
5124 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5126 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5127 if (ia32_cg_config.use_popcnt) {
5128 ia32_address_mode_t am;
5129 ia32_address_t *addr = &am.addr;
/* popcnt accepts a memory operand, so try to fold one in */
5132 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);
5134 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5135 set_am_attributes(cnt, &am);
5136 set_ia32_ls_mode(cnt, get_irn_mode(param));
5138 SET_IA32_ORIG_NODE(cnt, node);
5139 return fix_mem_proj(cnt, &am);
5142 new_param = be_transform_node(param);
5144 /* do the standard popcount algo */
5145 /* TODO: This is stupid, we should transform this before the backend,
5146 * to get CSE, localopts, etc. for the operations
5147 * TODO: This is also not the optimal algorithm (it is just the starting
5148 * example in hackers delight, they optimize it more on the following page)
5149 * But I'm too lazy to fix this now, as the code should get lowered before
5150 * the backend anyway.
5153 /* m1 = x & 0x55555555 */
5154 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5155 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5158 simm = ia32_create_Immediate(NULL, 0, 1);
5159 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5161 /* m2 = s1 & 0x55555555 */
5162 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as a flags-free add) */
5165 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5167 /* m4 = m3 & 0x33333333 */
5168 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5169 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5172 simm = ia32_create_Immediate(NULL, 0, 2);
5173 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5175 /* m5 = s2 & 0x33333333 */
5176 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5179 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5181 /* m7 = m6 & 0x0F0F0F0F */
5182 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5183 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5186 simm = ia32_create_Immediate(NULL, 0, 4);
5187 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5189 /* m8 = s3 & 0x0F0F0F0F */
5190 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5193 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5195 /* m10 = m9 & 0x00FF00FF */
5196 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5197 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5200 simm = ia32_create_Immediate(NULL, 0, 8);
5201 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5203 /* m11 = s4 & 0x00FF00FF */
5204 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5206 /* m12 = m10 + m11 */
5207 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5209 /* m13 = m12 & 0x0000FFFF */
5210 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5211 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5213 /* s5 = m12 >> 16 */
5214 simm = ia32_create_Immediate(NULL, 0, 16);
5215 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5217 /* res = m13 + s5 */
5218 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5222 * Transform builtin byte swap.
5224 static ir_node *gen_bswap(ir_node *node)
/* Byte-swap builtin. Uses the bswap instruction when available; for
 * 32 bit without bswap it is emulated with three rotates
 *   rol ax, 8; rol eax, 16; rol ax, 8
 * and 16-bit values always use the Bswap16 node. */
5226 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5227 dbg_info *dbgi = get_irn_dbg_info(node);
5229 ir_node *block = get_nodes_block(node);
5230 ir_node *new_block = be_transform_node(block);
5231 ir_mode *mode = get_irn_mode(param);
5232 unsigned size = get_mode_size_bits(mode);
5236 if (ia32_cg_config.use_bswap) {
5237 /* swap available */
5238 return new_bd_ia32_Bswap(dbgi, new_block, param);
5240 ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
5241 ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5242 ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
5243 ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5244 ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
/* the 8-bit rotates operate on the low halfword only (mode_Hu) */
5245 set_ia32_ls_mode(rol1, mode_Hu);
5246 set_ia32_ls_mode(rol2, mode_Iu);
5247 set_ia32_ls_mode(rol3, mode_Hu);
5252 /* swap16 always available */
5253 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5256 panic("Invalid bswap size (%d)", size);
5261 * Transform builtin outport.
5263 static ir_node *gen_outport(ir_node *node)
/* x86 "out" instruction: the port may be an immediate; the operand
 * width is taken from the value's mode. */
5265 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5266 ir_node *oldv = get_Builtin_param(node, 1);
5267 ir_mode *mode = get_irn_mode(oldv);
5268 ir_node *value = be_transform_node(oldv);
5269 ir_node *block = be_transform_node(get_nodes_block(node));
5270 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5271 dbg_info *dbgi = get_irn_dbg_info(node);
5273 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5274 set_ia32_ls_mode(res, mode);
5279 * Transform builtin inport.
5281 static ir_node *gen_inport(ir_node *node)
/* x86 "in" instruction: the result width comes from the builtin's
 * declared result type; the port may be an immediate. */
5283 ir_type *tp = get_Builtin_type(node);
5284 ir_type *rstp = get_method_res_type(tp, 0);
5285 ir_mode *mode = get_type_mode(rstp);
5286 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5287 ir_node *block = be_transform_node(get_nodes_block(node));
5288 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5289 dbg_info *dbgi = get_irn_dbg_info(node);
5291 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5292 set_ia32_ls_mode(res, mode);
5294 /* check for missing Result Proj */
5299 * Transform a builtin inner trampoline
5301 static ir_node *gen_inner_trampoline(ir_node *node)
/* Write a closure trampoline to *ptr. The code stored is
 *   B9 <env>     mov ecx, <env>
 *   E9 <rel32>   jmp <callee>
 * where rel32 = callee - (ptr + 10), 10 being the trampoline size (the
 * -10 offsets below). Returns a Tuple of (memory, trampoline address). */
5303 ir_node *ptr = get_Builtin_param(node, 0);
5304 ir_node *callee = get_Builtin_param(node, 1);
5305 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5306 ir_node *mem = get_Builtin_mem(node);
5307 ir_node *block = get_nodes_block(node);
5308 ir_node *new_block = be_transform_node(block);
5312 ir_node *trampoline;
5314 dbg_info *dbgi = get_irn_dbg_info(node);
5315 ia32_address_t addr;
5317 /* construct store address */
5318 memset(&addr, 0, sizeof(addr));
5319 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5321 if (addr.base == NULL) {
5322 addr.base = noreg_GP;
5324 addr.base = be_transform_node(addr.base);
5327 if (addr.index == NULL) {
5328 addr.index = noreg_GP;
5330 addr.index = be_transform_node(addr.index);
5332 addr.mem = be_transform_node(mem);
5334 /* mov ecx, <env> */
/* opcode byte 0xB9 = mov ecx, imm32 */
5335 val = ia32_create_Immediate(NULL, 0, 0xB9);
5336 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5337 addr.index, addr.mem, val);
5338 set_irn_pinned(store, get_irn_pinned(node));
5339 set_ia32_op_type(store, ia32_AddrModeD);
5340 set_ia32_ls_mode(store, mode_Bu);
5341 set_address(store, &addr);
/* store the 32-bit environment pointer (the mov's immediate operand) */
5345 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5346 addr.index, addr.mem, env);
5347 set_irn_pinned(store, get_irn_pinned(node));
5348 set_ia32_op_type(store, ia32_AddrModeD);
5349 set_ia32_ls_mode(store, mode_Iu);
5350 set_address(store, &addr);
5354 /* jmp rel <callee> */
/* opcode byte 0xE9 = jmp rel32 */
5355 val = ia32_create_Immediate(NULL, 0, 0xE9);
5356 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5357 addr.index, addr.mem, val);
5358 set_irn_pinned(store, get_irn_pinned(node));
5359 set_ia32_op_type(store, ia32_AddrModeD);
5360 set_ia32_ls_mode(store, mode_Bu);
5361 set_address(store, &addr);
5365 trampoline = be_transform_node(ptr);
5367 /* the callee is typically an immediate */
5368 if (is_SymConst(callee)) {
/* callee - 10, with the -10 folded into the symconst offset */
5369 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5371 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5372 add_ia32_am_offs_int(rel, -10);
/* rel32 = (callee - 10) - trampoline start */
5374 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5376 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5377 addr.index, addr.mem, rel);
5378 set_irn_pinned(store, get_irn_pinned(node));
5379 set_ia32_op_type(store, ia32_AddrModeD);
5380 set_ia32_ls_mode(store, mode_Iu);
5381 set_address(store, &addr);
5386 return new_r_Tuple(new_block, 2, in);
5390 * Transform Builtin node.
5392 static ir_node *gen_Builtin(ir_node *node)
/* Dispatch a Builtin node to the per-kind transformer above; panics on
 * kinds this backend does not implement. */
5394 ir_builtin_kind kind = get_Builtin_kind(node);
5398 return gen_trap(node);
5399 case ir_bk_debugbreak:
5400 return gen_debugbreak(node);
5401 case ir_bk_return_address:
5402 return gen_return_address(node);
5403 case ir_bk_frame_address:
5404 return gen_frame_address(node);
5405 case ir_bk_prefetch:
5406 return gen_prefetch(node);
5408 return gen_ffs(node);
5410 return gen_clz(node);
5412 return gen_ctz(node);
5414 return gen_parity(node);
5415 case ir_bk_popcount:
5416 return gen_popcount(node);
5418 return gen_bswap(node);
5420 return gen_outport(node);
5422 return gen_inport(node);
5423 case ir_bk_inner_trampoline:
5424 return gen_inner_trampoline(node);
5426 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5430 * Transform Proj(Builtin) node.
5432 static ir_node *gen_Proj_Builtin(ir_node *proj)
/* Route a Proj of a Builtin to the matching output of the transformed
 * node, depending on what shape the per-kind transformer produced. */
5434 ir_node *node = get_Proj_pred(proj);
5435 ir_node *new_node = be_transform_node(node);
5436 ir_builtin_kind kind = get_Builtin_kind(node);
5439 case ir_bk_return_address:
5440 case ir_bk_frame_address:
5445 case ir_bk_popcount:
/* these transformers already return the result value itself */
5447 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5450 case ir_bk_debugbreak:
5451 case ir_bk_prefetch:
/* pure memory effects: the transformer returned the memory value */
5453 assert(get_Proj_proj(proj) == pn_Builtin_M);
5456 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5457 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5459 assert(get_Proj_proj(proj) == pn_Builtin_M);
5460 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5462 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple (memory, address) */
5463 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5464 return get_Tuple_pred(new_node, 1);
5466 assert(get_Proj_proj(proj) == pn_Builtin_M);
5467 return get_Tuple_pred(new_node, 0);
5470 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5473 static ir_node *gen_be_IncSP(ir_node *node)
/* IncSP is copied unchanged but marked as flags-clobbering, since the
 * stack adjustment will be emitted as an arithmetic instruction. */
5475 ir_node *res = be_duplicate_node(node);
5476 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5482 * Transform the Projs from a be_Call.
5484 static ir_node *gen_Proj_be_Call(ir_node *node)
/* Translate a Proj of a be_Call into a Proj of the new ia32_Call:
 * the special outputs (sp/M/X_*) map directly; register results are
 * located by searching the new call's outputs for the one constrained
 * to the same single register. */
5486 ir_node *call = get_Proj_pred(node);
5487 ir_node *new_call = be_transform_node(call);
5488 dbg_info *dbgi = get_irn_dbg_info(node);
5489 long proj = get_Proj_proj(node);
5490 ir_mode *mode = get_irn_mode(node);
5493 if (proj == pn_be_Call_M) {
5494 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5496 /* transform call modes */
5497 if (mode_is_data(mode)) {
/* data results take their register class' mode (presumably
 * mode = cls->mode in the lines not shown here - confirm) */
5498 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5502 /* Map from be_Call to ia32_Call proj number */
5503 if (proj == pn_be_Call_sp) {
5504 proj = pn_ia32_Call_stack;
5505 } else if (proj == pn_be_Call_M) {
5506 proj = pn_ia32_Call_M;
5507 } else if (proj == pn_be_Call_X_except) {
5508 proj = pn_ia32_Call_X_except;
5509 } else if (proj == pn_be_Call_X_regular) {
5510 proj = pn_ia32_Call_X_regular;
5512 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5513 int const n_outs = arch_get_irn_n_outs(new_call);
5516 assert(proj >= pn_be_Call_first_res);
5517 assert(req->type & arch_register_req_type_limited);
/* find the new call output limited to exactly the same register */
5519 for (i = 0; i < n_outs; ++i) {
5520 arch_register_req_t const *const new_req
5521 = arch_get_irn_register_req_out(new_call, i);
5523 if (!(new_req->type & arch_register_req_type_limited) ||
5524 new_req->cls != req->cls ||
5525 *new_req->limited != *req->limited)
5534 res = new_rd_Proj(dbgi, new_call, mode, proj);
5536 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pre-assign the fixed registers of the special outputs */
5538 case pn_ia32_Call_stack:
5539 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5542 case pn_ia32_Call_fpcw:
5543 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
5550 static ir_node *gen_Proj_ASM(ir_node *node)
/* Translate a Proj of an ASM node to the corresponding output position
 * of the transformed ASM node. */
5552 ir_mode *mode = get_irn_mode(node);
5553 ir_node *pred = get_Proj_pred(node);
5554 ir_node *new_pred = be_transform_node(pred);
5555 long pos = get_Proj_proj(node);
5557 if (mode == mode_M) {
/* the memory output is always the last output of the new ASM node */
5558 pos = arch_get_irn_n_outs(new_pred)-1;
5559 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5561 } else if (mode_is_float(mode)) {
5564 panic("unexpected proj mode at ASM");
5567 return new_r_Proj(new_pred, mode, pos);
5571 * Transform and potentially renumber Proj nodes.
5573 static ir_node *gen_Proj(ir_node *node)
/* Generic Proj transformer: dispatch on the predecessor's opcode, with
 * special handling for Start's initial-exec Proj, lowered ia32 nodes
 * and gp-register results (which are forced to mode_Iu). */
5575 ir_node *pred = get_Proj_pred(node);
5578 switch (get_irn_opcode(pred)) {
5580 return gen_Proj_Load(node);
5582 return gen_Proj_Store(node);
5584 return gen_Proj_ASM(node);
5586 return gen_Proj_Builtin(node);
5588 return gen_Proj_Div(node);
5590 return gen_Proj_Mod(node);
5592 return gen_Proj_CopyB(node);
5594 return gen_Proj_be_SubSP(node);
5596 return gen_Proj_be_AddSP(node);
5598 return gen_Proj_be_Call(node);
5600 proj = get_Proj_proj(node);
5602 case pn_Start_X_initial_exec: {
5603 ir_node *block = get_nodes_block(pred);
5604 ir_node *new_block = be_transform_node(block);
5605 dbg_info *dbgi = get_irn_dbg_info(node);
5606 /* we exchange the ProjX with a jump */
5607 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5615 if (is_ia32_l_FloattoLL(pred)) {
5616 return gen_Proj_l_FloattoLL(node);
5618 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5622 ir_mode *mode = get_irn_mode(node);
5623 if (ia32_mode_needs_gp_reg(mode)) {
/* force gp results into mode_Iu; keep the old node number to ease
 * debugging */
5624 ir_node *new_pred = be_transform_node(pred);
5625 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5626 get_Proj_proj(node));
5627 new_proj->node_nr = node->node_nr;
5632 return be_duplicate_node(node);
5636 * Enters all transform functions into the generic pointer
5638 static void register_transformers(void)
/* Register one transform callback per opcode; opcodes without an entry
 * keep the defaults installed by be_start_transform_setup(). */
5640 /* first clear the generic function pointer for all ops */
5641 be_start_transform_setup();
5643 be_set_transform_function(op_Add, gen_Add);
5644 be_set_transform_function(op_And, gen_And);
5645 be_set_transform_function(op_ASM, ia32_gen_ASM);
5646 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5647 be_set_transform_function(op_be_Call, gen_be_Call);
5648 be_set_transform_function(op_be_Copy, gen_be_Copy);
5649 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5650 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5651 be_set_transform_function(op_be_Return, gen_be_Return);
5652 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5653 be_set_transform_function(op_Builtin, gen_Builtin);
5654 be_set_transform_function(op_Cmp, gen_Cmp);
5655 be_set_transform_function(op_Cond, gen_Cond);
5656 be_set_transform_function(op_Const, gen_Const);
5657 be_set_transform_function(op_Conv, gen_Conv);
5658 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5659 be_set_transform_function(op_Div, gen_Div);
5660 be_set_transform_function(op_Eor, gen_Eor);
5661 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5662 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5663 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5664 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5665 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5666 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5667 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5668 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5669 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5670 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5671 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5672 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5673 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5674 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5675 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5676 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5677 be_set_transform_function(op_IJmp, gen_IJmp);
5678 be_set_transform_function(op_Jmp, gen_Jmp);
5679 be_set_transform_function(op_Load, gen_Load);
5680 be_set_transform_function(op_Minus, gen_Minus);
5681 be_set_transform_function(op_Mod, gen_Mod);
5682 be_set_transform_function(op_Mul, gen_Mul);
5683 be_set_transform_function(op_Mulh, gen_Mulh);
5684 be_set_transform_function(op_Mux, gen_Mux);
5685 be_set_transform_function(op_Not, gen_Not);
5686 be_set_transform_function(op_Or, gen_Or);
5687 be_set_transform_function(op_Phi, gen_Phi);
5688 be_set_transform_function(op_Proj, gen_Proj);
5689 be_set_transform_function(op_Rotl, gen_Rotl);
5690 be_set_transform_function(op_Shl, gen_Shl);
5691 be_set_transform_function(op_Shr, gen_Shr);
5692 be_set_transform_function(op_Shrs, gen_Shrs);
5693 be_set_transform_function(op_Store, gen_Store);
5694 be_set_transform_function(op_Sub, gen_Sub);
5695 be_set_transform_function(op_Switch, gen_Switch);
5696 be_set_transform_function(op_SymConst, gen_SymConst);
5697 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
/* extra callback used when checking whether upper bits are clean */
5699 be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
5703 * Pre-transform all unknown and noreg nodes.
5705 static void ia32_pretransform_node(void)
/* Pre-transform the per-graph fixed nodes (the NoReg placeholders, the
 * get_eip helper and the fpu truncation mode) and cache the nomem and
 * noreg_GP nodes used all over the transformers above. */
5707 ir_graph *irg = current_ir_graph;
5708 ia32_irg_data_t *irg_data = ia32_get_irg_data(current_ir_graph);
5710 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5711 irg_data->noreg_vfp = be_pre_transform_node(irg_data->noreg_vfp);
5712 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5713 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5714 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5716 nomem = get_irg_no_mem(irg);
5717 noreg_GP = ia32_new_NoReg_gp(irg);
5721 * Post-process all calls if we are in SSE mode.
5722 * The ABI requires that the results are in st0, copy them
5723 * to a xmm register.
5725 static void postprocess_fp_call_results(void)
/* In SSE2 mode float call results still arrive in st(0) as the calling
 * convention demands. For every call recorded in call_list, patch each
 * float-result user: an xStore user becomes a vfst directly; all other
 * users get the value spilled once to the frame via vfst and reloaded
 * into an XMM register via xLoad. */
5729 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5730 ir_node *call = call_list[i];
5731 ir_type *mtp = call_types[i];
/* walk the results backwards so vf0+j stays valid while patching */
5734 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5735 ir_type *res_tp = get_method_res_type(mtp, j);
5736 ir_node *res, *new_res;
5739 if (! is_atomic_type(res_tp)) {
5740 /* no floating point return */
5743 res_mode = get_type_mode(res_tp);
5744 if (! mode_is_float(res_mode)) {
5745 /* no floating point return */
5749 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5752 /* now patch the users */
5753 foreach_out_edge_safe(res, edge) {
5754 ir_node *succ = get_edge_src_irn(edge);
/* Keeps must keep the original value */
5757 if (be_is_Keep(succ))
5760 if (is_ia32_xStore(succ)) {
5761 /* an xStore can be patched into an vfst */
5762 dbg_info *db = get_irn_dbg_info(succ);
5763 ir_node *block = get_nodes_block(succ);
5764 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5765 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5766 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5767 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5768 ir_mode *mode = get_ia32_ls_mode(succ);
5770 ir_node *st = new_bd_ia32_vfst(db, block, base, idx, mem, value, mode);
5771 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_vfst_M);
5772 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5773 if (is_ia32_use_frame(succ))
5774 set_ia32_use_frame(st);
5775 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5776 set_irn_pinned(st, get_irn_pinned(succ));
5777 set_ia32_op_type(st, ia32_AddrModeD);
/* exchanging the node is only valid when the Proj numbers match */
5779 assert((long)pn_ia32_xStore_M == (long)pn_ia32_vfst_M);
5780 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_vfst_X_regular);
5781 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_vfst_X_except);
/* lazily create the st(0) -> xmm copy on the first other user */
5788 if (new_res == NULL) {
5789 dbg_info *db = get_irn_dbg_info(call);
5790 ir_node *block = get_nodes_block(call);
5791 ir_node *frame = get_irg_frame(current_ir_graph);
5792 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5793 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5794 ir_node *vfst, *xld, *new_mem;
5797 /* store st(0) on stack */
5798 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem,
5800 set_ia32_op_type(vfst, ia32_AddrModeD);
5801 set_ia32_use_frame(vfst);
5803 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_vfst_M);
5805 /* load into SSE register */
5806 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5808 set_ia32_op_type(xld, ia32_AddrModeS);
5809 set_ia32_use_frame(xld);
5811 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5812 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* reroute memory users of the call through the new load */
5814 if (old_mem != NULL) {
5815 edges_reroute(old_mem, new_mem);
5819 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5825 /* do the transformation */
5826 void ia32_transform_graph(ir_graph *irg)
/* Drive the firm -> ia32 transformation for one graph: set up the
 * heights and non-address-mode analyses, disable CSE for the duration
 * of the transformation, transform, then post-process float call
 * results when SSE2 is in use, and tear everything down again. */
5830 register_transformers();
5831 initial_fpcw = NULL;
5832 ia32_no_pic_adjust = 0;
5834 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5836 be_timer_push(T_HEIGHTS);
5837 ia32_heights = heights_new(irg);
5838 be_timer_pop(T_HEIGHTS);
5839 ia32_calculate_non_address_mode_nodes(irg);
5841 /* the transform phase is not safe for CSE (yet) because several nodes get
5842 * attributes set after their creation */
5843 cse_last = get_opt_cse();
/* call_list/call_types record float-returning calls for the SSE fixup */
5846 call_list = NEW_ARR_F(ir_node *, 0);
5847 call_types = NEW_ARR_F(ir_type *, 0);
5848 be_transform_graph(irg, ia32_pretransform_node);
5850 if (ia32_cg_config.use_sse2)
5851 postprocess_fp_call_results();
5852 DEL_ARR_F(call_types);
5853 DEL_ARR_F(call_list);
5855 set_opt_cse(cse_last);
5857 ia32_free_non_address_mode_nodes();
5858 heights_free(ia32_heights);
5859 ia32_heights = NULL;
5862 void ia32_init_transform(void)
/* One-time module initialization: register the debug channel. */
5864 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");