2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
33 #include "irgraph_t.h"
38 #include "iredges_t.h"
54 #include "betranshlp.h"
57 #include "bearch_ia32_t.h"
58 #include "ia32_common_transform.h"
59 #include "ia32_nodes_attr.h"
60 #include "ia32_transform.h"
61 #include "ia32_new_nodes.h"
62 #include "ia32_dbg_stat.h"
63 #include "ia32_optimize.h"
64 #include "ia32_address_mode.h"
65 #include "ia32_architecture.h"
67 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* shorthands for the modes of the ia32 fp (x87) and xmm (SSE) register
 * classes */
#define mode_fp (ia32_reg_classes[CLASS_ia32_fp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)

/* debug module handle (present in debug builds only) */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* fpcw = floating-point control word: the pre-transform node and the
 * lazily-transformed one cached by get_fpcw() */
static ir_node *old_initial_fpcw = NULL;
static ir_node *initial_fpcw = NULL;
/* NOTE(review): non-static, so read elsewhere; exact semantics not visible
 * in this chunk */
int ia32_no_pic_adjust;
/* Node-constructor signatures used by the generic transform helpers
 * (gen_binop & friends) so one helper can build many ia32 node kinds.
 * NOTE(review): some signature continuation lines are elided in this view. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

/* binary operation that additionally consumes an eflags input */
typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/* shift/rotate: plain two-operand constructor, no address-mode inputs */
typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
	ir_node *op1, ir_node *op2);

/* destination-address-mode binop: one operand plus a memory destination */
typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

/* destination-address-mode unop: operates directly on the memory operand */
typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem);

/* float binop constructor (trailing parameter line elided here) */
typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/* plain unary operation */
typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations of helpers defined later in this file */
static ir_node *create_immediate_or_transform(ir_node *node);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
	dbg_info *dbgi, ir_node *block,
	ir_node *op, ir_node *orig_node);

/* it is enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
117 /** Return non-zero is a node represents the 0 constant. */
118 static bool is_Const_0(ir_node *node)
120 return is_Const(node) && is_Const_null(node);
123 /** Return non-zero is a node represents the 1 constant. */
124 static bool is_Const_1(ir_node *node)
126 return is_Const(node) && is_Const_one(node);
129 /** Return non-zero is a node represents the -1 constant. */
130 static bool is_Const_Minus_1(ir_node *node)
132 return is_Const(node) && is_Const_all_one(node);
/**
 * Returns true if the constant can be created with a simple x87 float
 * instruction; only 0.0 and 1.0 are matched so far (cf. fldz/fld1 in
 * gen_Const).
 */
static bool is_simple_x87_Const(ir_node *node)
	ir_tarval *tv = get_Const_tarval(node);
	if (tarval_is_null(tv) || tarval_is_one(tv))
	/* TODO: match all the other float constants */
/**
 * Returns true if the constant can be created with a simple SSE
 * instruction sequence instead of being loaded from a constant entity.
 */
static bool is_simple_sse_Const(ir_node *node)
	ir_tarval *tv = get_Const_tarval(node);
	ir_mode *mode = get_tarval_mode(tv);

	/* the zero constant is always simple */
	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bits of the double, byte-wise (little endian) */
		unsigned val = get_tarval_sub_bits(tv, 0) |
			(get_tarval_sub_bits(tv, 1) << 8) |
			(get_tarval_sub_bits(tv, 2) << 16) |
			(get_tarval_sub_bits(tv, 3) << 24);
		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
/**
 * Return noreg, or the pic_base in case of PIC.
 * This is necessary as base address for newly created symbols.
 */
static ir_node *get_symconst_base(void)
	ir_graph *irg = current_ir_graph;

	if (be_options.pic) {
		/* with PIC, symbols must be addressed relative to the pic base */
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		return arch_env->impl->get_pic_base(irg);
/**
 * Transforms a Const node.
 *
 * Float constants become, depending on configuration: an SSE xZero/xMovd
 * (or, with CONSTRUCT_SSE_CONST, a shift-constructed value), an x87
 * fldz/fld1, or a load from a constant-pool entity. Integer constants
 * become an ia32 Const immediate.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	ir_tarval *tv = get_Const_tarval(node);

	if (mode_is_float(mode)) {
		ir_graph *irg = get_irn_irg(node);
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		ia32_isa_t *isa = (ia32_isa_t*) arch_env;

		if (ia32_cg_config.use_sse2) {
			if (tarval_is_null(tv)) {
				/* 0.0: cheap xZero, no memory access needed */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* 1.0: start from all-ones and shift the mantissa bits away */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(irg, NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(irg, NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(irg, NULL, 0, 32);
				ir_node *cnst, *psllq;

				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
					(get_tarval_sub_bits(tv, 5) << 8) |
					(get_tarval_sub_bits(tv, 6) << 16) |
					(get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				/* move the upper half into place */
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general SSE case: load from a constant-pool entity */
			floatent = ia32_create_float_const_entity(isa, tv, NULL);

			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			/* constant loads may be duplicated by the spiller */
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path: special-case 0.0 and 1.0, otherwise load from memory */
		if (tarval_is_null(tv)) {
			load = new_bd_ia32_fldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (tarval_is_one(tv)) {
			load = new_bd_ia32_fld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
			floatent = ia32_create_float_const_entity(isa, tv, NULL);
			/* create_float_const_ent is smart and sometimes creates
			ls_mode = get_type_mode(get_entity_type(floatent));
			base = get_symconst_base();
			load = new_bd_ia32_fld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_add_irn_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_fp, pn_ia32_fld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);
	} else { /* non-float mode */
		/* normalize to mode_Iu; integer immediates are 32 bit on ia32 */
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);
/**
 * Transforms a SymConst.
 *
 * Float modes become a load of the entity's value; TLS entities are
 * addressed via LdTls + Lea; everything else becomes an ia32 Const
 * carrying the entity.
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_D);
		cnst = new_bd_ia32_fld(dbgi, block, noreg_GP, noreg_GP, nomem, ia32_mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
	/* only entity addresses are supported by this backend */
	if (get_SymConst_kind(node) != symconst_addr_ent) {
		panic("backend only support symconst_addr_ent (at %+F)", node);
	entity = get_SymConst_entity(node);
	if (get_entity_owner(entity) == get_tls_type()) {
		/* thread-local entities: address relative to the TLS base */
		ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
		ir_node *lea = new_bd_ia32_Lea(dbgi, block, tls_base, noreg_GP);
		set_ia32_am_sc(lea, entity);
	cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);
/**
 * Wrap @p tp in a fixed-layout two-element array type, keeping @p tp's
 * alignment (used by ia32_create_float_array below).
 */
static ir_type *make_array_type(ir_type *tp)
	unsigned alignment = get_type_alignment_bytes(tp);
	unsigned size = get_type_size_bytes(tp);
	ir_type *res = new_type_array(1, tp);
	set_type_alignment_bytes(res, alignment);
	set_array_bounds_int(res, 0, 0, 2);
	if (alignment > size)
	set_type_size_bytes(res, 2 * size);
	set_type_state(res, layout_fixed);
/**
 * Create a float[2] array type for the given atomic type.
 * The created types are cached per float mode (F, D, E).
 *
 * @param tp the atomic type
 */
static ir_type *ia32_create_float_array(ir_type *tp)
	ir_mode *mode = get_type_mode(tp);

	if (mode == mode_F) {
		static ir_type *float_F;
		arr = float_F = make_array_type(tp);
	} else if (mode == mode_D) {
		static ir_type *float_D;
		arr = float_D = make_array_type(tp);
		static ir_type *float_E;
		arr = float_E = make_array_type(tp);
/** Generates an entity for a known FP const (used for FP Neg + Abs).
 *  Entities are created once per constant kind and cached. */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	/* name, bit-pattern string and mode selector for each known constant */
	static const struct {
		const char *cnst_str;
	} names [ia32_known_const_max] = {
		{ "C_sfp_sign", "0x80000000", 0 },
		{ "C_dfp_sign", "0x8000000000000000", 1 },
		{ "C_sfp_abs", "0x7FFFFFFF", 0 },
		{ "C_dfp_abs", "0x7FFFFFFFFFFFFFFF", 1 },
		{ "C_ull_bias", "0x10000000000000000", 2 }
	static ir_entity *ent_cache[ia32_known_const_max];

	ir_entity *ent = ent_cache[kct];
		ir_graph *irg = current_ir_graph;
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		ia32_isa_t *isa = (ia32_isa_t*) arch_env;
		const char *cnst_str = names[kct].cnst_str;
		ident *name = new_id_from_str(names[kct].name);

		/* pick the tarval mode used to parse the constant string */
		switch (names[kct].mode) {
		case 0: mode = mode_Iu; break;
		case 1: mode = mode_Lu; break;
		case 2: mode = mode_F; break;
		default: panic("internal compiler error");
		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);

		if (kct == ia32_ULLBIAS) {
			/* ULLBIAS is emitted as a float[2] = { 0.0, bias } entity */
			ir_type *type = ia32_get_prim_type(mode_F);
			ir_type *atype = ia32_create_float_array(type);
			ir_initializer_t *initializer;

			ent = new_entity(get_glob_type(), name, atype);

			set_entity_ld_ident(ent, name);
			set_entity_visibility(ent, ir_visibility_private);
			add_entity_linkage(ent, IR_LINKAGE_CONSTANT);

			initializer = create_initializer_compound(2);
			set_initializer_compound_value(initializer, 0,
				create_initializer_tarval(get_mode_null(mode)));
			set_initializer_compound_value(initializer, 1,
				create_initializer_tarval(tv));
			set_entity_initializer(ent, initializer);
			ent = ia32_create_float_const_entity(isa, tv, name);
		/* cache the entry */
		ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2,
	/* float constants are always available */
	if (is_Const(node)) {
		mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			ir_tarval *tv = get_Const_tarval(node);
			/* only constants that convert losslessly to double qualify */
			if (!tarval_ieee754_can_conv_lossless(tv, mode_D))
			if (ia32_cg_config.use_sse2) {
				if (is_simple_sse_Const(node))
				if (is_simple_x87_Const(node))
			if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);
	pn = get_Proj_proj(node);
	if (!is_Load(load) || pn != pn_Load_res)
	/* the load must be in the same block to be foldable */
	if (get_nodes_block(load) != block)
	mode = get_irn_mode(node);
	/* we can't fold mode_E AM */
	if (mode == ia32_mode_E)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && ia32_prevents_AM(block, load, other))
	if (other2 != NULL && ia32_prevents_AM(block, load, other2))
/** Result of match_arguments(): transformed operands plus any matched
 *  source address mode (several fields are elided in this view). */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
	ia32_op_type_t op_type;       /**< ia32_Normal or ia32_AddrModeS */
	unsigned commutative : 1;     /**< operation's operands may be swapped */
	unsigned ins_permuted : 1;    /**< operands were swapped during matching */
559 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
561 /* construct load address */
562 memset(addr, 0, sizeof(addr[0]));
563 ia32_create_address_mode(addr, ptr, ia32_create_am_normal);
565 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
566 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
567 addr->mem = be_transform_node(mem);
/**
 * Fill @p am so that @p node (a float Const or a Proj(Load)) can be used
 * as a source address-mode operand.
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	/* floating point immediates */
	if (is_Const(node)) {
		/* put the constant into a constant-pool entity and address that */
		ir_graph *irg = get_irn_irg(node);
		const arch_env_t *arch_env = be_get_irg_arch_env(irg);
		ia32_isa_t *isa = (ia32_isa_t*) arch_env;
		ir_tarval *tv = get_Const_tarval(node);
		ir_entity *entity = ia32_create_float_const_entity(isa, tv, NULL);
		addr->base = get_symconst_base();
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		addr->tls_segment = false;
		am->ls_mode = get_type_mode(get_entity_type(entity));
		am->pinned = op_pin_state_floats;

	/* Proj(Load): reuse the load's address and memory dependency */
	load = get_Proj_pred(node);
	ptr = get_Load_ptr(load);
	mem = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned = get_irn_pinned(load);
	am->ls_mode = get_Load_mode(load);
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/**
 * Copy the fields of an ia32_address_t into the address-mode attributes
 * of the given ia32 node.
 */
static void set_address(ir_node *node, const ia32_address_t *addr)
	set_ia32_am_scale(node, addr->scale);
	set_ia32_am_sc(node, addr->symconst_ent);
	set_ia32_am_offs_int(node, addr->offset);
	set_ia32_am_tls_segment(node, addr->tls_segment);
	if (addr->symconst_sign)
		set_ia32_am_sc_sign(node);
	set_ia32_use_frame(node);
	set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
	set_address(node, &am->addr);

	set_ia32_op_type(node, am->op_type);
	set_ia32_ls_mode(node, am->ls_mode);
	if (am->pinned == op_pin_state_pinned) {
		/* beware: some nodes are already pinned and did not allow to change the state */
		if (get_irn_pinned(node) != op_pin_state_pinned)
			set_irn_pinned(node, op_pin_state_pinned);
	set_ia32_commutative(node);
/**
 * Check, if a given node is a Down-Conv, i.e. an integer Conv
 * from a mode with more bits to a mode with the same number of bits
 * or fewer. (Callers such as ia32_skip_downconv additionally restrict
 * this to single-user nodes.)
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 */
static int is_downconv(const ir_node *node)
	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
	/* both modes must live in gp registers and the target must not widen */
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
/** Skip all Down-Conv's on a given node and return the resulting node. */
ir_node *ia32_skip_downconv(ir_node *node)
	while (is_downconv(node)) {
		/* we only want to skip the conv when we're the only user
		 * (because this test is used in the context of address-mode selection
		 * and we don't want to use address mode for multiple users) */
		if (get_irn_n_edges(node) > 1)
		node = get_Conv_op(node);
/** Return true iff @p node converts from a float mode to an equal-sized
 *  or narrower mode (float source; the Conv guard is elided in this view). */
static bool is_float_downconv(const ir_node *node)
	ir_node *pred = get_Conv_op(node);
	ir_mode *pred_mode = get_irn_mode(pred);
	ir_mode *mode = get_irn_mode(node);
	return mode_is_float(pred_mode)
		&& get_mode_size_bits(mode) <= get_mode_size_bits(pred_mode);
696 static ir_node *ia32_skip_float_downconv(ir_node *node)
698 while (is_float_downconv(node)) {
699 node = get_Conv_op(node);
/** Return true iff @p node is a single-user Conv between gp-register
 *  modes of identical bit size (i.e. it only changes signedness). */
static bool is_sameconv(ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (because this test is used in the context of address-mode selection
	 * and we don't want to use address mode for multiple users) */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
726 /** Skip all signedness convs */
727 static ir_node *ia32_skip_sameconv(ir_node *node)
729 while (is_sameconv(node)) {
730 node = get_Conv_op(node);
736 static ir_node *transform_sext(ir_node *node, ir_node *orig_node)
738 ir_mode *mode = get_irn_mode(node);
739 ir_node *block = get_nodes_block(node);
740 dbg_info *dbgi = get_irn_dbg_info(node);
741 return create_I2I_Conv(mode, mode_Is, dbgi, block, node, orig_node);
744 static ir_node *transform_zext(ir_node *node, ir_node *orig_node)
746 ir_mode *mode = get_irn_mode(node);
747 ir_node *block = get_nodes_block(node);
748 dbg_info *dbgi = get_irn_dbg_info(node);
749 /* normalize to an unsigned mode */
750 switch (get_mode_size_bits(mode)) {
751 case 8: mode = mode_Bu; break;
752 case 16: mode = mode_Hu; break;
754 panic("ia32: invalid mode in zest: %+F", node);
756 return create_I2I_Conv(mode, mode_Iu, dbgi, block, node, orig_node);
759 static ir_node *transform_upconv(ir_node *node, ir_node *orig_node)
761 ir_mode *mode = get_irn_mode(node);
762 if (mode_is_signed(mode)) {
763 return transform_sext(node, orig_node);
765 return transform_zext(node, orig_node);
/** Return the NoReg node matching @p mode's register class
 *  (GP, xmm with SSE2, or x87 fp). */
static ir_node *get_noreg(ir_mode *const mode)
	if (!mode_is_float(mode)) {
	} else if (ia32_cg_config.use_sse2) {
		return ia32_new_NoReg_xmm(current_ir_graph);
		return ia32_new_NoReg_fp(current_ir_graph);
/**
 * matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr = &am->addr;
	ir_mode *mode = get_irn_mode(op2);
	int mode_bits = get_mode_size_bits(mode);
	ir_node *new_op1, *new_op2;
	unsigned commutative;
	int use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	/* decode the match flags once */
	commutative = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am = (flags & match_am) != 0;
	use_immediate = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* 8/16-bit address mode only when the operation explicitly allows it */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
	(mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
		op1 = ia32_skip_downconv(op1);
	/* signedness-only convs never change the bits */
	op2 = ia32_skip_sameconv(op2);
	op1 = ia32_skip_sameconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	 * second operand */
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = ia32_try_create_Immediate(op2, 'i');

	if (new_op2 == NULL &&
	use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		/* fold op2's load into this operation (source address mode) */
		build_address(am, op2, ia32_create_am_normal);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		new_op2 = get_noreg(mode);
		am->op_type = ia32_AddrModeS;
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
	ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		/* commutative: fold op1's load instead, swapping the operands */
		build_address(am, op1, ia32_create_am_normal);
		ir_node *const noreg = get_noreg(mode);
		if (new_op2 != NULL) {
		new_op1 = be_transform_node(op2);
		am->ins_permuted = true;
		am->op_type = ia32_AddrModeS;
		am->op_type = ia32_Normal;
		if (flags & match_try_am) {

		/* no AM matched: transform operands normally, extending sub-32bit
		 * values as requested by the flags */
		mode = get_irn_mode(op2);
		if (get_mode_size_bits(mode) != 32
		&& (flags & (match_mode_neutral | match_upconv | match_zero_ext))) {
			if (flags & match_upconv) {
				new_op1 = (op1 == NULL ? NULL : transform_upconv(op1, op1));
				new_op2 = transform_upconv(op2, op2);
			} else if (flags & match_zero_ext) {
				new_op1 = (op1 == NULL ? NULL : transform_zext(op1, op1));
				new_op2 = transform_zext(op2, op2);
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			assert(flags & match_mode_neutral);

	new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
	new_op2 = be_transform_node(op2);

	/* fill unset address parts with defaults */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1 = new_op1;
	am->new_op2 = new_op2;
	am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 *         @p node otherwise
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	/* nothing to do unless a load was folded into the node */
	if (am->mem_proj == NULL)

	/* we have to create a mode_T so the old MemProj can attach to us */
	mode = get_irn_mode(node);
	load = get_Proj_pred(am->mem_proj);

	/* redirect users of the folded load to the new node */
	be_set_transformed_node(load, node);

	if (mode != mode_T) {
		set_irn_mode(node, mode_T);
		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node The original node for which the binop is created
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
	construct_binop_func *func, match_flags_t flags)
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reattach the mem Proj of a folded load, if any */
	new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op.
 */
	n_ia32_l_binop_left, /**< ia32 left input */
	n_ia32_l_binop_right, /**< ia32 right input */
	n_ia32_l_binop_eflags /**< ia32 eflags input */
/* the generic input indices must line up with the generated Adc and Sbb
 * node inputs so gen_binop_flags can treat them uniformly */
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT((int)n_ia32_l_binop_left == (int)n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_right == (int)n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT((int)n_ia32_l_binop_eflags == (int)n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node The node to transform
 * @param func The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
	match_flags_t flags)
	ir_node *src_block = get_nodes_block(node);
	/* fetch inputs via the generic l_binop indices (see asserts above) */
	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
	ir_node *block, *new_node, *new_eflags;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* eflags is passed as other_op: AM must not depend on it */
	match_arguments(&am, src_block, op1, op2, eflags, flags);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_eflags = be_transform_node(eflags);
	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, new_eflags);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
1031 static ir_node *get_fpcw(void)
1033 if (initial_fpcw != NULL)
1034 return initial_fpcw;
1036 initial_fpcw = be_transform_node(old_initial_fpcw);
1037 return initial_fpcw;
/** Skip float Convs that do not narrow the value, but only while each
 *  conv has a single user. */
static ir_node *skip_float_upconv(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	assert(mode_is_float(mode));

	while (is_Conv(node)) {
		ir_node *pred = get_Conv_op(node);
		ir_mode *pred_mode = get_irn_mode(pred);

		/* suboptimal, but without this check the address mode matcher
		 * can incorrectly think that something has only 1 user */
		if (get_irn_n_edges(node) > 1)

		/* stop at non-float predecessors and at real down-conversions */
		if (!mode_is_float(pred_mode)
		|| get_mode_size_bits(pred_mode) > get_mode_size_bits(mode))
1065 static void check_x87_floatmode(ir_mode *mode)
1067 if (mode != ia32_mode_E) {
1068 panic("ia32: x87 only supports x86 extended float mode");
/**
 * Construct a standard x87 binary operation, set AM and immediate if
 * required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
	construct_binop_float_func *func)
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	ia32_x87_attr_t *attr;
	/* All operations are considered commutative, because there are reverse
	 * variants of the x87 instructions (ins_permuted records a swap) */
	match_flags_t flags = match_commutative | match_am;
	= is_Div(node) ? get_Div_resmode(node) : get_irn_mode(node);
	check_x87_floatmode(mode);

	op1 = skip_float_upconv(op1);
	op2 = skip_float_upconv(op2);

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, get_fpcw());
	set_am_attributes(new_node, &am);

	/* record whether match_arguments swapped the operands */
	attr = get_ia32_x87_attr(new_node);
	attr->attr.data.ins_permuted = am.ins_permuted;

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
	construct_shift_func *func,
	match_flags_t flags)
	ir_mode *mode = get_irn_mode(node);

	assert(! mode_is_float(mode));
	assert(flags & match_immediate);
	assert((flags & ~(match_mode_neutral | match_zero_ext | match_upconv | match_immediate)) == 0);

	if (get_mode_modulo_shift(mode) != 32) {
		/* TODO: implement special cases for non-modulo shifts */
		panic("modulo shift!=32 not supported by ia32 backend");

	/* prepare the shifted value (op1) */
	if (flags & match_mode_neutral) {
		op1 = ia32_skip_downconv(op1);
		new_op1 = be_transform_node(op1);
		op1 = ia32_skip_sameconv(op1);
		if (get_mode_size_bits(mode) != 32) {
			if (flags & match_upconv) {
				new_op1 = transform_upconv(op1, node);
			} else if (flags & match_zero_ext) {
				new_op1 = transform_zext(op1, node);
				/* match_mode_neutral not handled here because it makes no
				 * sense for shift operations */
				panic("ia32 code selection failed for %+F", node);
			new_op1 = be_transform_node(op1);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
		ir_node *const op = get_Conv_op(op2);
		if (mode_is_float(get_irn_mode(op)))
		assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);

	/* the count may become an immediate */
	new_op2 = create_immediate_or_transform(op2);

	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *new_node = func(dbgi, new_block, new_op1, new_op2);
	SET_IA32_ORIG_NODE(new_node, node);

	/* lowered shift instruction may have a dependency operand, handle it here */
	if (get_irn_arity(node) == 3) {
		/* we have a dependency */
		ir_node* dep = get_irn_n(node, 2);
		if (get_irn_n_edges(dep) > 1) {
			/* ... which has at least one user other than 'node' */
			ir_node *new_dep = be_transform_node(dep);
			add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
	match_flags_t flags)
	ir_node *block, *new_block, *new_op, *new_node;

	assert(flags == 0 || flags == match_mode_neutral);
	if (flags & match_mode_neutral) {
		/* the upper bits do not matter, strip narrowing convs */
		op = ia32_skip_downconv(op);

	new_op = be_transform_node(op);
	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op);

	SET_IA32_ORIG_NODE(new_node, node);
/* Create an ia32 Lea node computing the address described by *addr
 * (base + index*scale + offset/symconst) and attach the address attributes. */
1225 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1226 ia32_address_t *addr)
1236 base = be_transform_node(base);
1243 idx = be_transform_node(idx);
1246 /* segment overrides are ineffective for Leas :-( so we have to patch
1248 if (addr->tls_segment) {
/* materialize the TLS base explicitly and fold it into the base via an
 * extra Lea, then clear the segment flag so it is not emitted */
1249 ir_node *tls_base = new_bd_ia32_LdTls(NULL, block);
1250 assert(addr->symconst_ent != NULL);
1251 if (base == noreg_GP)
1254 base = new_bd_ia32_Lea(dbgi, block, tls_base, base);
1255 addr->tls_segment = false;
1258 res = new_bd_ia32_Lea(dbgi, block, base, idx);
1259 set_address(res, addr);
1265 * Returns non-zero if a given address mode has a symbolic or
1266 * numerical offset != 0.
1268 static int am_has_immediates(const ia32_address_t *addr)
1270 return addr->offset != 0 || addr->symconst_ent != NULL
1271 || addr->frame_entity || addr->use_frame;
1274 typedef ir_node* (*new_shiftd_func)(dbg_info *dbgi, ir_node *block,
1275 ir_node *high, ir_node *low,
1279 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
1280 * op1 - target to be shifted
1281 * op2 - contains bits to be shifted into target
1283 * Only op3 can be an immediate.
/* Build a double-precision shift (ShlD/ShrD) from the transformed high/low
 * values and the shift count; 'func' selects the concrete node constructor. */
1285 static ir_node *gen_64bit_shifts(dbg_info *dbgi, ir_node *block,
1286 ir_node *high, ir_node *low, ir_node *count,
1287 new_shiftd_func func)
1289 ir_node *new_block = be_transform_node(block);
1290 ir_node *new_high = be_transform_node(high);
1291 ir_node *new_low = be_transform_node(low);
1295 /* the shift amount can be any mode that is bigger than 5 bits, since all
1296 * other bits are ignored anyway */
1297 while (is_Conv(count) &&
1298 get_irn_n_edges(count) == 1 &&
1299 mode_is_int(get_irn_mode(count))) {
1300 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
1301 count = get_Conv_op(count);
/* count may become an immediate operand if it is constant */
1303 new_count = create_immediate_or_transform(count);
1305 new_node = func(dbgi, new_block, new_high, new_low, new_count);
1310 * Tests whether 2 values result in 'x' and '32-x' when interpreted as a shift
/* Returns true iff value1 and value2 are long constants v1, v2 with
 * v1 <= v2 and v2 == 32-v1, i.e. complementary 32bit shift amounts. */
1313 static bool is_complementary_shifts(ir_node *value1, ir_node *value2)
1315 if (is_Const(value1) && is_Const(value2)) {
1316 ir_tarval *tv1 = get_Const_tarval(value1);
1317 ir_tarval *tv2 = get_Const_tarval(value2);
1318 if (tarval_is_long(tv1) && tarval_is_long(tv2)) {
1319 long v1 = get_tarval_long(tv1);
1320 long v2 = get_tarval_long(tv2);
1321 return v1 <= v2 && v2 == 32-v1;
/* Try to combine an Or/Add of a Shl/Shr pair into a single double-precision
 * shift (ShlD/ShrD). Matches both the constant x / 32-x form and the
 * Not(c)-based pattern produced by lower_dw. Returns NULL on no match
 * (NOTE(review): the failure path is on lines not shown here — confirm). */
1327 static ir_node *match_64bit_shift(ir_node *node)
1329 ir_node *op1 = get_binop_left(node);
1330 ir_node *op2 = get_binop_right(node);
1331 assert(is_Or(node) || is_Add(node));
1339 /* match ShlD operation */
1340 if (is_Shl(op1) && is_Shr(op2)) {
1341 ir_node *shl_right = get_Shl_right(op1);
1342 ir_node *shl_left = get_Shl_left(op1);
1343 ir_node *shr_right = get_Shr_right(op2);
1344 ir_node *shr_left = get_Shr_left(op2);
1345 /* constant ShlD operation */
1346 if (is_complementary_shifts(shl_right, shr_right)) {
1347 dbg_info *dbgi = get_irn_dbg_info(node);
1348 ir_node *block = get_nodes_block(node);
1349 return gen_64bit_shifts(dbgi, block, shl_left, shr_left, shl_right,
1352 /* constant ShrD operation */
1353 if (is_complementary_shifts(shr_right, shl_right)) {
1354 dbg_info *dbgi = get_irn_dbg_info(node);
1355 ir_node *block = get_nodes_block(node);
1356 return gen_64bit_shifts(dbgi, block, shr_left, shl_left, shr_right,
1359 /* lower_dw produces the following for ShlD:
1360 * Or(Shr(Shr(high,1),Not(c)),Shl(low,c)) */
1361 if (is_Shr(shr_left) && is_Not(shr_right)
1362 && is_Const_1(get_Shr_right(shr_left))
1363 && get_Not_op(shr_right) == shl_right) {
1364 dbg_info *dbgi = get_irn_dbg_info(node);
1365 ir_node *block = get_nodes_block(node);
1366 ir_node *val_h = get_Shr_left(shr_left);
1367 return gen_64bit_shifts(dbgi, block, shl_left, val_h, shl_right,
1370 /* lower_dw produces the following for ShrD:
1371 * Or(Shl(Shl(high,1),Not(c)), Shr(low,c)) */
1372 if (is_Shl(shl_left) && is_Not(shl_right)
1373 && is_Const_1(get_Shl_right(shl_left))
1374 && get_Not_op(shl_right) == shr_right) {
1375 dbg_info *dbgi = get_irn_dbg_info(node);
1376 ir_node *block = get_nodes_block(node);
1377 ir_node *val_h = get_Shl_left(shl_left);
1378 return gen_64bit_shifts(dbgi, block, shr_left, val_h, shr_right,
1387 * Creates an ia32 Add.
1389 * @return the created ia32 Add node
/* Transform an Add: float adds go to SSE2/x87, integer adds try (in order)
 * 64bit shift fusion, pure-immediate folding to Const, Lea forms, and
 * finally an Add with source address mode. */
1391 static ir_node *gen_Add(ir_node *node)
1393 ir_mode *mode = get_irn_mode(node);
1394 ir_node *op1 = get_Add_left(node);
1395 ir_node *op2 = get_Add_right(node);
1397 ir_node *block, *new_block, *new_node, *add_immediate_op;
1398 ia32_address_t addr;
1399 ia32_address_mode_t am;
1401 new_node = match_64bit_shift(node);
1402 if (new_node != NULL)
1405 if (mode_is_float(mode)) {
1406 if (ia32_cg_config.use_sse2)
1407 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1408 match_commutative | match_am);
1410 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fadd);
1413 ia32_mark_non_am(node);
1417 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1418 * 1. Add with immediate -> Lea
1419 * 2. Add with possible source address mode -> Add
1420 * 3. Otherwise -> Lea
1422 memset(&addr, 0, sizeof(addr));
1423 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1424 add_immediate_op = NULL;
1426 dbgi = get_irn_dbg_info(node);
1427 block = get_nodes_block(node);
1428 new_block = be_transform_node(block);
/* case 0: the whole address folded to pure immediates -> single Const */
1431 if (addr.base == NULL && addr.index == NULL) {
1432 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1433 addr.symconst_sign, 0, addr.offset);
1434 SET_IA32_ORIG_NODE(new_node, node);
1437 /* add with immediate? */
1438 if (addr.index == NULL) {
1439 add_immediate_op = addr.base;
1440 } else if (addr.base == NULL && addr.scale == 0) {
1441 add_immediate_op = addr.index;
1444 if (add_immediate_op != NULL) {
/* no immediates at all: the Add is a no-op (x + 0) */
1445 if (!am_has_immediates(&addr)) {
1446 #ifdef DEBUG_libfirm
1447 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1450 return be_transform_node(add_immediate_op);
1453 new_node = create_lea_from_address(dbgi, new_block, &addr);
1454 SET_IA32_ORIG_NODE(new_node, node);
1458 /* test if we can use source address mode */
1459 match_arguments(&am, block, op1, op2, NULL, match_commutative
1460 | match_mode_neutral | match_am | match_immediate | match_try_am);
1462 /* construct an Add with source address mode */
1463 if (am.op_type == ia32_AddrModeS) {
1464 ia32_address_t *am_addr = &am.addr;
1465 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1466 am_addr->index, am_addr->mem, am.new_op1,
1468 set_am_attributes(new_node, &am);
1469 SET_IA32_ORIG_NODE(new_node, node);
1471 new_node = fix_mem_proj(new_node, &am);
1476 /* otherwise construct a lea */
1477 new_node = create_lea_from_address(dbgi, new_block, &addr);
1478 SET_IA32_ORIG_NODE(new_node, node);
1483 * Creates an ia32 Mul.
1485 * @return the created ia32 Mul node
/* Transform a Mul: SSE2 xMul or x87 fmul for floats, otherwise an IMul
 * that may use address mode, immediates and commutativity. */
1487 static ir_node *gen_Mul(ir_node *node)
1489 ir_node *op1 = get_Mul_left(node);
1490 ir_node *op2 = get_Mul_right(node);
1491 ir_mode *mode = get_irn_mode(node);
1493 if (mode_is_float(mode)) {
1494 if (ia32_cg_config.use_sse2)
1495 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1496 match_commutative | match_am);
1498 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fmul);
1500 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1501 match_commutative | match_am | match_mode_neutral |
1502 match_immediate | match_am_and_immediates);
1506 * Creates an ia32 Mulh.
1507 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1508 * this result while Mul returns the lower 32 bit.
1510 * @return the created ia32 Mulh node
/* Transform a Mulh (upper 32 bits of a 64bit multiply): signed uses
 * IMul1OP, unsigned uses Mul; the result is the res_high Proj. */
1512 static ir_node *gen_Mulh(ir_node *node)
1514 dbg_info *dbgi = get_irn_dbg_info(node);
1515 ir_node *op1 = get_Mulh_left(node);
1516 ir_node *op2 = get_Mulh_right(node);
1517 ir_mode *mode = get_irn_mode(node);
1519 ir_node *proj_res_high;
1521 if (get_mode_size_bits(mode) != 32) {
1522 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1525 if (mode_is_signed(mode)) {
1526 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1527 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1529 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1530 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1532 return proj_res_high;
1536 * Creates an ia32 And.
1538 * @return The created ia32 And node
/* Transform an And. And with 0xFF/0xFFFF is recognized as a zero extension
 * and turned into a Conv; everything else becomes an ia32 And. */
1540 static ir_node *gen_And(ir_node *node)
1542 ir_node *op1 = get_And_left(node);
1543 ir_node *op2 = get_And_right(node);
1544 assert(! mode_is_float(get_irn_mode(node)));
1546 /* is it a zero extension? */
1547 if (is_Const(op2)) {
1548 ir_tarval *tv = get_Const_tarval(op2);
1549 long v = get_tarval_long(tv);
1551 if (v == 0xFF || v == 0xFFFF) {
1552 dbg_info *dbgi = get_irn_dbg_info(node);
1553 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection (8bit for 0xFF) is on lines not
 * shown here — confirm against the full source */
1560 assert(v == 0xFFFF);
1563 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1568 return gen_binop(node, op1, op2, new_bd_ia32_And,
1569 match_commutative | match_mode_neutral | match_am | match_immediate);
1573 * Creates an ia32 Or.
1575 * @return The created ia32 Or node
/* Transform an Or: first try to fuse into a 64bit double shift, otherwise
 * build an ia32 Or with AM/immediate support. */
1577 static ir_node *gen_Or(ir_node *node)
1579 ir_node *op1 = get_Or_left(node);
1580 ir_node *op2 = get_Or_right(node);
1583 res = match_64bit_shift(node);
1587 assert (! mode_is_float(get_irn_mode(node)));
1588 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1589 | match_mode_neutral | match_am | match_immediate);
1595 * Creates an ia32 Eor.
1597 * @return The created ia32 Eor node
/* Transform an Eor (exclusive or) into an ia32 Xor. */
1599 static ir_node *gen_Eor(ir_node *node)
1601 ir_node *op1 = get_Eor_left(node);
1602 ir_node *op2 = get_Eor_right(node);
1604 assert(! mode_is_float(get_irn_mode(node)));
1605 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1606 | match_mode_neutral | match_am | match_immediate);
1611 * Creates an ia32 Sub.
1613 * @return The created ia32 Sub node
/* Transform a Sub: SSE2 xSub or x87 fsub for floats, otherwise an ia32 Sub.
 * Sub with a constant should have been normalized to Add earlier; warn. */
1615 static ir_node *gen_Sub(ir_node *node)
1617 ir_node *op1 = get_Sub_left(node);
1618 ir_node *op2 = get_Sub_right(node);
1619 ir_mode *mode = get_irn_mode(node);
1621 if (mode_is_float(mode)) {
1622 if (ia32_cg_config.use_sse2)
1623 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1625 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fsub);
1628 if (is_Const(op2)) {
1629 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1633 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1634 | match_am | match_immediate);
/* Combine a node's own memory input with the memory consumed by its address
 * mode, filtering out predecessors that would create a memory loop (the
 * load whose value feeds the node). Produces a Sync when both remain. */
1637 static ir_node *transform_AM_mem(ir_node *const block,
1638 ir_node *const src_val,
1639 ir_node *const src_mem,
1640 ir_node *const am_mem)
1642 if (is_NoMem(am_mem)) {
1643 return be_transform_node(src_mem);
1644 } else if (is_Proj(src_val) &&
1646 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1647 /* avoid memory loop */
1649 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* flatten the Sync: keep all predecessors except the consumed load */
1650 ir_node *const ptr_pred = get_Proj_pred(src_val);
1651 int const arity = get_Sync_n_preds(src_mem);
1656 NEW_ARR_A(ir_node*, ins, arity + 1);
1658 /* NOTE: This sometimes produces dead-code because the old sync in
1659 * src_mem might not be used anymore, we should detect this case
1660 * and kill the sync... */
1661 for (i = arity - 1; i >= 0; --i) {
1662 ir_node *const pred = get_Sync_pred(src_mem, i);
1664 /* avoid memory loop */
1665 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1668 ins[n++] = be_transform_node(pred);
1671 if (n==1 && ins[0] == am_mem) {
1673 /* creating a new Sync and relying on CSE may fail,
1674 * if am_mem is a ProjM, which does not yet verify. */
1678 return new_r_Sync(block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1682 ins[0] = be_transform_node(src_mem);
1684 return new_r_Sync(block, 2, ins);
1689 * Create a 32bit to 64bit signed extension.
1691 * @param dbgi debug info
1692 * @param block the block where node nodes should be placed
1693 * @param val the value to extend
1694 * @param orig the original node
/* Sign-extend a 32bit value to 64bit: either the short cdq/cltd form
 * (needs the value in eax, hence the ProduceVal) or an arithmetic
 * shift right by 31 to replicate the sign bit. */
1696 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1697 ir_node *val, const ir_node *orig)
1702 if (ia32_cg_config.use_short_sex_eax) {
1703 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1704 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1706 ir_graph *const irg = get_Block_irg(block);
1707 ir_node *const imm31 = ia32_create_Immediate(irg, NULL, 0, 31);
1708 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1710 SET_IA32_ORIG_NODE(res, orig);
1715 * Generates an ia32 Div with additional infrastructure for the
1716 * register allocator if needed.
/* Common transformation for Div and Mod nodes: build an ia32 IDiv (signed,
 * with explicit sign extension of the high part) or Div (unsigned, high
 * part zeroed) with possible source address mode on the divisor. */
1718 static ir_node *create_Div(ir_node *node)
1720 dbg_info *dbgi = get_irn_dbg_info(node);
1721 ir_node *block = get_nodes_block(node);
1722 ir_node *new_block = be_transform_node(block);
1723 int throws_exception = ir_throws_exception(node);
1730 ir_node *sign_extension;
1731 ia32_address_mode_t am;
1732 ia32_address_t *addr = &am.addr;
1734 /* the upper bits have random contents for smaller modes */
1735 switch (get_irn_opcode(node)) {
1737 op1 = get_Div_left(node);
1738 op2 = get_Div_right(node);
1739 mem = get_Div_mem(node);
1740 mode = get_Div_resmode(node);
1743 op1 = get_Mod_left(node);
1744 op2 = get_Mod_right(node);
1745 mem = get_Mod_mem(node);
1746 mode = get_Mod_resmode(node);
1749 panic("invalid divmod node %+F", node);
1752 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv);
1754 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1755 is the memory of the consumed address. We can have only the second op as address
1756 in Div nodes, so check only op2. */
1757 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1759 if (mode_is_signed(mode)) {
/* edx:eax dividend; the sign extension fills edx */
1760 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1761 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1762 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned division: upper half of the dividend is simply zero */
1764 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1766 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1767 addr->index, new_mem, am.new_op2,
1768 am.new_op1, sign_extension);
1770 ir_set_throws_exception(new_node, throws_exception);
1772 set_irn_pinned(new_node, get_irn_pinned(node));
1774 set_am_attributes(new_node, &am);
1775 SET_IA32_ORIG_NODE(new_node, node);
1777 new_node = fix_mem_proj(new_node, &am);
1783 * Generates an ia32 Mod.
/* Mod shares the divmod machinery with Div; create_Div dispatches on opcode. */
1785 static ir_node *gen_Mod(ir_node *node)
1787 return create_Div(node);
1791 * Generates an ia32 Div.
/* Transform a Div: float divisions go to SSE2 xDiv or x87 fdiv,
 * integer divisions share create_Div with Mod. */
1793 static ir_node *gen_Div(ir_node *node)
1795 ir_mode *mode = get_Div_resmode(node);
1796 if (mode_is_float(mode)) {
1797 ir_node *op1 = get_Div_left(node);
1798 ir_node *op2 = get_Div_right(node);
1800 if (ia32_cg_config.use_sse2) {
1801 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1803 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_fdiv);
1807 return create_Div(node);
1811 * Creates an ia32 Shl.
1813 * @return The created ia32 Shl node
/* Transform a Shl into an ia32 Shl (mode-neutral, immediate allowed). */
1815 static ir_node *gen_Shl(ir_node *node)
1817 ir_node *left = get_Shl_left(node);
1818 ir_node *right = get_Shl_right(node);
1820 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1821 match_mode_neutral | match_immediate);
1825 * Creates an ia32 Shr.
1827 * @return The created ia32 Shr node
/* Transform a Shr into an ia32 Shr; the operand must be zero-extended
 * since logical shifts depend on the upper bits being zero. */
1829 static ir_node *gen_Shr(ir_node *node)
1831 ir_node *left = get_Shr_left(node);
1832 ir_node *right = get_Shr_right(node);
1834 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
1835 match_immediate | match_zero_ext);
1839 * Creates an ia32 Sar.
1841 * @return The created ia32 Shrs node
/* Transform a Shrs (arithmetic shift right). Two special cases are
 * recognized first: a full sign-replication shift mapped to cdq/Sar 31
 * (NOTE(review): presumably guarded by val == 31 on a line not shown —
 * confirm), and the Shl/Shrs pair implementing 8/16bit sign extension,
 * which becomes a sign-extending Conv. */
1843 static ir_node *gen_Shrs(ir_node *node)
1845 ir_node *left = get_Shrs_left(node);
1846 ir_node *right = get_Shrs_right(node);
1848 if (is_Const(right)) {
1849 ir_tarval *tv = get_Const_tarval(right);
1850 long val = get_tarval_long(tv);
1852 /* this is a sign extension */
1853 dbg_info *dbgi = get_irn_dbg_info(node);
1854 ir_node *block = be_transform_node(get_nodes_block(node));
1855 ir_node *new_op = be_transform_node(left);
1857 return create_sex_32_64(dbgi, block, new_op, node);
1861 /* 8 or 16 bit sign extension? */
1862 if (is_Const(right) && is_Shl(left)) {
1863 ir_node *shl_left = get_Shl_left(left);
1864 ir_node *shl_right = get_Shl_right(left);
1865 if (is_Const(shl_right)) {
1866 ir_tarval *tv1 = get_Const_tarval(right);
1867 ir_tarval *tv2 = get_Const_tarval(shl_right);
1868 if (tv1 == tv2 && tarval_is_long(tv1)) {
1869 long val = get_tarval_long(tv1);
1870 if (val == 16 || val == 24) {
1871 dbg_info *dbgi = get_irn_dbg_info(node);
1872 ir_node *block = get_nodes_block(node);
1882 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: Sar with upconv'd operand (upper bits matter) */
1891 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
1892 match_immediate | match_upconv);
1898 * Creates an ia32 Rol.
1900 * @param op1 The first operator
1901 * @param op2 The second operator
1902 * @return The created ia32 RotL node
/* Build an ia32 Rol for op1 rotated left by op2. */
1904 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1906 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1912 * Creates an ia32 Ror.
1913 * NOTE: There is no RotR with immediate because this would always be a RotL
1914 * "imm-mode_size_bits" which can be pre-calculated.
1916 * @param op1 The first operator
1917 * @param op2 The second operator
1918 * @return The created ia32 RotR node
/* Build an ia32 Ror for op1 rotated right by op2. */
1920 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1922 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1928 * Creates an ia32 RotR or RotL (depending on the found pattern).
1930 * @return The created ia32 RotL or RotR node
/* Transform a Rotl. Rotl(x, -c) is the same as rotating right by c,
 * so a Minus on the count selects Ror instead of Rol. */
1932 static ir_node *gen_Rotl(ir_node *node)
1934 ir_node *op1 = get_Rotl_left(node);
1935 ir_node *op2 = get_Rotl_right(node);
1937 if (is_Minus(op2)) {
1938 return gen_Ror(node, op1, get_Minus_op(op2));
1941 return gen_Rol(node, op1, op2);
1947 * Transforms a Minus node.
1949 * @return The created ia32 Minus node
/* Transform a Minus. Floats: SSE2 flips the sign bit by xoring with a
 * known sign-mask constant loaded via address mode; x87 uses fchs.
 * Integers: plain Neg. */
1951 static ir_node *gen_Minus(ir_node *node)
1953 ir_node *op = get_Minus_op(node);
1954 ir_node *block = be_transform_node(get_nodes_block(node));
1955 dbg_info *dbgi = get_irn_dbg_info(node);
1956 ir_mode *mode = get_irn_mode(node);
1961 if (mode_is_float(mode)) {
1962 ir_node *new_op = be_transform_node(op);
1963 if (ia32_cg_config.use_sse2) {
1964 /* TODO: non-optimal... if we have many xXors, then we should
1965 * rather create a load for the const and use that instead of
1966 * several AM nodes... */
1967 ir_node *noreg_xmm = ia32_new_NoReg_xmm(current_ir_graph);
1969 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1970 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the 32 or 64bit sign-mask constant matching the mode */
1972 size = get_mode_size_bits(mode);
1973 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1975 set_ia32_am_sc(new_node, ent);
1976 set_ia32_op_type(new_node, ia32_AddrModeS);
1977 set_ia32_ls_mode(new_node, mode);
1979 new_node = new_bd_ia32_fchs(dbgi, block, new_op);
1982 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1985 SET_IA32_ORIG_NODE(new_node, node);
1991 * Transforms a Not node.
1993 * @return The created ia32 Not node
/* Transform a Not into an ia32 Not (integer only; mode_b Nots must have
 * been lowered before code selection). */
1995 static ir_node *gen_Not(ir_node *node)
1997 ir_node *op = get_Not_op(node);
1999 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
2000 assert(!mode_is_float(get_irn_mode(node)));
2002 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build |op| (and optionally -|op| when 'negate' is set) for a float value.
 * SSE2 masks off the sign bit with a known abs-mask constant; x87 uses
 * fabs followed by fchs for the negated variant. */
2005 static ir_node *create_float_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
2006 bool negate, ir_node *node)
2008 ir_node *new_block = be_transform_node(block);
2009 ir_mode *mode = get_irn_mode(op);
2010 ir_node *new_op = be_transform_node(op);
2015 assert(mode_is_float(mode));
2017 if (ia32_cg_config.use_sse2) {
2018 ir_node *noreg_fp = ia32_new_NoReg_xmm(current_ir_graph);
2019 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
2020 noreg_GP, nomem, new_op, noreg_fp);
2022 size = get_mode_size_bits(mode);
2023 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
2025 set_ia32_am_sc(new_node, ent);
2027 SET_IA32_ORIG_NODE(new_node, node);
2029 set_ia32_op_type(new_node, ia32_AddrModeS);
2030 set_ia32_ls_mode(new_node, mode);
2032 /* TODO, implement -Abs case */
2035 check_x87_floatmode(mode);
2036 new_node = new_bd_ia32_fabs(dbgi, new_block, new_op);
2037 SET_IA32_ORIG_NODE(new_node, node);
2039 new_node = new_bd_ia32_fchs(dbgi, new_block, new_node);
2040 SET_IA32_ORIG_NODE(new_node, node);
2048 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Create a bt instruction for x & (1 << n), placed in the block of cmp. */
2050 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
2052 dbg_info *dbgi = get_irn_dbg_info(cmp);
2053 ir_node *block = get_nodes_block(cmp);
2054 ir_node *new_block = be_transform_node(block);
2055 ir_node *op1 = be_transform_node(x);
2056 ir_node *op2 = be_transform_node(n);
2058 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Map an ir_relation plus operand mode to the ia32 condition code that a
 * conditional jump/set must test. Float relations use the parity-aware
 * codes; for signed compares against values where no overflow can occur,
 * the cheaper sign-flag codes are used instead of less/greater_equal. */
2061 static ia32_condition_code_t relation_to_condition_code(ir_relation relation,
2063 bool overflow_possible)
2065 if (mode_is_float(mode)) {
2067 case ir_relation_equal: return ia32_cc_float_equal;
2068 case ir_relation_less: return ia32_cc_float_below;
2069 case ir_relation_less_equal: return ia32_cc_float_below_equal;
2070 case ir_relation_greater: return ia32_cc_float_above;
2071 case ir_relation_greater_equal: return ia32_cc_float_above_equal;
2072 case ir_relation_less_greater: return ia32_cc_not_equal;
2073 case ir_relation_less_equal_greater: return ia32_cc_not_parity;
2074 case ir_relation_unordered: return ia32_cc_parity;
2075 case ir_relation_unordered_equal: return ia32_cc_equal;
2076 case ir_relation_unordered_less: return ia32_cc_float_unordered_below;
2077 case ir_relation_unordered_less_equal:
2078 return ia32_cc_float_unordered_below_equal;
2079 case ir_relation_unordered_greater:
2080 return ia32_cc_float_unordered_above;
2081 case ir_relation_unordered_greater_equal:
2082 return ia32_cc_float_unordered_above_equal;
2083 case ir_relation_unordered_less_greater:
2084 return ia32_cc_float_not_equal;
2085 case ir_relation_false:
2086 case ir_relation_true:
2087 /* should we introduce a jump always/jump never? */
2090 panic("Unexpected float pnc");
2091 } else if (mode_is_signed(mode)) {
2093 case ir_relation_unordered_equal:
2094 case ir_relation_equal: return ia32_cc_equal;
2095 case ir_relation_unordered_less:
2096 case ir_relation_less:
2097 return overflow_possible ? ia32_cc_less : ia32_cc_sign;
2098 case ir_relation_unordered_less_equal:
2099 case ir_relation_less_equal: return ia32_cc_less_equal;
2100 case ir_relation_unordered_greater:
2101 case ir_relation_greater: return ia32_cc_greater;
2102 case ir_relation_unordered_greater_equal:
2103 case ir_relation_greater_equal:
2104 return overflow_possible ? ia32_cc_greater_equal : ia32_cc_not_sign;
2105 case ir_relation_unordered_less_greater:
2106 case ir_relation_less_greater: return ia32_cc_not_equal;
2107 case ir_relation_less_equal_greater:
2108 case ir_relation_unordered:
2109 case ir_relation_false:
2110 case ir_relation_true:
2111 /* introduce jump always/jump never? */
2114 panic("Unexpected pnc");
/* unsigned comparisons use the carry-flag based codes */
2117 case ir_relation_unordered_equal:
2118 case ir_relation_equal: return ia32_cc_equal;
2119 case ir_relation_unordered_less:
2120 case ir_relation_less: return ia32_cc_below;
2121 case ir_relation_unordered_less_equal:
2122 case ir_relation_less_equal: return ia32_cc_below_equal;
2123 case ir_relation_unordered_greater:
2124 case ir_relation_greater: return ia32_cc_above;
2125 case ir_relation_unordered_greater_equal:
2126 case ir_relation_greater_equal: return ia32_cc_above_equal;
2127 case ir_relation_unordered_less_greater:
2128 case ir_relation_less_greater: return ia32_cc_not_equal;
2129 case ir_relation_less_equal_greater:
2130 case ir_relation_unordered:
2131 case ir_relation_false:
2132 case ir_relation_true:
2133 /* introduce jump always/jump never? */
2136 panic("Unexpected pnc");
/* Transform a Cmp into a flags-producing node and return it together with
 * the condition code (*cc_out) to test. Recognizes the (1 << n) & x bit
 * test pattern and emits a Bt (result in CF) when profitable. */
2140 static ir_node *get_flags_node(ir_node *cmp, ia32_condition_code_t *cc_out)
2142 /* must have a Cmp as input */
2143 ir_relation relation = get_Cmp_relation(cmp);
2144 ir_node *l = get_Cmp_left(cmp);
2145 ir_node *r = get_Cmp_right(cmp);
2146 ir_mode *mode = get_irn_mode(l);
2147 bool overflow_possible;
2150 /* check for bit-test */
2151 if (ia32_cg_config.use_bt
2152 && (relation == ir_relation_equal
2153 || (mode_is_signed(mode) && relation == ir_relation_less_greater)
2154 || (!mode_is_signed(mode) && ((relation & ir_relation_greater_equal) == ir_relation_greater)))
2156 ir_node *la = get_And_left(l);
2157 ir_node *ra = get_And_right(l);
2164 ir_node *c = get_Shl_left(la);
2165 if (is_Const_1(c) && is_Const_0(r)) {
2166 /* (1 << n) & ra) */
2167 ir_node *n = get_Shl_right(la);
2168 flags = gen_bt(cmp, ra, n);
2169 /* the bit is copied into the CF flag */
2170 if (relation & ir_relation_equal)
2171 *cc_out = ia32_cc_above_equal; /* test for CF=0 */
2173 *cc_out = ia32_cc_below; /* test for CF=1 */
2179 /* the middle-end tries to eliminate impossible relations, so a ptr <> 0
2180 * test becomes ptr > 0. But for x86 an equal comparison is preferable to
2181 * a >0 (we can sometimes eliminate the cmp in favor of flags produced by
2182 * a predecessor node). So add the < bit.
2183 * (Note that we do not want to produce <=> (which can happen for
2184 * unoptimized code), because no x86 flag can represent that */
2185 if (!(relation & ir_relation_equal) && relation & ir_relation_less_greater)
2186 relation |= get_negated_relation(ir_get_possible_cmp_relations(l, r)) & ir_relation_less_greater;
/* comparisons against zero cannot overflow, enabling sign-flag codes */
2188 overflow_possible = true;
2189 if (is_Const(r) && is_Const_null(r))
2190 overflow_possible = false;
2192 /* just do a normal transformation of the Cmp */
2193 *cc_out = relation_to_condition_code(relation, mode, overflow_possible);
2194 flags = be_transform_node(cmp);
2199 * Transforms a Load.
2201 * @return the created ia32 Load node
/* Transform a Load into the matching ia32 load: xLoad (SSE2 float),
 * fld (x87 float), a combined load+zero/sign-extend Conv for sub-32bit
 * GP modes, or a plain 32bit Load. Floating loads may be marked
 * rematerializable when not pinned. */
2203 static ir_node *gen_Load(ir_node *node)
2205 ir_node *old_block = get_nodes_block(node);
2206 ir_node *block = be_transform_node(old_block);
2207 ir_node *ptr = get_Load_ptr(node);
2208 ir_node *mem = get_Load_mem(node);
2209 ir_node *new_mem = be_transform_node(mem);
2210 dbg_info *dbgi = get_irn_dbg_info(node);
2211 ir_mode *mode = get_Load_mode(node);
2212 int throws_exception = ir_throws_exception(node);
2216 ia32_address_t addr;
2218 /* construct load address */
2219 memset(&addr, 0, sizeof(addr));
2220 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
2227 base = be_transform_node(base);
2233 idx = be_transform_node(idx);
2236 if (mode_is_float(mode)) {
2237 if (ia32_cg_config.use_sse2) {
2238 new_node = new_bd_ia32_xLoad(dbgi, block, base, idx, new_mem,
2241 new_node = new_bd_ia32_fld(dbgi, block, base, idx, new_mem,
2245 assert(mode != mode_b);
2247 /* create a conv node with address mode for smaller modes */
2248 if (get_mode_size_bits(mode) < 32) {
2249 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, idx,
2250 new_mem, noreg_GP, mode);
2252 new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
2255 ir_set_throws_exception(new_node, throws_exception);
2257 set_irn_pinned(new_node, get_irn_pinned(node));
2258 set_ia32_op_type(new_node, ia32_AddrModeS);
2259 set_ia32_ls_mode(new_node, mode);
2260 set_address(new_node, &addr);
2262 if (get_irn_pinned(node) == op_pin_state_floats) {
2263 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
2264 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
2265 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
2266 arch_add_irn_flags(new_node, arch_irn_flags_rematerializable);
2269 SET_IA32_ORIG_NODE(new_node, node);
/* Decide whether 'node' (a Proj of a Load) can be folded as destination
 * address mode of a store in 'block': the load value must have a single
 * user, match the store's pointer, and folding must not create a cycle
 * through 'other' or the memory graph. */
2274 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2275 ir_node *ptr, ir_node *other)
2282 /* we only use address mode if we're the only user of the load */
2283 if (get_irn_n_edges(node) > 1)
2286 load = get_Proj_pred(node);
2289 if (get_nodes_block(load) != block)
2292 /* store should have the same pointer as the load */
2293 if (get_Load_ptr(load) != ptr)
2296 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2297 if (other != NULL &&
2298 get_nodes_block(other) == block &&
2299 heights_reachable_in_block(ia32_heights, other, load)) {
2303 if (ia32_prevents_AM(block, load, mem))
2305 /* Store should be attached to the load via mem */
2306 assert(heights_reachable_in_block(ia32_heights, mem, load));
/* Build a destination-address-mode binop (e.g. AddMem): one operand is the
 * memory location that is both read and written, the other becomes an
 * immediate or register. Commutative ops may fold either side. */
2311 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2312 ir_node *mem, ir_node *ptr, ir_mode *mode,
2313 construct_binop_dest_func *func,
2314 construct_binop_dest_func *func8bit,
2315 match_flags_t flags)
2317 ir_node *src_block = get_nodes_block(node);
2325 ia32_address_mode_t am;
2326 ia32_address_t *addr = &am.addr;
2327 memset(&am, 0, sizeof(am));
2329 assert(flags & match_immediate); /* there is no destam node without... */
2330 commutative = (flags & match_commutative) != 0;
2332 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2333 build_address(&am, op1, ia32_create_am_double_use);
2334 new_op = create_immediate_or_transform(op2);
2335 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2336 build_address(&am, op2, ia32_create_am_double_use);
2337 new_op = create_immediate_or_transform(op1);
2342 if (addr->base == NULL)
2343 addr->base = noreg_GP;
2344 if (addr->index == NULL)
2345 addr->index = noreg_GP;
2346 if (addr->mem == NULL)
2349 dbgi = get_irn_dbg_info(node);
2350 block = be_transform_node(src_block);
2351 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations need the dedicated 8bit constructor */
2353 if (get_mode_size_bits(mode) == 8) {
2354 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2356 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2358 set_address(new_node, addr);
2359 set_ia32_op_type(new_node, ia32_AddrModeD);
2360 set_ia32_ls_mode(new_node, mode);
2361 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new node */
2363 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2364 mem_proj = be_transform_node(am.mem_proj);
2365 be_set_transformed_node(am.mem_proj, new_node);
2366 be_set_transformed_node(mem_proj, new_node);
/* Build a destination-address-mode unop (e.g. IncMem/DecMem) reading and
 * writing the memory location 'op' was loaded from; returns NULL-like
 * fallthrough when the load cannot be folded. */
2371 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2372 ir_node *ptr, ir_mode *mode,
2373 construct_unop_dest_func *func)
2375 ir_node *src_block = get_nodes_block(node);
2381 ia32_address_mode_t am;
2382 ia32_address_t *addr = &am.addr;
2384 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2387 memset(&am, 0, sizeof(am));
2388 build_address(&am, op, ia32_create_am_double_use);
2390 dbgi = get_irn_dbg_info(node);
2391 block = be_transform_node(src_block);
2392 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2393 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2394 set_address(new_node, addr);
2395 set_ia32_op_type(new_node, ia32_AddrModeD);
2396 set_ia32_ls_mode(new_node, mode);
2397 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed load's memory users to the new node */
2399 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2400 mem_proj = be_transform_node(am.mem_proj);
2401 be_set_transformed_node(am.mem_proj, new_node);
2402 be_set_transformed_node(mem_proj, new_node);
/* Try to turn a Store of an 8bit Mux(sel, 1, 0) into a SetccMem writing
 * the condition result straight to memory. The 0/1-swapped Mux is handled
 * by negating the condition code. Float special-case codes are rejected. */
2407 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2409 ir_mode *mode = get_irn_mode(node);
2410 ir_node *mux_true = get_Mux_true(node);
2411 ir_node *mux_false = get_Mux_false(node);
2419 ia32_condition_code_t cc;
2420 ia32_address_t addr;
2422 if (get_mode_size_bits(mode) != 8)
2425 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2427 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2433 cond = get_Mux_sel(node);
2434 flags = get_flags_node(cond, &cc);
2435 /* we can't handle the float special cases with SetM */
2436 if (cc & ia32_cc_additional_float_cases)
/* NOTE(review): negation appears to apply only on the swapped-constant
 * path; the guard is on lines not shown here — confirm */
2439 cc = ia32_negate_condition_code(cc);
2441 build_address_ptr(&addr, ptr, mem);
2443 dbgi = get_irn_dbg_info(node);
2444 block = get_nodes_block(node);
2445 new_block = be_transform_node(block);
2446 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2447 addr.index, addr.mem, flags, cc);
2448 set_address(new_node, &addr);
2449 set_ia32_op_type(new_node, ia32_AddrModeD);
2450 set_ia32_ls_mode(new_node, mode);
2451 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store(op(Load)) pattern into a destination-address-mode
 * instruction (read-modify-write directly in memory).  Returns the created
 * node, or presumably NULL when no pattern matches (the returns are elided
 * in this extract — TODO confirm).  Handles Add/Sub/And/Or/Eor, shifts,
 * rotate-left, Mux (via SetMem), Minus and Not. */
2456 static ir_node *try_create_dest_am(ir_node *node)
2458 ir_node *val = get_Store_value(node);
2459 ir_node *mem = get_Store_mem(node);
2460 ir_node *ptr = get_Store_ptr(node);
2461 ir_mode *mode = get_irn_mode(val);
2462 unsigned bits = get_mode_size_bits(mode);
2467 /* handle only GP modes for now... */
2468 if (!ia32_mode_needs_gp_reg(mode))
2472 /* store must be the only user of the val node */
2473 if (get_irn_n_edges(val) > 1)
2475 /* skip pointless convs */
2477 ir_node *conv_op = get_Conv_op(val);
2478 ir_mode *pred_mode = get_irn_mode(conv_op);
2479 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that does not narrow below the stored width changes nothing */
2481 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2489 /* value must be in the same block */
2490 if (get_nodes_block(node) != get_nodes_block(val))
2493 switch (get_irn_opcode(val)) {
2495 op1 = get_Add_left(val);
2496 op2 = get_Add_right(val);
/* prefer inc/dec over add/sub with immediate 1 where profitable */
2497 if (ia32_cg_config.use_incdec) {
2498 if (is_Const_1(op2)) {
2499 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2501 } else if (is_Const_Minus_1(op2)) {
2502 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2506 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2507 new_bd_ia32_AddMem, new_bd_ia32_AddMem_8bit,
2508 match_commutative | match_immediate);
2511 op1 = get_Sub_left(val);
2512 op2 = get_Sub_right(val);
/* Sub by constant should have been normalized to Add of negated constant */
2513 if (is_Const(op2)) {
2514 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2516 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2517 new_bd_ia32_SubMem, new_bd_ia32_SubMem_8bit,
2521 op1 = get_And_left(val);
2522 op2 = get_And_right(val);
2523 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2524 new_bd_ia32_AndMem, new_bd_ia32_AndMem_8bit,
2525 match_commutative | match_immediate);
2528 op1 = get_Or_left(val);
2529 op2 = get_Or_right(val);
2530 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2531 new_bd_ia32_OrMem, new_bd_ia32_OrMem_8bit,
2532 match_commutative | match_immediate);
2535 op1 = get_Eor_left(val);
2536 op2 = get_Eor_right(val);
2537 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2538 new_bd_ia32_XorMem, new_bd_ia32_XorMem_8bit,
2539 match_commutative | match_immediate);
2542 op1 = get_Shl_left(val);
2543 op2 = get_Shl_right(val);
2544 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2545 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2549 op1 = get_Shr_left(val);
2550 op2 = get_Shr_right(val);
2551 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2552 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2556 op1 = get_Shrs_left(val);
2557 op2 = get_Shrs_right(val);
2558 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2559 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2563 op1 = get_Rotl_left(val);
2564 op2 = get_Rotl_right(val);
2565 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2566 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2569 /* TODO: match ROR patterns... */
2571 new_node = try_create_SetMem(val, ptr, mem);
2575 op1 = get_Minus_op(val);
2576 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2579 /* should be lowered already */
2580 assert(mode != mode_b);
2581 op1 = get_Not_op(val);
2582 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* inherit pinned state: the combined node must stay pinned if the Store was */
2588 if (new_node != NULL) {
2589 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2590 get_irn_pinned(node) == op_pin_state_pinned) {
2591 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether an integer mode can be used directly as source for the
 * x87 fild/fist family: must be signed and 16 or 32 bits wide. */
2598 static bool possible_int_mode_for_fp(ir_mode *mode)
2602 if (!mode_is_signed(mode))
2604 size = get_mode_size_bits(mode);
2605 if (size != 16 && size != 32)
/* Checks whether node is a Conv from a float mode to an integer mode that a
 * fist instruction can store directly (see possible_int_mode_for_fp). */
2610 static int is_float_to_int_conv(const ir_node *node)
2612 ir_mode *mode = get_irn_mode(node);
2616 if (!possible_int_mode_for_fp(mode))
2621 conv_op = get_Conv_op(node);
2622 conv_mode = get_irn_mode(conv_op);
2624 if (!mode_is_float(conv_mode))
2631 * Transform a Store(floatConst) into a sequence of
2634 * @return the created ia32 Store node
/* The float constant is decomposed into 4-byte (and possibly a trailing
 * 2-byte) integer chunks, each written with an integer Store of an
 * immediate; multiple stores are combined with a Sync.  NOTE(review): the
 * loop header and some declarations are elided in this extract. */
2636 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2638 ir_mode *mode = get_irn_mode(cns);
2639 unsigned size = get_mode_size_bytes(mode);
2640 ir_tarval *tv = get_Const_tarval(cns);
2641 ir_node *block = get_nodes_block(node);
2642 ir_node *new_block = be_transform_node(block);
2643 ir_node *ptr = get_Store_ptr(node);
2644 ir_node *mem = get_Store_mem(node);
2645 dbg_info *dbgi = get_irn_dbg_info(node);
2648 int throws_exception = ir_throws_exception(node);
2650 ia32_address_t addr;
2652 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32-bit little-endian chunk of the constant */
2659 val= get_tarval_sub_bits(tv, ofs) |
2660 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2661 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2662 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2665 } else if (size >= 2) {
/* trailing 16-bit chunk (e.g. the upper word of an 80-bit long double) */
2666 val= get_tarval_sub_bits(tv, ofs) |
2667 (get_tarval_sub_bits(tv, ofs + 1) << 8);
2671 panic("invalid size of Store float to mem (%+F)", node);
2673 ir_graph *const irg = get_Block_irg(new_block);
2674 ir_node *const imm = ia32_create_Immediate(irg, NULL, 0, val);
2676 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2677 addr.index, addr.mem, imm);
2678 ir_node *new_mem = new_r_Proj(new_node, mode_M, pn_ia32_Store_M);
2680 ir_set_throws_exception(new_node, throws_exception);
2681 set_irn_pinned(new_node, get_irn_pinned(node));
2682 set_ia32_op_type(new_node, ia32_AddrModeD);
2683 set_ia32_ls_mode(new_node, mode);
2684 set_address(new_node, &addr);
2685 SET_IA32_ORIG_NODE(new_node, node);
/* advance to the next chunk until the whole constant has been written */
2692 addr.offset += delta;
2693 } while (size != 0);
/* several partial stores: join their memory effects with a Sync */
2696 return new_rd_Sync(dbgi, new_block, i, ins);
/* single store: return the Store node itself (ins[0] is its mem Proj) */
2698 return get_Proj_pred(ins[0]);
2703 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates implicitly; plain fist needs the FPU control word
 * temporarily switched to truncation mode. */
2705 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base,
2706 ir_node *index, ir_node *mem, ir_node *val)
2708 if (ia32_cg_config.use_fisttp) {
2709 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2710 if other users exists */
2711 ir_node *vfisttp = new_bd_ia32_fisttp(dbgi, block, base, index, mem, val);
2712 ir_node *value = new_r_Proj(vfisttp, ia32_mode_E, pn_ia32_fisttp_res);
/* Keep forces a copy of the value since fisttp pops the x87 stack top */
2713 be_new_Keep(block, 1, &value);
/* no fisttp available: use fist with a truncating FPU control word */
2717 ir_node *trunc_mode = ia32_new_Fpu_truncate(current_ir_graph);
2720 ir_node *vfist = new_bd_ia32_fist(dbgi, block, base, index, mem, val, trunc_mode);
2726 * Transforms a general (no special case) Store.
2728 * @return the created ia32 Store node
/* Tries destination address mode first; otherwise builds an address and
 * emits xStore (SSE2 float), fst (x87 float), fist (float->int conv), or a
 * plain integer Store (8-bit variant for byte modes). */
2730 static ir_node *gen_general_Store(ir_node *node)
2732 ir_node *val = get_Store_value(node);
2733 ir_mode *mode = get_irn_mode(val);
2734 ir_node *block = get_nodes_block(node);
2735 ir_node *new_block = be_transform_node(block);
2736 ir_node *ptr = get_Store_ptr(node);
2737 ir_node *mem = get_Store_mem(node);
2738 dbg_info *dbgi = get_irn_dbg_info(node);
2739 int throws_exception = ir_throws_exception(node);
2742 ia32_address_t addr;
2744 /* check for destination address mode */
2745 new_node = try_create_dest_am(node);
2746 if (new_node != NULL)
2749 /* construct store address */
2750 memset(&addr, 0, sizeof(addr));
2751 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
/* substitute noreg for absent base/index, transform present ones */
2753 if (addr.base == NULL) {
2754 addr.base = noreg_GP;
2756 addr.base = be_transform_node(addr.base);
2759 if (addr.index == NULL) {
2760 addr.index = noreg_GP;
2762 addr.index = be_transform_node(addr.index);
2764 addr.mem = be_transform_node(mem);
2766 if (mode_is_float(mode)) {
2767 if (ia32_cg_config.use_sse2) {
2768 new_val = be_transform_node(val);
2769 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2770 addr.index, addr.mem, new_val);
/* x87 path: a down-Conv before the store is implicit in fst's ls_mode */
2772 val = ia32_skip_float_downconv(val);
2773 new_val = be_transform_node(val);
2774 new_node = new_bd_ia32_fst(dbgi, new_block, addr.base,
2775 addr.index, addr.mem, new_val, mode);
/* float -> int conversion feeding the store: fold into fist/fisttp */
2777 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2778 val = get_Conv_op(val);
2779 new_val = be_transform_node(val);
2780 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2782 unsigned dest_bits = get_mode_size_bits(mode);
/* skip Convs that don't narrow below the stored width; the store
 * truncates anyway */
2783 while (is_downconv(val)
2784 && get_mode_size_bits(get_irn_mode(val)) >= dest_bits) {
2785 val = get_Conv_op(val);
2787 new_val = create_immediate_or_transform(val);
2788 assert(mode != mode_b);
2790 new_node = dest_bits == 8
2791 ? new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, new_val)
2792 : new_bd_ia32_Store (dbgi, new_block, addr.base, addr.index, addr.mem, new_val);
2794 ir_set_throws_exception(new_node, throws_exception);
2796 set_irn_pinned(new_node, get_irn_pinned(node));
2797 set_ia32_op_type(new_node, ia32_AddrModeD);
2798 set_ia32_ls_mode(new_node, mode);
2800 set_address(new_node, &addr);
2801 SET_IA32_ORIG_NODE(new_node, node);
2807 * Transforms a Store.
2809 * @return the created ia32 Store node
/* Dispatcher: float constant stores become integer store sequences,
 * everything else goes through gen_general_Store. */
2811 static ir_node *gen_Store(ir_node *node)
2813 ir_node *val = get_Store_value(node);
2814 ir_mode *mode = get_irn_mode(val);
2816 if (mode_is_float(mode) && is_Const(val)) {
2817 /* We can transform every floating const store
2818 into a sequence of integer stores.
2819 If the constant is already in a register,
2820 it would be better to use it, but we don't
2821 have this information here. */
2822 return gen_float_const_Store(node, val);
2824 return gen_general_Store(node);
2828 * Transforms a Switch.
2830 * @return the created ia32 SwitchJmp node
/* Creates a jump-table entity and an indexed SwitchJmp (scale 2 == *4 for
 * 32-bit table entries).  Selectors narrower than 32 bit are zero/sign
 * extended first. */
2832 static ir_node *gen_Switch(ir_node *node)
2834 dbg_info *dbgi = get_irn_dbg_info(node);
2835 ir_graph *irg = get_irn_irg(node);
2836 ir_node *block = be_transform_node(get_nodes_block(node));
2837 ir_node *sel = get_Switch_selector(node);
2838 ir_node *new_sel = be_transform_node(sel);
2839 ir_mode *sel_mode = get_irn_mode(sel);
2840 const ir_switch_table *table = get_Switch_table(node);
2841 unsigned n_outs = get_Switch_n_outs(node);
2845 assert(get_mode_size_bits(sel_mode) <= 32);
2846 assert(!mode_is_float(sel_mode));
2847 sel = ia32_skip_sameconv(sel);
2848 if (get_mode_size_bits(sel_mode) < 32)
2849 new_sel = transform_upconv(sel, node);
/* private, constant entity holding the jump table */
2851 entity = new_entity(NULL, id_unique("TBL%u"), get_unknown_type());
2852 set_entity_visibility(entity, ir_visibility_private);
2853 add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
2855 table = ir_switch_table_duplicate(irg, table);
2857 new_node = new_bd_ia32_SwitchJmp(dbgi, block, noreg_GP, new_sel, n_outs, table);
2858 set_ia32_am_scale(new_node, 2);
2859 set_ia32_am_sc(new_node, entity);
2860 set_ia32_op_type(new_node, ia32_AddrModeS);
2861 set_ia32_ls_mode(new_node, mode_Iu);
2862 SET_IA32_ORIG_NODE(new_node, node);
2863 // FIXME This seems wrong. GCC uses PIC for switch on OS X.
2864 get_ia32_attr(new_node)->data.am_sc_no_pic_adjust = true;
2870 * Transform a Cond node.
/* A Cond becomes a Jcc consuming the flags produced by the selector's Cmp. */
2872 static ir_node *gen_Cond(ir_node *node)
2874 ir_node *block = get_nodes_block(node);
2875 ir_node *new_block = be_transform_node(block);
2876 dbg_info *dbgi = get_irn_dbg_info(node);
2877 ir_node *sel = get_Cond_selector(node);
2878 ir_node *flags = NULL;
2880 ia32_condition_code_t cc;
2882 /* we get flags from a Cmp */
2883 flags = get_flags_node(sel, &cc);
2885 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, cc);
2886 SET_IA32_ORIG_NODE(new_node, node);
2892 * Transform a be_Copy.
/* Duplicate the node; GP-register values are normalized to mode_Iu. */
2894 static ir_node *gen_be_Copy(ir_node *node)
2896 ir_node *new_node = be_duplicate_node(node);
2897 ir_mode *mode = get_irn_mode(new_node);
2899 if (ia32_mode_needs_gp_reg(mode)) {
2900 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare.  With fucomi the flags are produced directly;
 * otherwise FucomFnstsw/FtstFnstsw store the FPU status word and Sahf moves
 * it into the CPU flags.  Ftst is used for comparison against 0. */
2906 static ir_node *create_Fucom(ir_node *node)
2908 dbg_info *dbgi = get_irn_dbg_info(node);
2909 ir_node *block = get_nodes_block(node);
2910 ir_node *new_block = be_transform_node(block);
2911 ir_node *left = get_Cmp_left(node);
2912 ir_node *new_left = be_transform_node(left);
2913 ir_node *right = get_Cmp_right(node);
2914 ir_mode *cmp_mode = get_irn_mode(left);
2917 check_x87_floatmode(cmp_mode);
2919 if (ia32_cg_config.use_fucomi) {
2920 new_right = be_transform_node(right);
2921 new_node = new_bd_ia32_Fucomi(dbgi, new_block, new_left,
2923 set_ia32_commutative(new_node);
2924 SET_IA32_ORIG_NODE(new_node, node);
/* ftst compares the tos against zero without a second operand */
2926 if (is_Const_0(right)) {
2927 new_node = new_bd_ia32_FtstFnstsw(dbgi, new_block, new_left, 0);
2929 new_right = be_transform_node(right);
2930 new_node = new_bd_ia32_FucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2931 set_ia32_commutative(new_node);
2934 SET_IA32_ORIG_NODE(new_node, node);
/* copy AH (FPU status) into the CPU flags register */
2936 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2937 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 ucomiss/ucomisd compare with address-mode matching for one
 * operand. */
2943 static ir_node *create_Ucomi(ir_node *node)
2945 dbg_info *dbgi = get_irn_dbg_info(node);
2946 ir_node *src_block = get_nodes_block(node);
2947 ir_node *new_block = be_transform_node(src_block);
2948 ir_node *left = get_Cmp_left(node);
2949 ir_node *right = get_Cmp_right(node);
2951 ia32_address_mode_t am;
2952 ia32_address_t *addr = &am.addr;
2954 match_arguments(&am, src_block, left, right, NULL,
2955 match_commutative | match_am);
2957 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2958 addr->mem, am.new_op1, am.new_op2,
2960 set_am_attributes(new_node, &am);
2962 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a folded load's memory Proj to the combined node */
2964 new_node = fix_mem_proj(new_node, &am);
/* Returns whether both Mux operands are known to have clean upper bits for
 * the given mode.  Muxes that will be lowered to setcc produce only a byte
 * and are therefore excluded. */
2969 static bool ia32_mux_upper_bits_clean(const ir_node *node, ir_mode *mode)
2971 ir_node *mux_true = get_Mux_true(node);
2972 ir_node *mux_false = get_Mux_false(node);
2973 ir_mode *mux_mode = get_irn_mode(node);
2974 /* mux nodes which get transformed to the set instruction are not clean */
2975 if (is_Const(mux_true) && is_Const(mux_false)
2976 && get_mode_size_bits(mux_mode) == 8) {
2979 return be_upper_bits_clean(mux_true, mode)
2980 && be_upper_bits_clean(mux_false, mode);
2984 * Generate code for a Cmp.
/* Floats go to Ucomi (SSE2) or Fucom (x87).  Integer (x & y) ==/!= 0 is
 * strength-reduced to Test; everything else becomes Cmp.  When both operands
 * have clean upper bits the 32-bit opcode is preferred for size. */
2986 static ir_node *gen_Cmp(ir_node *node)
2988 dbg_info *dbgi = get_irn_dbg_info(node);
2989 ir_node *block = get_nodes_block(node);
2990 ir_node *new_block = be_transform_node(block);
2991 ir_node *left = get_Cmp_left(node);
2992 ir_node *right = get_Cmp_right(node);
2993 ir_mode *cmp_mode = get_irn_mode(left);
2995 ia32_address_mode_t am;
2996 ia32_address_t *addr = &am.addr;
2998 if (mode_is_float(cmp_mode)) {
2999 if (ia32_cg_config.use_sse2) {
3000 return create_Ucomi(node);
3002 return create_Fucom(node);
3006 assert(ia32_mode_needs_gp_reg(cmp_mode));
3008 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
3009 if (is_Const_0(right) &&
3011 get_irn_n_edges(left) == 1) {
3012 /* Test(and_left, and_right) */
3013 ir_node *and_left = get_And_left(left);
3014 ir_node *and_right = get_And_right(left);
3016 /* matze: code here used mode instead of cmd_mode, I think it is always
3017 * the same as cmp_mode, but I leave this here to see if this is really
3020 assert(get_irn_mode(and_left) == cmp_mode);
3022 match_arguments(&am, block, and_left, and_right, NULL,
3024 match_am | match_8bit_am | match_16bit_am |
3025 match_am_and_immediates | match_immediate);
3027 /* use 32bit compare mode if possible since the opcode is smaller */
3028 if (am.op_type == ia32_Normal &&
3029 be_upper_bits_clean(and_left, cmp_mode) &&
3030 be_upper_bits_clean(and_right, cmp_mode)) {
3031 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3034 new_node = get_mode_size_bits(cmp_mode) == 8
3035 ? new_bd_ia32_Test_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3036 : new_bd_ia32_Test (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3038 /* Cmp(left, right) */
3039 match_arguments(&am, block, left, right, NULL,
3041 match_am | match_8bit_am | match_16bit_am |
3042 match_am_and_immediates | match_immediate);
3043 /* use 32bit compare mode if possible since the opcode is smaller */
3044 if (am.op_type == ia32_Normal &&
3045 be_upper_bits_clean(left, cmp_mode) &&
3046 be_upper_bits_clean(right, cmp_mode)) {
3047 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
3050 new_node = get_mode_size_bits(cmp_mode) == 8
3051 ? new_bd_ia32_Cmp_8bit(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted)
3052 : new_bd_ia32_Cmp (dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted);
3054 set_am_attributes(new_node, &am);
3055 set_ia32_ls_mode(new_node, cmp_mode);
3057 SET_IA32_ORIG_NODE(new_node, node);
3059 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc selecting between the Mux operands based on flags.  If
 * operand matching swapped the operands, the condition code is negated to
 * compensate. */
3064 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3065 ia32_condition_code_t cc)
3067 dbg_info *dbgi = get_irn_dbg_info(node);
3068 ir_node *block = get_nodes_block(node);
3069 ir_node *new_block = be_transform_node(block);
3070 ir_node *val_true = get_Mux_true(node);
3071 ir_node *val_false = get_Mux_false(node);
3073 ia32_address_mode_t am;
3074 ia32_address_t *addr;
3076 assert(ia32_cg_config.use_cmov);
3077 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3081 match_arguments(&am, block, val_false, val_true, flags,
3082 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* matcher may have swapped true/false operand; negate cc to keep semantics */
3084 if (am.ins_permuted)
3085 cc = ia32_negate_condition_code(cc);
3087 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3088 addr->mem, am.new_op1, am.new_op2, new_flags,
3090 set_am_attributes(new_node, &am);
3092 SET_IA32_ORIG_NODE(new_node, node);
3094 new_node = fix_mem_proj(new_node, &am);
3100 * Creates a ia32 Setcc instruction.
/* setcc only produces a byte; for wider result modes the byte is
 * zero-extended with an 8-bit Conv_I2I. */
3102 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3103 ir_node *flags, ia32_condition_code_t cc,
3106 ir_mode *mode = get_irn_mode(orig_node);
3109 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, cc);
3110 SET_IA32_ORIG_NODE(new_node, orig_node);
3112 /* we might need to conv the result up */
3113 if (get_mode_size_bits(mode) > 8) {
3114 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
3115 SET_IA32_ORIG_NODE(new_node, orig_node);
3122 * Create instruction for an unsigned Difference or Zero.
/* Emits the branch-free pattern: res = (a - b) & ~(borrow-mask), i.e.
 * Sub, Sbb0 (materialize the carry as 0/-1), Not, And. */
3124 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3126 ir_mode *mode = get_irn_mode(psi);
3136 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3137 match_mode_neutral | match_am | match_immediate | match_two_users);
3139 block = get_nodes_block(new_node);
3141 if (is_Proj(new_node)) {
3142 sub = get_Proj_pred(new_node);
/* need a mode_T Sub so we can additionally pull out the flags Proj */
3145 set_irn_mode(sub, mode_T);
3146 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3148 assert(is_ia32_Sub(sub));
3149 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3151 dbgi = get_irn_dbg_info(psi);
/* sbb 0: result is 0 without borrow, -1 with borrow */
3152 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3153 set_ia32_ls_mode(sbb, mode_Iu);
3154 notn = new_bd_ia32_Not(dbgi, block, sbb);
3156 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, notn);
3157 set_ia32_ls_mode(new_node, mode_Iu);
3158 set_ia32_commutative(new_node);
3163 * Create an const array of two float consts.
3165 * @param c0 the first constant
3166 * @param c1 the second constant
3167 * @param new_mode IN/OUT for the mode of the constants, if NULL
3168 * smallest possible mode will be used
/* Creates a private, constant global entity initialized with the two
 * tarvals, converted to the smallest float mode that holds both losslessly
 * (F preferred over D over the original mode). */
3170 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3173 ir_mode *mode = *new_mode;
3175 ir_initializer_t *initializer;
3176 ir_tarval *tv0 = get_Const_tarval(c0);
3177 ir_tarval *tv1 = get_Const_tarval(c1);
3180 /* detect the best mode for the constants */
3181 mode = get_tarval_mode(tv0);
3183 if (mode != mode_F) {
3184 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3185 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3187 tv0 = tarval_convert_to(tv0, mode);
3188 tv1 = tarval_convert_to(tv1, mode);
3189 } else if (mode != mode_D) {
3190 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3191 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3193 tv0 = tarval_convert_to(tv0, mode);
3194 tv1 = tarval_convert_to(tv1, mode);
3201 tp = ia32_get_prim_type(mode);
3202 tp = ia32_create_float_array(tp);
3204 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3206 set_entity_ld_ident(ent, get_entity_ident(ent));
3207 set_entity_visibility(ent, ir_visibility_private);
3208 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3210 initializer = create_initializer_compound(2);
3212 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3213 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3215 set_entity_initializer(ent, initializer);
3222 * Possible transformations for creating a Setcc.
/* NOTE(review): enum members are elided in this extract; the values
 * (SETCC_TR_ADD, _LEA, _LEAxx, _SHL, _NEG, _NOT, _AND, _SET, ...) are used
 * below in find_const_transform/gen_Mux. */
3224 enum setcc_transform_insn {
/* Description of a step sequence that materializes two constants from a
 * condition: condition code plus up to a few transform steps. */
3236 typedef struct setcc_transform {
3238 ia32_condition_code_t cc;
3240 enum setcc_transform_insn transform;
3244 } setcc_transform_t;
3247 * Setcc can only handle 0 and 1 result.
3248 * Find a transformation that creates 0 and 1 from
/* Normalizes (t, f) so that f becomes 0 and t the "larger" value (negating
 * the condition code when swapping), then picks a cheap instruction
 * sequence (lea/shl/neg/not/and/add) that maps the 0/1 setcc result to
 * (f, t).  Steps are recorded in res in reverse application order. */
3251 static void find_const_transform(ia32_condition_code_t cc,
3252 ir_tarval *t, ir_tarval *f,
3253 setcc_transform_t *res)
/* make sure t is the non-zero / bigger constant; adjust cc accordingly */
3259 if (tarval_is_null(t)) {
3263 cc = ia32_negate_condition_code(cc);
3264 } else if (tarval_cmp(t, f) == ir_relation_less) {
3265 // now, t is the bigger one
3269 cc = ia32_negate_condition_code(cc);
3273 if (! tarval_is_null(f)) {
/* reduce to the f == 0 case: produce t-f first, then add f at the end */
3274 ir_tarval *t_sub = tarval_sub(t, f, NULL);
3277 res->steps[step].transform = SETCC_TR_ADD;
3279 if (t == tarval_bad)
3280 panic("constant subtract failed");
3281 if (! tarval_is_long(f))
3282 panic("tarval is not long");
3284 res->steps[step].val = get_tarval_long(f);
3286 f = tarval_sub(f, f, NULL);
3287 assert(tarval_is_null(f));
/* t == 1: plain setcc suffices */
3290 if (tarval_is_one(t)) {
3291 res->steps[step].transform = SETCC_TR_SET;
3292 res->num_steps = ++step;
/* t == -1: setcc then negate */
3296 if (tarval_is_minus_one(t)) {
3297 res->steps[step].transform = SETCC_TR_NEG;
3299 res->steps[step].transform = SETCC_TR_SET;
3300 res->num_steps = ++step;
3303 if (tarval_is_long(t)) {
3304 long v = get_tarval_long(t);
3306 res->steps[step].val = 0;
/* t == 9: lea with scale 3 computes (a << 3) + a */
3309 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3311 res->steps[step].transform = SETCC_TR_LEAxx;
3312 res->steps[step].scale = 3; /* (a << 3) + a */
/* t == 8: shift (or lea when an offset is pending) */
3315 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3317 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3318 res->steps[step].scale = 3; /* (a << 3) */
3321 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3323 res->steps[step].transform = SETCC_TR_LEAxx;
3324 res->steps[step].scale = 2; /* (a << 2) + a */
3327 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3329 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3330 res->steps[step].scale = 2; /* (a << 2) */
3333 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3335 res->steps[step].transform = SETCC_TR_LEAxx;
3336 res->steps[step].scale = 1; /* (a << 1) + a */
3339 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3341 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3342 res->steps[step].scale = 1; /* (a << 1) */
3345 res->num_steps = step;
/* general constant: mask via neg+and, or shift for single-bit values */
3348 if (! tarval_is_single_bit(t)) {
3349 res->steps[step].transform = SETCC_TR_AND;
3350 res->steps[step].val = v;
3352 res->steps[step].transform = SETCC_TR_NEG;
3354 int val = get_tarval_lowest_bit(t);
3357 res->steps[step].transform = SETCC_TR_SHL;
3358 res->steps[step].scale = val;
3362 res->steps[step].transform = SETCC_TR_SET;
3363 res->num_steps = ++step;
3366 panic("tarval is not long");
3370 * Transforms a Mux node into some code sequence.
3372 * @return The transformed node.
/* Handles, in order: float abs patterns; SSE min/max patterns; float Mux of
 * two constants via a 2-element constant array indexed by a setcc result;
 * unsigned difference-or-zero; integer const/const Muxes via setcc step
 * sequences (find_const_transform); and finally a generic CMov. */
3374 static ir_node *gen_Mux(ir_node *node)
3376 dbg_info *dbgi = get_irn_dbg_info(node);
3377 ir_node *block = get_nodes_block(node);
3378 ir_node *new_block = be_transform_node(block);
3379 ir_node *mux_true = get_Mux_true(node);
3380 ir_node *mux_false = get_Mux_false(node);
3381 ir_node *sel = get_Mux_sel(node);
3382 ir_mode *mode = get_irn_mode(node);
3386 ia32_condition_code_t cc;
3388 assert(get_irn_mode(sel) == mode_b);
3390 is_abs = ir_mux_is_abs(sel, mux_false, mux_true);
/* integer abs is not implemented here; float abs has a dedicated helper */
3392 if (ia32_mode_needs_gp_reg(mode)) {
3393 ir_fprintf(stderr, "Optimisation warning: Integer abs %+F not transformed\n",
3396 ir_node *op = ir_get_abs_op(sel, mux_false, mux_true);
3397 return create_float_abs(dbgi, block, op, is_abs < 0, node);
3401 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3402 if (mode_is_float(mode)) {
3403 ir_node *cmp_left = get_Cmp_left(sel);
3404 ir_node *cmp_right = get_Cmp_right(sel);
3405 ir_relation relation = get_Cmp_relation(sel);
3407 if (ia32_cg_config.use_sse2) {
3408 if (relation == ir_relation_less || relation == ir_relation_less_equal) {
3409 if (cmp_left == mux_true && cmp_right == mux_false) {
3410 /* Mux(a <= b, a, b) => MIN */
3411 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3412 match_commutative | match_am | match_two_users);
3413 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3414 /* Mux(a <= b, b, a) => MAX */
3415 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3416 match_commutative | match_am | match_two_users);
3418 } else if (relation == ir_relation_greater || relation == ir_relation_greater_equal) {
3419 if (cmp_left == mux_true && cmp_right == mux_false) {
3420 /* Mux(a >= b, a, b) => MAX */
3421 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3422 match_commutative | match_am | match_two_users);
3423 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3424 /* Mux(a >= b, b, a) => MIN */
3425 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3426 match_commutative | match_am | match_two_users);
/* float Mux of two constants: load const_array[setcc_result] */
3431 if (is_Const(mux_true) && is_Const(mux_false)) {
3432 ia32_address_mode_t am;
3437 flags = get_flags_node(sel, &cc);
3438 new_node = create_set_32bit(dbgi, new_block, flags, cc, node);
3440 if (ia32_cg_config.use_sse2) {
3441 /* cannot load from different mode on SSE */
3444 /* x87 can load any mode */
3448 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* choose the AM scale matching the element size (4/8/16 bytes) */
3450 if (new_mode == mode_F) {
3452 } else if (new_mode == mode_D) {
3454 } else if (new_mode == ia32_mode_E) {
3455 /* arg, shift 16 NOT supported */
3457 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3459 panic("Unsupported constant size");
3462 am.ls_mode = new_mode;
3463 am.addr.base = get_symconst_base();
3464 am.addr.index = new_node;
3465 am.addr.mem = nomem;
3467 am.addr.scale = scale;
3468 am.addr.use_frame = 0;
3469 am.addr.tls_segment = false;
3470 am.addr.frame_entity = NULL;
3471 am.addr.symconst_sign = 0;
3472 am.mem_proj = am.addr.mem;
3473 am.op_type = ia32_AddrModeS;
3476 am.pinned = op_pin_state_floats;
3478 am.ins_permuted = false;
3480 if (ia32_cg_config.use_sse2)
3481 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3483 load = new_bd_ia32_fld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3484 set_am_attributes(load, &am);
3486 return new_rd_Proj(NULL, load, mode_fp, pn_ia32_res);
3488 panic("cannot transform floating point Mux");
3491 assert(ia32_mode_needs_gp_reg(mode));
3494 ir_node *cmp_left = get_Cmp_left(sel);
3495 ir_node *cmp_right = get_Cmp_right(sel);
3496 ir_relation relation = get_Cmp_relation(sel);
3497 ir_node *val_true = mux_true;
3498 ir_node *val_false = mux_false;
/* normalize so the 0 constant ends up on the false side */
3500 if (is_Const(val_true) && is_Const_null(val_true)) {
3501 ir_node *tmp = val_false;
3502 val_false = val_true;
3504 relation = get_negated_relation(relation);
/* Mux(a >/< b, a - b, 0) => unsigned difference-or-zero */
3506 if (is_Const_0(val_false) && is_Sub(val_true)) {
3507 if ((relation & ir_relation_greater)
3508 && get_Sub_left(val_true) == cmp_left
3509 && get_Sub_right(val_true) == cmp_right) {
3510 return create_doz(node, cmp_left, cmp_right);
3512 if ((relation & ir_relation_less)
3513 && get_Sub_left(val_true) == cmp_right
3514 && get_Sub_right(val_true) == cmp_left) {
3515 return create_doz(node, cmp_right, cmp_left);
3520 flags = get_flags_node(sel, &cc);
3522 if (is_Const(mux_true) && is_Const(mux_false)) {
3523 /* both are const, good */
3524 ir_tarval *tv_true = get_Const_tarval(mux_true);
3525 ir_tarval *tv_false = get_Const_tarval(mux_false);
3526 setcc_transform_t res;
3529 find_const_transform(cc, tv_true, tv_false, &res);
/* apply recorded steps back-to-front; last step is always SETCC_TR_SET */
3531 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3534 switch (res.steps[step].transform) {
3536 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, noreg_GP);
3537 add_ia32_am_offs_int(new_node, res.steps[step].val);
3539 case SETCC_TR_ADDxx:
3540 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3543 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3544 set_ia32_am_scale(new_node, res.steps[step].scale);
3545 set_ia32_am_offs_int(new_node, res.steps[step].val);
3547 case SETCC_TR_LEAxx:
3548 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3549 set_ia32_am_scale(new_node, res.steps[step].scale);
3550 set_ia32_am_offs_int(new_node, res.steps[step].val);
3553 imm = ia32_immediate_from_long(res.steps[step].scale);
3554 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3557 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3560 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3563 imm = ia32_immediate_from_long(res.steps[step].val);
3564 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3567 new_node = create_set_32bit(dbgi, new_block, flags, res.cc, node);
3570 panic("unknown setcc transform");
/* generic fallback: conditional move */
3574 new_node = create_CMov(node, sel, flags, cc);
3581 * Create a conversion from x87 state register to general purpose.
/* Spills the x87 value with fist/fisttp to a frame slot and reloads it with
 * an integer Load.  Unsigned 32-bit results are stored as 64-bit signed and
 * only the low word is reloaded. */
3583 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3585 ir_node *block = be_transform_node(get_nodes_block(node));
3586 ir_node *op = get_Conv_op(node);
3587 ir_node *new_op = be_transform_node(op);
3588 ir_graph *irg = current_ir_graph;
3589 dbg_info *dbgi = get_irn_dbg_info(node);
3590 ir_mode *mode = get_irn_mode(node);
3591 ir_node *frame = get_irg_frame(irg);
3592 ir_node *fist, *load, *mem;
3594 fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_op);
3595 set_irn_pinned(fist, op_pin_state_floats);
3596 set_ia32_use_frame(fist);
3597 set_ia32_op_type(fist, ia32_AddrModeD);
3598 arch_add_irn_flags(fist, arch_irn_flags_spill);
/* fist and fisttp share the memory Proj number, so one Proj works for both */
3600 assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
3601 mem = new_r_Proj(fist, mode_M, pn_ia32_fist_M);
3603 assert(get_mode_size_bits(mode) <= 32);
3604 /* exception we can only store signed 32 bit integers, so for unsigned
3605 we store a 64bit (signed) integer and load the lower bits */
3606 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3607 set_ia32_ls_mode(fist, mode_Ls);
3609 set_ia32_ls_mode(fist, mode_Is);
3611 SET_IA32_ORIG_NODE(fist, node);
3614 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3616 set_irn_pinned(load, op_pin_state_floats);
3617 set_ia32_use_frame(load);
3618 set_ia32_op_type(load, ia32_AddrModeS);
3619 set_ia32_ls_mode(load, mode_Is);
/* spill-slot sizing must match what the fist actually wrote */
3620 if (get_ia32_ls_mode(fist) == mode_Ls) {
3621 ia32_attr_t *attr = get_ia32_attr(load);
3622 attr->data.need_64bit_stackent = 1;
3624 ia32_attr_t *attr = get_ia32_attr(load);
3625 attr->data.need_32bit_stackent = 1;
3627 SET_IA32_ORIG_NODE(load, node);
3629 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3633 * Creates a x87 Conv by placing a Store and a Load
/* x87 registers have no explicit rounding conversion; storing with the
 * target mode and reloading performs the precision change via memory. */
3635 static ir_node *gen_x87_conv(ir_mode *tgt_mode, ir_node *node)
3637 ir_node *block = get_nodes_block(node);
3638 ir_graph *irg = get_Block_irg(block);
3639 dbg_info *dbgi = get_irn_dbg_info(node);
3640 ir_node *frame = get_irg_frame(irg);
3642 ir_node *store, *load;
3645 store = new_bd_ia32_fst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3646 set_ia32_use_frame(store);
3647 set_ia32_op_type(store, ia32_AddrModeD);
3648 arch_add_irn_flags(store, arch_irn_flags_spill);
3649 SET_IA32_ORIG_NODE(store, node);
3651 store_mem = new_r_Proj(store, mode_M, pn_ia32_fst_M);
3653 load = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, tgt_mode);
3654 set_ia32_use_frame(load);
3655 set_ia32_op_type(load, ia32_AddrModeS);
3656 SET_IA32_ORIG_NODE(load, node);
3658 new_node = new_r_Proj(load, ia32_mode_E, pn_ia32_fld_res);
/* Build an ia32 integer-to-integer Conv; the 8-bit variant is used for
 * 8-bit modes (different register constraints), the generic one otherwise. */
3662 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3663 		ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3665 	ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3667 	func = get_mode_size_bits(mode) == 8 ?
3668 		new_bd_ia32_Conv_I2I_8bit : new_bd_ia32_Conv_I2I;
3669 	return func(dbgi, block, base, index, mem, val, mode);
3673  * Create a conversion from general purpose to x87 register
3675 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3677 	ir_node  *src_block = get_nodes_block(node);
3678 	ir_node  *block     = be_transform_node(src_block);
3679 	ir_graph *irg       = get_Block_irg(block);
3680 	dbg_info *dbgi      = get_irn_dbg_info(node);
3681 	ir_node  *op        = get_Conv_op(node);
3682 	ir_node  *new_op    = NULL;
3684 	ir_mode *store_mode;
3690 	/* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3691 	if (possible_int_mode_for_fp(src_mode)) {
3692 		ia32_address_mode_t am;
/* Try to fold the integer operand directly into the fild as a source
 * address mode; succeeds only if match_arguments chose AddrModeS. */
3694 		match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am | match_upconv);
3695 		if (am.op_type == ia32_AddrModeS) {
3696 			ia32_address_t *addr = &am.addr;
3698 			fild     = new_bd_ia32_fild(dbgi, block, addr->base, addr->index, addr->mem);
3699 			new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3701 			set_am_attributes(fild, &am);
3702 			SET_IA32_ORIG_NODE(fild, node);
3704 			fix_mem_proj(fild, &am);
/* Source AM did not match: transform the operand normally. */
3709 	if (new_op == NULL) {
3710 		new_op = be_transform_node(op);
3713 	mode = get_irn_mode(op);
3715 	/* first convert to 32 bit signed if necessary */
3716 	if (get_mode_size_bits(src_mode) < 32) {
3717 		if (!be_upper_bits_clean(op, src_mode)) {
3718 			new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3719 			SET_IA32_ORIG_NODE(new_op, node);
3724 	assert(get_mode_size_bits(mode) == 32);
/* Spill the GP value to a frame slot so fild can load it from memory. */
3727 	store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3729 	set_ia32_use_frame(store);
3730 	set_ia32_op_type(store, ia32_AddrModeD);
3731 	set_ia32_ls_mode(store, mode_Iu);
3732 	arch_add_irn_flags(store, arch_irn_flags_spill);
3734 	store_mem = new_r_Proj(store, mode_M, pn_ia32_Store_M);
3736 	/* exception for 32bit unsigned, do a 64bit spill+load */
3737 	if (!mode_is_signed(mode)) {
3740 		ir_node *zero_const = ia32_create_Immediate(irg, NULL, 0, 0);
/* Write a zero into the upper 4 bytes of the slot so the 64-bit value
 * read by fild is the zero-extended (non-negative) original. */
3742 		ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3743 		                                        noreg_GP, nomem, zero_const);
3744 		ir_node *zero_store_mem = new_r_Proj(zero_store, mode_M, pn_ia32_Store_M);
3746 		set_ia32_use_frame(zero_store);
3747 		set_ia32_op_type(zero_store, ia32_AddrModeD);
3748 		add_ia32_am_offs_int(zero_store, 4);
3749 		set_ia32_ls_mode(zero_store, mode_Iu);
3750 		arch_add_irn_flags(zero_store, arch_irn_flags_spill);
3752 		in[0] = zero_store_mem;
/* fild must see both stores; merge them with a Sync. */
3755 		store_mem  = new_rd_Sync(dbgi, block, 2, in);
3756 		store_mode = mode_Ls;
3758 		store_mode = mode_Is;
/* Load the spilled integer into an x87 register. */
3762 	fild = new_bd_ia32_fild(dbgi, block, get_irg_frame(irg), noreg_GP, store_mem);
3764 	set_ia32_use_frame(fild);
3765 	set_ia32_op_type(fild, ia32_AddrModeS);
3766 	set_ia32_ls_mode(fild, store_mode);
3768 	new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
3774  * Create a conversion from one integer mode into another one
3776 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3777                                 dbg_info *dbgi, ir_node *block, ir_node *op,
3780 	ir_node             *new_block = be_transform_node(block);
3782 	ia32_address_mode_t  am;
3783 	ia32_address_t      *addr      = &am.addr;
/* Only widening conversions reach this helper. */
3786 	assert(get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode));
3788 #ifdef DEBUG_libfirm
3790 		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
/* If the upper bits are already clean the widening is a no-op. */
3795 	if (be_upper_bits_clean(op, src_mode)) {
3796 		return be_transform_node(op);
3799 	match_arguments(&am, block, NULL, op, NULL,
3800 	                match_am | match_8bit_am | match_16bit_am);
3802 	new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3803 	                           addr->mem, am.new_op2, src_mode);
3804 	set_am_attributes(new_node, &am);
3805 	/* match_arguments assume that out-mode = in-mode, this isn't true here
/* ...so force the load/store mode to the (smaller) source mode. */
3807 	set_ia32_ls_mode(new_node, src_mode);
3808 	SET_IA32_ORIG_NODE(new_node, node);
3809 	new_node = fix_mem_proj(new_node, &am);
3814  * Transforms a Conv node.
3816  * @return The created ia32 Conv node
3818 static ir_node *gen_Conv(ir_node *node)
3820 	ir_node  *block     = get_nodes_block(node);
3821 	ir_node  *new_block = be_transform_node(block);
3822 	ir_node  *op        = get_Conv_op(node);
3823 	ir_node  *new_op    = NULL;
3824 	dbg_info *dbgi      = get_irn_dbg_info(node);
3825 	ir_mode  *src_mode  = get_irn_mode(op);
3826 	ir_mode  *tgt_mode  = get_irn_mode(node);
3827 	int       src_bits  = get_mode_size_bits(src_mode);
3828 	int       tgt_bits  = get_mode_size_bits(tgt_mode);
3829 	ir_node  *res       = NULL;
/* 64-bit integers must have been lowered before this phase. */
3831 	assert(!mode_is_int(src_mode) || src_bits <= 32);
3832 	assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3834 	/* modeB -> X should already be lowered by the lower_mode_b pass */
3835 	if (src_mode == mode_b) {
3836 		panic("ConvB not lowered %+F", node);
3839 	if (src_mode == tgt_mode) {
3840 		/* this should be optimized already, but who knows... */
3841 		DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node);)
3842 		DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3843 		return be_transform_node(op);
3846 	if (mode_is_float(src_mode)) {
3847 		new_op = be_transform_node(op);
3848 		/* we convert from float ... */
3849 		if (mode_is_float(tgt_mode)) {
/* float -> float: SSE uses a Conv_FP2FP instruction ... */
3851 			if (ia32_cg_config.use_sse2) {
3852 				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3853 				res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3855 				set_ia32_ls_mode(res, tgt_mode);
/* ... x87: widening is free, narrowing goes through a store/load pair. */
3857 				if (src_bits < tgt_bits) {
3858 					DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3861 					res = gen_x87_conv(tgt_mode, new_op);
3862 					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* float -> int */
3868 			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3869 			if (ia32_cg_config.use_sse2) {
3870 				res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3872 				set_ia32_ls_mode(res, src_mode);
3874 				return gen_x87_fp_to_gp(node);
3878 		/* we convert from int ... */
3879 		if (mode_is_float(tgt_mode)) {
3881 			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3882 			if (ia32_cg_config.use_sse2) {
3883 				new_op = be_transform_node(op);
3884 				res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3886 				set_ia32_ls_mode(res, tgt_mode);
3888 				unsigned int_mantissa   = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3889 				unsigned float_mantissa = get_mode_mantissa_size(tgt_mode);
3890 				res = gen_x87_gp_to_fp(node, src_mode);
3892 				/* we need a float-conv, if the int mode has more bits than the
3894 				if (float_mantissa < int_mantissa) {
3895 					res = gen_x87_conv(tgt_mode, res);
3896 					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3900 		} else if (tgt_mode == mode_b) {
3901 			/* mode_b lowering already took care that we only have 0/1 values */
3902 			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3903 			    src_mode, tgt_mode));
3904 			return be_transform_node(op);
/* int -> int: narrowing (or equal width) needs no code on ia32 ... */
3907 			if (src_bits >= tgt_bits) {
3908 				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3909 				    src_mode, tgt_mode));
3910 				return be_transform_node(op);
/* ... widening goes through create_I2I_Conv. */
3913 			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode the node as an ia32 Immediate; fall back to a normal
 * transformation if it cannot be expressed as an immediate operand. */
3921 static ir_node *create_immediate_or_transform(ir_node *const node)
3923 	ir_node *new_node = ia32_try_create_Immediate(node, 'i');
3924 	if (new_node == NULL) {
3925 		new_node = be_transform_node(node);
3931  * Transforms a FrameAddr into an ia32 Add.
3933 static ir_node *gen_be_FrameAddr(ir_node *node)
3935 	ir_node  *block  = be_transform_node(get_nodes_block(node));
3936 	ir_node  *op     = be_get_FrameAddr_frame(node);
3937 	ir_node  *new_op = be_transform_node(op);
3938 	dbg_info *dbgi   = get_irn_dbg_info(node);
/* A Lea with the frame entity attached computes frame_pointer + offset;
 * the concrete offset is filled in later when frame entities are assigned. */
3941 	new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3942 	set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3943 	set_ia32_use_frame(new_node);
3945 	SET_IA32_ORIG_NODE(new_node, node);
3951  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3953 static ir_node *gen_be_Return(ir_node *node)
3955 	ir_node   *ret_val     = get_irn_n(node, n_be_Return_val);
3956 	ir_node   *ret_mem     = get_irn_n(node, n_be_Return_mem);
3957 	ir_node   *new_ret_val = be_transform_node(ret_val);
3958 	ir_node   *new_ret_mem = be_transform_node(ret_mem);
3959 	dbg_info  *dbgi        = get_irn_dbg_info(node);
3960 	ir_node   *block       = be_transform_node(get_nodes_block(node));
3961 	ir_graph  *irg         = get_Block_irg(block);
3962 	ir_entity *ent         = get_irg_entity(irg);
3963 	ir_type   *tp          = get_entity_type(ent);
3977 	assert(ret_val != NULL);
/* Only the SSE2 + float-result case needs special handling; everything
 * else is a plain copy of the be_Return node. */
3978 	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3979 		return be_duplicate_node(node);
3982 	res_type = get_method_res_type(tp, 0);
3984 	if (! is_Primitive_type(res_type)) {
3985 		return be_duplicate_node(node);
3988 	mode = get_type_mode(res_type);
3989 	if (! mode_is_float(mode)) {
3990 		return be_duplicate_node(node);
3993 	assert(get_method_n_ress(tp) == 1);
3995 	frame = get_irg_frame(irg);
3997 	/* store xmm0 onto stack */
3998 	sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3999 	                                     new_ret_mem, new_ret_val);
4000 	set_ia32_ls_mode(sse_store, mode);
4001 	set_ia32_op_type(sse_store, ia32_AddrModeD);
4002 	set_ia32_use_frame(sse_store);
4003 	arch_add_irn_flags(sse_store, arch_irn_flags_spill);
4004 	store_mem = new_r_Proj(sse_store, mode_M, pn_ia32_xStoreSimple_M);
4006 	/* load into x87 register */
4007 	fld = new_bd_ia32_fld(dbgi, block, frame, noreg_GP, store_mem, mode);
4008 	set_ia32_op_type(fld, ia32_AddrModeS);
4009 	set_ia32_use_frame(fld);
4011 	mproj = new_r_Proj(fld, mode_M, pn_ia32_fld_M);
4012 	fld   = new_r_Proj(fld, mode_fp, pn_ia32_fld_res);
4014 	/* create a new return */
4015 	arity = get_irn_arity(node);
4016 	in    = ALLOCAN(ir_node*, arity);
4017 	pop   = be_Return_get_pop(node);
/* Rebuild the Return's inputs, substituting the fld result/memory for
 * the original value and memory operands. */
4018 	for (i = 0; i < arity; ++i) {
4019 		ir_node *op = get_irn_n(node, i);
4020 		if (op == ret_val) {
4022 		} else if (op == ret_mem) {
4025 			in[i] = be_transform_node(op);
4028 	new_node = be_new_Return(dbgi, irg, block, arity, pop, arity, in);
4029 	copy_node_attr(irg, node, new_node);
4035  * Transform a be_AddSP into an ia32_SubSP.
4037 static ir_node *gen_be_AddSP(ir_node *node)
4039 	ir_node *sz = get_irn_n(node, n_be_AddSP_size);
4040 	ir_node *sp = get_irn_n(node, n_be_AddSP_old_sp);
/* The ia32 stack grows downwards, so allocating (AddSP) becomes a SubSP. */
4042 	ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4043 	                              match_am | match_immediate);
4044 	assert(is_ia32_SubSP(new_node));
/* Pin the stack result to %esp. */
4045 	arch_set_irn_register_out(new_node, pn_ia32_SubSP_stack,
4046 	                          &ia32_registers[REG_ESP]);
4051  * Transform a be_SubSP into an ia32_AddSP
4053 static ir_node *gen_be_SubSP(ir_node *node)
4055 	ir_node *sz = get_irn_n(node, n_be_SubSP_size);
4056 	ir_node *sp = get_irn_n(node, n_be_SubSP_old_sp);
/* Mirror of gen_be_AddSP: freeing stack space (SubSP) becomes an AddSP. */
4058 	ir_node *new_node = gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4059 	                              match_am | match_immediate);
4060 	assert(is_ia32_AddSP(new_node));
/* Pin the stack result to %esp. */
4061 	arch_set_irn_register_out(new_node, pn_ia32_AddSP_stack,
4062 	                          &ia32_registers[REG_ESP]);
/* Transform a Phi: pick the register class requirement matching the Phi's
 * mode (gp, xmm with SSE2, x87 fp otherwise, or none for memory/other). */
4066 static ir_node *gen_Phi(ir_node *node)
4068 	ir_mode                   *mode = get_irn_mode(node);
4069 	const arch_register_req_t *req;
4070 	if (ia32_mode_needs_gp_reg(mode)) {
4071 		/* we shouldn't have any 64bit stuff around anymore */
4072 		assert(get_mode_size_bits(mode) <= 32);
4073 		/* all integer operations are on 32bit registers now */
4075 		req  = ia32_reg_classes[CLASS_ia32_gp].class_req;
4076 	} else if (mode_is_float(mode)) {
4077 		if (ia32_cg_config.use_sse2) {
4079 			req  = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4082 			req  = ia32_reg_classes[CLASS_ia32_fp].class_req;
4085 		req = arch_no_register_req;
4088 	return be_transform_phi(node, req);
/* Transform an unconditional Jmp into its ia32 counterpart. */
4091 static ir_node *gen_Jmp(ir_node *node)
4093 	ir_node  *block     = get_nodes_block(node);
4094 	ir_node  *new_block = be_transform_node(block);
4095 	dbg_info *dbgi      = get_irn_dbg_info(node);
4098 	new_node = new_bd_ia32_Jmp(dbgi, new_block);
4099 	SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump; the target may be folded as an address-mode
 * or immediate operand via match_arguments. */
4107 static ir_node *gen_IJmp(ir_node *node)
4109 	ir_node  *block     = get_nodes_block(node);
4110 	ir_node  *new_block = be_transform_node(block);
4111 	dbg_info *dbgi      = get_irn_dbg_info(node);
4112 	ir_node  *op        = get_IJmp_target(node);
4114 	ia32_address_mode_t  am;
4115 	ia32_address_t      *addr = &am.addr;
4117 	assert(get_irn_mode(op) == mode_P);
4119 	match_arguments(&am, block, NULL, op, NULL,
4120 	                match_am | match_immediate | match_upconv);
4122 	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4123 	                            addr->mem, am.new_op2);
4124 	set_am_attributes(new_node, &am);
4125 	SET_IA32_ORIG_NODE(new_node, node);
4127 	new_node = fix_mem_proj(new_node, &am);
/* Transform a lowered l_Add into a real ia32 Add producing a tuple
 * (the flags output is needed by the matching l_Adc). */
4132 static ir_node *gen_ia32_l_Add(ir_node *node)
4134 	ir_node *left    = get_irn_n(node, n_ia32_l_Add_left);
4135 	ir_node *right   = get_irn_n(node, n_ia32_l_Add_right);
4136 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4137 			match_commutative | match_am | match_immediate |
4138 			match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it and
 * switch the Add to mode_T so its Projs stay valid. */
4140 	if (is_Proj(lowered)) {
4141 		lowered = get_Proj_pred(lowered);
4143 		assert(is_ia32_Add(lowered));
4144 		set_irn_mode(lowered, mode_T);
/* Transform a lowered add-with-carry into an ia32 Adc (flags-consuming). */
4150 static ir_node *gen_ia32_l_Adc(ir_node *node)
4152 	return gen_binop_flags(node, new_bd_ia32_Adc,
4153 			match_commutative | match_am | match_immediate |
4154 			match_mode_neutral);
4158  * Transforms a l_MulS into a "real" MulS node.
4160  * @return the created ia32 Mul node
4162 static ir_node *gen_ia32_l_Mul(ir_node *node)
4164 	ir_node *left  = get_binop_left(node);
4165 	ir_node *right = get_binop_right(node);
4167 	return gen_binop(node, left, right, new_bd_ia32_Mul,
4168 	                 match_commutative | match_am | match_mode_neutral);
4172  * Transforms a l_IMulS into a "real" IMul1OPS node.
4174  * @return the created ia32 IMul1OP node
4176 static ir_node *gen_ia32_l_IMul(ir_node *node)
4178 	ir_node *left  = get_binop_left(node);
4179 	ir_node *right = get_binop_right(node);
4181 	return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4182 	                 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered l_Sub into a real ia32 Sub producing a tuple
 * (the borrow flag is needed by the matching l_Sbb).
 * Note: no match_commutative here — subtraction is not commutative. */
4185 static ir_node *gen_ia32_l_Sub(ir_node *node)
4187 	ir_node *left    = get_irn_n(node, n_ia32_l_Sub_minuend);
4188 	ir_node *right   = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4189 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4190 			match_am | match_immediate | match_mode_neutral);
/* Unwrap a possible result Proj and make the Sub a tuple node. */
4192 	if (is_Proj(lowered)) {
4193 		lowered = get_Proj_pred(lowered);
4195 		assert(is_ia32_Sub(lowered));
4196 		set_irn_mode(lowered, mode_T);
/* Transform a lowered subtract-with-borrow into an ia32 Sbb. */
4202 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4204 	return gen_binop_flags(node, new_bd_ia32_Sbb,
4205 			match_am | match_immediate | match_mode_neutral);
/* Transform a lowered 64-bit-int -> float conversion: spill the two 32-bit
 * halves to a frame slot, fild the 64-bit value, and for unsigned inputs
 * compensate the sign interpretation by conditionally adding 2^64. */
4208 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4210 	ir_node  *src_block    = get_nodes_block(node);
4211 	ir_node  *block        = be_transform_node(src_block);
4212 	ir_graph *irg          = get_Block_irg(block);
4213 	dbg_info *dbgi         = get_irn_dbg_info(node);
4214 	ir_node  *frame        = get_irg_frame(irg);
4215 	ir_node  *val_low      = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4216 	ir_node  *val_high     = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4217 	ir_node  *new_val_low  = be_transform_node(val_low);
4218 	ir_node  *new_val_high = be_transform_node(val_high);
4220 	ir_node  *sync, *fild, *res;
4222 	ir_node  *store_high;
4226 	if (ia32_cg_config.use_sse2) {
4227 		panic("not implemented for SSE2");
/* Store the low word at offset 0 and the high word at offset 4 of the
 * same spill slot, forming the 64-bit value in memory. */
4231 	store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4233 	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4235 	SET_IA32_ORIG_NODE(store_low,  node);
4236 	SET_IA32_ORIG_NODE(store_high, node);
4238 	mem_low  = new_r_Proj(store_low, mode_M, pn_ia32_Store_M);
4239 	mem_high = new_r_Proj(store_high, mode_M, pn_ia32_Store_M);
4241 	set_ia32_use_frame(store_low);
4242 	set_ia32_use_frame(store_high);
4243 	set_ia32_op_type(store_low, ia32_AddrModeD);
4244 	set_ia32_op_type(store_high, ia32_AddrModeD);
4245 	set_ia32_ls_mode(store_low, mode_Iu);
4246 	set_ia32_ls_mode(store_high, mode_Is);
4247 	arch_add_irn_flags(store_low, arch_irn_flags_spill);
4248 	arch_add_irn_flags(store_high, arch_irn_flags_spill);
4249 	add_ia32_am_offs_int(store_high, 4);
4253 	sync  = new_rd_Sync(dbgi, block, 2, in);
/* Load the 64-bit integer into an x87 register. */
4256 	fild = new_bd_ia32_fild(dbgi, block, frame, noreg_GP, sync);
4258 	set_ia32_use_frame(fild);
4259 	set_ia32_op_type(fild, ia32_AddrModeS);
4260 	set_ia32_ls_mode(fild, mode_Ls);
4262 	SET_IA32_ORIG_NODE(fild, node);
4264 	res = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
/* fild treats the value as signed; for an unsigned source whose sign bit
 * was set, add a bias constant selected via (high >> 31) indexing into
 * the ULLBIAS table ({0, 2^64}). */
4266 	if (! mode_is_signed(get_irn_mode(val_high))) {
4267 		ia32_address_mode_t  am;
4269 		ir_node *count = ia32_create_Immediate(irg, NULL, 0, 31);
4272 		am.addr.base          = get_symconst_base();
4273 		am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4274 		am.addr.mem           = nomem;
4277 		am.addr.symconst_ent  = ia32_gen_fp_known_const(ia32_ULLBIAS);
4278 		am.addr.tls_segment   = false;
4279 		am.addr.use_frame     = 0;
4280 		am.addr.frame_entity  = NULL;
4281 		am.addr.symconst_sign = 0;
4282 		am.ls_mode            = mode_F;
4283 		am.mem_proj           = nomem;
4284 		am.op_type            = ia32_AddrModeS;
4286 		am.new_op2            = ia32_new_NoReg_fp(irg);
4287 		am.pinned             = op_pin_state_floats;
4289 		am.ins_permuted       = false;
4291 		fadd  = new_bd_ia32_fadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4292 			am.new_op1, am.new_op2, get_fpcw());
4293 		set_am_attributes(fadd, &am);
4295 		set_irn_mode(fadd, mode_T);
4296 		res = new_rd_Proj(NULL, fadd, mode_fp, pn_ia32_res);
/* Transform a lowered float -> 64-bit-int conversion: fist the x87 value
 * into a 64-bit frame slot; the result halves are read back later by
 * gen_Proj_l_FloattoLL. Returns the memory Proj of the store. */
4301 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4303 	ir_node  *src_block  = get_nodes_block(node);
4304 	ir_node  *block      = be_transform_node(src_block);
4305 	ir_graph *irg        = get_Block_irg(block);
4306 	dbg_info *dbgi       = get_irn_dbg_info(node);
4307 	ir_node  *frame      = get_irg_frame(irg);
4308 	ir_node  *val        = get_irn_n(node, n_ia32_l_FloattoLL_val);
4309 	ir_node  *new_val    = be_transform_node(val);
4312 	fist = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val);
4313 	SET_IA32_ORIG_NODE(fist, node);
4314 	set_ia32_use_frame(fist);
4315 	set_ia32_op_type(fist, ia32_AddrModeD);
4316 	set_ia32_ls_mode(fist, mode_Ls);
4317 	arch_add_irn_flags(fist, arch_irn_flags_spill);
/* gen_vfist may create either fist or fisttp; both share the M proj number. */
4319 	assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
4320 	return new_r_Proj(fist, mode_M, pn_ia32_fist_M);
/* Transform a Proj of l_FloattoLL: load one 32-bit half of the 64-bit
 * value that gen_ia32_l_FloattoLL stored into the frame slot. */
4323 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4325 	ir_node  *block    = be_transform_node(get_nodes_block(node));
4326 	ir_graph *irg      = get_Block_irg(block);
4327 	ir_node  *pred     = get_Proj_pred(node);
4328 	ir_node  *new_pred = be_transform_node(pred);
4329 	ir_node  *frame    = get_irg_frame(irg);
4330 	dbg_info *dbgi     = get_irn_dbg_info(node);
4331 	long      pn       = get_Proj_proj(node);
4336 	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4337 	SET_IA32_ORIG_NODE(load, node);
4338 	set_ia32_use_frame(load);
4339 	set_ia32_op_type(load, ia32_AddrModeS);
4340 	set_ia32_ls_mode(load, mode_Iu);
4341 	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
4342 	 * 32 bit from it with this particular load */
4343 	attr = get_ia32_attr(load);
4344 	attr->data.need_64bit_stackent = 1;
/* High half lives at offset 4; low half at offset 0. */
4346 	if (pn == pn_ia32_l_FloattoLL_res_high) {
4347 		add_ia32_am_offs_int(load, 4);
4349 		assert(pn == pn_ia32_l_FloattoLL_res_low);
4352 	proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4358  * Transform the Projs of an AddSP.
4360 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4362 	ir_node  *pred     = get_Proj_pred(node);
4363 	ir_node  *new_pred = be_transform_node(pred);
4364 	dbg_info *dbgi     = get_irn_dbg_info(node);
4365 	long      proj     = get_Proj_proj(node);
/* be_AddSP was transformed to ia32_SubSP (stack grows down), so map each
 * be_AddSP proj number onto the corresponding SubSP proj. */
4367 	if (proj == pn_be_AddSP_sp) {
4368 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4369 		                           pn_ia32_SubSP_stack);
4370 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4372 	} else if (proj == pn_be_AddSP_res) {
4373 		return new_rd_Proj(dbgi, new_pred, mode_Iu,
4374 		                   pn_ia32_SubSP_addr);
4375 	} else if (proj == pn_be_AddSP_M) {
4376 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4379 	panic("No idea how to transform proj->AddSP");
4383  * Transform the Projs of a SubSP.
4385 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4387 	ir_node  *pred     = get_Proj_pred(node);
4388 	ir_node  *new_pred = be_transform_node(pred);
4389 	dbg_info *dbgi     = get_irn_dbg_info(node);
4390 	long      proj     = get_Proj_proj(node);
/* be_SubSP was transformed to ia32_AddSP; remap the proj numbers. */
4392 	if (proj == pn_be_SubSP_sp) {
4393 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4394 		                           pn_ia32_AddSP_stack);
4395 		arch_set_irn_register(res, &ia32_registers[REG_ESP]);
4397 	} else if (proj == pn_be_SubSP_M) {
4398 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4401 	panic("No idea how to transform proj->SubSP");
4405  * Transform and renumber the Projs from a Load.
4407 static ir_node *gen_Proj_Load(ir_node *node)
4410 	ir_node  *pred = get_Proj_pred(node);
4411 	dbg_info *dbgi = get_irn_dbg_info(node);
4412 	long      proj = get_Proj_proj(node);
4414 	/* loads might be part of source address mode matches, so we don't
4415 	 * transform the ProjMs yet (with the exception of loads whose result is
4418 	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4421 		/* this is needed, because sometimes we have loops that are only
4422 		   reachable through the ProjM */
4423 		be_enqueue_preds(node);
4424 		/* do it in 2 steps, to silence firm verifier */
4425 		res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4426 		set_Proj_proj(res, pn_ia32_mem);
4430 	/* renumber the proj */
4431 	new_pred = be_transform_node(pred);
/* The Load may have been transformed into several different ia32 nodes;
 * dispatch on the actual node kind and remap the proj numbers. */
4432 	if (is_ia32_Load(new_pred)) {
4433 		switch ((pn_Load)proj) {
4435 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4437 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4438 		case pn_Load_X_except:
4439 			/* This Load might raise an exception. Mark it. */
4440 			set_ia32_exc_label(new_pred, 1);
4441 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_except);
4442 		case pn_Load_X_regular:
4443 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_regular);
/* Conv_I2I: load was folded into a sign/zero-extending conversion. */
4445 	} else if (is_ia32_Conv_I2I(new_pred)) {
4446 		set_irn_mode(new_pred, mode_T);
4447 		switch ((pn_Load)proj) {
4449 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4451 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4452 		case pn_Load_X_except:
4453 			/* This Load might raise an exception. Mark it. */
4454 			set_ia32_exc_label(new_pred, 1);
4455 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_except);
4456 		case pn_Load_X_regular:
4457 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Conv_I2I_X_regular);
/* xLoad: SSE load. */
4459 	} else if (is_ia32_xLoad(new_pred)) {
4460 		switch ((pn_Load)proj) {
4462 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4464 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4465 		case pn_Load_X_except:
4466 			/* This Load might raise an exception. Mark it. */
4467 			set_ia32_exc_label(new_pred, 1);
4468 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_except);
4469 		case pn_Load_X_regular:
4470 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_regular);
/* fld: x87 load. */
4472 	} else if (is_ia32_fld(new_pred)) {
4473 		switch ((pn_Load)proj) {
4475 			return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fld_res);
4477 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fld_M);
4478 		case pn_Load_X_except:
4479 			/* This Load might raise an exception. Mark it. */
4480 			set_ia32_exc_label(new_pred, 1);
4481 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_except);
4482 		case pn_Load_X_regular:
4483 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fld_X_regular);
4486 		/* can happen for ProJMs when source address mode happened for the
4489 		/* however it should not be the result proj, as that would mean the
4490 		   load had multiple users and should not have been used for
4492 		if (proj != pn_Load_M) {
4493 			panic("internal error: transformed node not a Load");
4495 		return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4498 	panic("No idea how to transform Proj(Load) %+F", node);
/* Transform and renumber the Projs of a Store: dispatch on what the Store
 * was transformed into (Store, fist, fisttp, fst, xStore, Sync, or a node
 * with destination address mode) and remap the proj numbers. */
4501 static ir_node *gen_Proj_Store(ir_node *node)
4503 	ir_node  *pred     = get_Proj_pred(node);
4504 	ir_node  *new_pred = be_transform_node(pred);
4505 	dbg_info *dbgi     = get_irn_dbg_info(node);
4506 	long      pn       = get_Proj_proj(node);
4508 	if (is_ia32_Store(new_pred)) {
4509 		switch ((pn_Store)pn) {
4511 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Store_M);
4512 		case pn_Store_X_except:
4513 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_except);
4514 		case pn_Store_X_regular:
4515 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Store_X_regular);
4517 	} else if (is_ia32_fist(new_pred)) {
4518 		switch ((pn_Store)pn) {
4520 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fist_M);
4521 		case pn_Store_X_except:
4522 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_except);
4523 		case pn_Store_X_regular:
4524 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fist_X_regular);
4526 	} else if (is_ia32_fisttp(new_pred)) {
4527 		switch ((pn_Store)pn) {
4529 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fisttp_M);
4530 		case pn_Store_X_except:
4531 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_except);
4532 		case pn_Store_X_regular:
4533 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fisttp_X_regular);
4535 	} else if (is_ia32_fst(new_pred)) {
4536 		switch ((pn_Store)pn) {
4538 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fst_M);
4539 		case pn_Store_X_except:
4540 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_except);
4541 		case pn_Store_X_regular:
4542 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_fst_X_regular);
4544 	} else if (is_ia32_xStore(new_pred)) {
4545 		switch ((pn_Store)pn) {
4547 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xStore_M);
4548 		case pn_Store_X_except:
4549 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_except);
4550 		case pn_Store_X_regular:
4551 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xStore_X_regular);
4553 	} else if (is_Sync(new_pred)) {
4554 		/* hack for the case that gen_float_const_Store produced a Sync */
4555 		if (pn == pn_Store_M) {
4558 		panic("exception control flow not implemented yet");
4559 	} else if (get_ia32_op_type(new_pred) == ia32_AddrModeD) {
4560 		/* destination address mode */
4561 		if (pn == pn_Store_M) {
4564 		panic("exception control flow for destination AM not implemented yet");
4567 	panic("No idea how to transform Proj(Store) %+F", node);
4571  * Transform and renumber the Projs from a Div or Mod instruction.
4573 static ir_node *gen_Proj_Div(ir_node *node)
4575 	ir_node  *pred     = get_Proj_pred(node);
4576 	ir_node  *new_pred = be_transform_node(pred);
4577 	dbg_info *dbgi     = get_irn_dbg_info(node);
4578 	long      proj     = get_Proj_proj(node);
/* Div and IDiv share proj numbering; the asserts document that. */
4580 	assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4581 	assert((long)pn_ia32_Div_div_res == (long)pn_ia32_IDiv_div_res);
4583 	switch ((pn_Div)proj) {
4585 		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4586 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4587 		} else if (is_ia32_xDiv(new_pred)) {
4588 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4589 		} else if (is_ia32_fdiv(new_pred)) {
4590 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_fdiv_M);
4592 			panic("Div transformed to unexpected thing %+F", new_pred);
4595 		if (is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred)) {
4596 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4597 		} else if (is_ia32_xDiv(new_pred)) {
4598 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4599 		} else if (is_ia32_fdiv(new_pred)) {
4600 			return new_rd_Proj(dbgi, new_pred, mode_fp, pn_ia32_fdiv_res);
4602 			panic("Div transformed to unexpected thing %+F", new_pred);
4604 	case pn_Div_X_except:
4605 		set_ia32_exc_label(new_pred, 1);
4606 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4607 	case pn_Div_X_regular:
4608 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4611 	panic("No idea how to transform proj->Div");
4615  * Transform and renumber the Projs from a Div or Mod instruction.
4617 static ir_node *gen_Proj_Mod(ir_node *node)
4619 	ir_node  *pred     = get_Proj_pred(node);
4620 	ir_node  *new_pred = be_transform_node(pred);
4621 	dbg_info *dbgi     = get_irn_dbg_info(node);
4622 	long      proj     = get_Proj_proj(node);
/* Mod is only produced for integer modes (Div/IDiv); float has no Mod. */
4624 	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4625 	assert((long)pn_ia32_Div_M == (long)pn_ia32_IDiv_M);
4626 	assert((long)pn_ia32_Div_mod_res == (long)pn_ia32_IDiv_mod_res);
4628 	switch ((pn_Mod)proj) {
4630 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4632 		return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4633 	case pn_Mod_X_except:
4634 		set_ia32_exc_label(new_pred, 1);
4635 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_except);
4636 	case pn_Mod_X_regular:
4637 		return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_regular);
4639 	panic("No idea how to transform proj->Mod");
4643  * Transform and renumber the Projs from a CopyB.
4645 static ir_node *gen_Proj_CopyB(ir_node *node)
4647 	ir_node  *pred     = get_Proj_pred(node);
4648 	ir_node  *new_pred = be_transform_node(pred);
4649 	dbg_info *dbgi     = get_irn_dbg_info(node);
4650 	long      proj     = get_Proj_proj(node);
/* CopyB was transformed to either CopyB_i (fixed size) or CopyB
 * (variable size); each has its own proj numbering. */
4652 	switch ((pn_CopyB)proj) {
4654 		if (is_ia32_CopyB_i(new_pred)) {
4655 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4656 		} else if (is_ia32_CopyB(new_pred)) {
4657 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4660 	case pn_CopyB_X_regular:
4661 		if (is_ia32_CopyB_i(new_pred)) {
4662 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_regular);
4663 		} else if (is_ia32_CopyB(new_pred)) {
4664 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_regular);
4667 	case pn_CopyB_X_except:
4668 		if (is_ia32_CopyB_i(new_pred)) {
4669 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_i_X_except);
4670 		} else if (is_ia32_CopyB(new_pred)) {
4671 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_CopyB_X_except);
4676 	panic("No idea how to transform proj->CopyB");
/* Transform a be_Call into an ia32_Call. The call target may be folded as
 * address mode or immediate; register parameters are routed into the
 * eax/ecx/edx inputs of the ia32_Call. */
4679 static ir_node *gen_be_Call(ir_node *node)
4681 	dbg_info *const dbgi      = get_irn_dbg_info(node);
4682 	ir_node  *const src_block = get_nodes_block(node);
4683 	ir_node  *const block     = be_transform_node(src_block);
4684 	ir_node  *const src_mem   = get_irn_n(node, n_be_Call_mem);
4685 	ir_node  *const src_sp    = get_irn_n(node, n_be_Call_sp);
4686 	ir_node  *const sp        = be_transform_node(src_sp);
4687 	ir_node  *const src_ptr   = get_irn_n(node, n_be_Call_ptr);
4688 	ia32_address_mode_t   am;
4689 	ia32_address_t *const addr      = &am.addr;
4694 	ir_node *       eax       = noreg_GP;
4695 	ir_node *       ecx       = noreg_GP;
4696 	ir_node *       edx       = noreg_GP;
4697 	unsigned  const pop       = be_Call_get_pop(node);
4698 	ir_type  *const call_tp   = be_Call_get_type(node);
4699 	int             old_no_pic_adjust;
4700 	int             throws_exception = ir_throws_exception(node);
4702 	/* Run the x87 simulator if the call returns a float value */
4703 	if (get_method_n_ress(call_tp) > 0) {
4704 		ir_type *const res_type = get_method_res_type(call_tp, 0);
4705 		ir_mode *const res_mode = get_type_mode(res_type);
4707 		if (res_mode != NULL && mode_is_float(res_mode)) {
4708 			ir_graph *const irg = get_Block_irg(block);
4709 			ia32_request_x87_sim(irg);
4713 	/* We do not want be_Call direct calls */
4714 	assert(be_Call_get_entity(node) == NULL);
4716 	/* special case for PIC trampoline calls */
4717 	old_no_pic_adjust  = ia32_no_pic_adjust;
4718 	ia32_no_pic_adjust = be_options.pic;
4720 	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4721 	                match_am | match_immediate | match_upconv);
/* Restore the saved flag after matching the call target. */
4723 	ia32_no_pic_adjust = old_no_pic_adjust;
/* The last input is the fpcw; the remaining tail inputs are the GP
 * register parameters, each constrained to exactly one of eax/ecx/edx. */
4725 	i    = get_irn_arity(node) - 1;
4726 	fpcw = be_transform_node(get_irn_n(node, i--));
4727 	for (; i >= n_be_Call_first_arg; --i) {
4728 		arch_register_req_t const *const req
4729 			= arch_get_irn_register_req_in(node, i);
4730 		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4732 		assert(req->type == arch_register_req_type_limited);
4733 		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4735 		switch (*req->limited) {
4736 			case 1 << REG_GP_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4737 			case 1 << REG_GP_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4738 			case 1 << REG_GP_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4739 			default: panic("Invalid GP register for register parameter");
4743 	mem  = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4744 	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4745 	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4746 	ir_set_throws_exception(call, throws_exception);
4747 	set_am_attributes(call, &am);
4748 	call = fix_mem_proj(call, &am);
4750 	if (get_irn_pinned(node) == op_pin_state_pinned)
4751 		set_irn_pinned(call, op_pin_state_pinned);
4753 	SET_IA32_ORIG_NODE(call, node);
4755 	if (ia32_cg_config.use_sse2) {
4756 		/* remember this call for post-processing */
4757 		ARR_APP1(ir_node *, call_list, call);
4758 		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4765  * Transform Builtin trap
 *
 * Emits an ia32 UD2 node (x86 invalid-opcode instruction), threaded on the
 * transformed memory dependency of the Builtin.
4767 static ir_node *gen_trap(ir_node *node)
4769 dbg_info *dbgi = get_irn_dbg_info(node);
4770 ir_node *block = be_transform_node(get_nodes_block(node));
4771 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4773 return new_bd_ia32_UD2(dbgi, block, mem);
4777  * Transform Builtin debugbreak
 *
 * Emits an ia32 Breakpoint node, threaded on the transformed memory
 * dependency of the Builtin.
4779 static ir_node *gen_debugbreak(ir_node *node)
4781 dbg_info *dbgi = get_irn_dbg_info(node);
4782 ir_node *block = be_transform_node(get_nodes_block(node));
4783 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4785 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4789  * Transform Builtin return_address
 *
 * Param 0 is a Const giving the number of frames to walk up; param 1 is the
 * frame pointer to start from. The return address is then loaded from the
 * selected frame via a frame entity.
4791 static ir_node *gen_return_address(ir_node *node)
4793 ir_node *param = get_Builtin_param(node, 0);
4794 ir_node *frame = get_Builtin_param(node, 1);
4795 dbg_info *dbgi = get_irn_dbg_info(node);
4796 ir_tarval *tv = get_Const_tarval(param);
4797 ir_graph *irg = get_irn_irg(node);
4798 unsigned long value = get_tarval_long(tv);
4800 ir_node *block = be_transform_node(get_nodes_block(node));
4801 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the dynamic chain; cnt/res are scratch values for
 * the ClimbFrame loop (NOTE(review): presumably only built when value > 0 —
 * the guard is not visible here, confirm against the full source) */
4805 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4806 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4807 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4810 /* load the return address from this frame */
4811 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4813 set_irn_pinned(load, get_irn_pinned(node));
4814 set_ia32_op_type(load, ia32_AddrModeS);
4815 set_ia32_ls_mode(load, mode_Iu);
/* address via the backend's return-address frame entity at offset 0 */
4817 set_ia32_am_offs_int(load, 0);
4818 set_ia32_use_frame(load);
4819 set_ia32_frame_ent(load, ia32_get_return_address_entity(irg));
4821 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants must agree on the result proj number for the
 * rematerialization machinery to treat them uniformly */
4822 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4823 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4824 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4825 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4828 SET_IA32_ORIG_NODE(load, node);
4829 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4833  * Transform Builtin frame_address
 *
 * Param 0 is a Const giving the number of frames to walk up; param 1 is the
 * frame pointer to start from. The frame address is loaded from the selected
 * frame via a frame entity.
4835 static ir_node *gen_frame_address(ir_node *node)
4837 ir_node *param = get_Builtin_param(node, 0);
4838 ir_node *frame = get_Builtin_param(node, 1);
4839 dbg_info *dbgi = get_irn_dbg_info(node);
4840 ir_tarval *tv = get_Const_tarval(param);
4841 ir_graph *irg = get_irn_irg(node);
4842 unsigned long value = get_tarval_long(tv);
4844 ir_node *block = be_transform_node(get_nodes_block(node));
4845 ir_node *ptr = be_transform_node(frame);
/* walk `value` frames up the dynamic chain; cnt/res are scratch values for
 * the ClimbFrame loop (NOTE(review): the value > 0 guard is not visible
 * here — confirm against the full source) */
4850 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4851 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4852 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4855 /* load the frame address from this frame */
4856 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4858 set_irn_pinned(load, get_irn_pinned(node));
4859 set_ia32_op_type(load, ia32_AddrModeS);
4860 set_ia32_ls_mode(load, mode_Iu);
/* the entity may be missing; the load then addresses offset 0 directly */
4862 ent = ia32_get_frame_address_entity(irg);
4864 set_ia32_am_offs_int(load, 0);
4865 set_ia32_use_frame(load);
4866 set_ia32_frame_ent(load, ent);
4868 /* will fail anyway, but gcc does this: */
4869 set_ia32_am_offs_int(load, 0);
4872 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants must agree on the result proj number for the
 * rematerialization machinery to treat them uniformly */
4873 assert((int)pn_ia32_xLoad_res == (int)pn_ia32_fld_res
4874 && (int)pn_ia32_fld_res == (int)pn_ia32_Load_res
4875 && (int)pn_ia32_Load_res == (int)pn_ia32_res);
4876 arch_add_irn_flags(load, arch_irn_flags_rematerializable);
4879 SET_IA32_ORIG_NODE(load, node);
4880 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4884  * Transform Builtin prefetch
 *
 * Param 0 is the address, param 1 the read/write flag (1 = write), param 2
 * the SSE locality hint. Falls back to routing memory when neither SSE nor
 * 3DNow! prefetches are available.
4886 static ir_node *gen_prefetch(ir_node *node)
4889 ir_node *ptr, *block, *mem, *base, *idx;
4890 ir_node *param, *new_node;
4893 ia32_address_t addr;
4895 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4896 /* no prefetch at all, route memory */
4897 return be_transform_node(get_Builtin_mem(node));
4900 param = get_Builtin_param(node, 1);
4901 tv = get_Const_tarval(param);
4902 rw = get_tarval_long(tv);
4904 /* construct load address */
4905 memset(&addr, 0, sizeof(addr));
4906 ptr = get_Builtin_param(node, 0);
4907 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
4914 base = be_transform_node(base);
4920 idx = be_transform_node(idx);
4923 dbgi = get_irn_dbg_info(node);
4924 block = be_transform_node(get_nodes_block(node));
4925 mem = be_transform_node(get_Builtin_mem(node));
4927 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4928 /* we have 3DNow!, this was already checked above */
4929 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, idx, mem);
4930 } else if (ia32_cg_config.use_sse_prefetch) {
4931 /* note: rw == 1 is IGNORED in that case */
4932 param = get_Builtin_param(node, 2);
4933 tv = get_Const_tarval(param);
4934 locality = get_tarval_long(tv);
4936 /* SSE style prefetch: lower locality hints select weaker cache
 * placement (NTA for the weakest) */
4939 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, idx, mem);
4942 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, idx, mem);
4945 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, idx, mem);
4948 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, idx, mem);
4952 assert(ia32_cg_config.use_3dnow_prefetch);
4953 /* 3DNow! style prefetch */
4954 new_node = new_bd_ia32_Prefetch(dbgi, block, base, idx, mem);
4957 set_irn_pinned(new_node, get_irn_pinned(node));
4958 set_ia32_op_type(new_node, ia32_AddrModeS);
4959 set_ia32_ls_mode(new_node, mode_Bu);
4960 set_address(new_node, &addr);
4962 SET_IA32_ORIG_NODE(new_node, node);
4964 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4968  * Transform bsf like node
 *
 * Helper for unary builtins (Bsf/Bsr/Popcnt style) whose ia32 counterpart
 * can take its single operand in address mode.
4970 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4972 ir_node *param = get_Builtin_param(node, 0);
4973 dbg_info *dbgi = get_irn_dbg_info(node);
4975 ir_node *block = get_nodes_block(node);
4976 ir_node *new_block = be_transform_node(block);
4978 ia32_address_mode_t am;
4979 ia32_address_t *addr = &am.addr;
4982 match_arguments(&am, block, NULL, param, NULL, match_am);
4984 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4985 set_am_attributes(cnt, &am);
4986 set_ia32_ls_mode(cnt, get_irn_mode(param));
4988 SET_IA32_ORIG_NODE(cnt, node);
4989 return fix_mem_proj(cnt, &am);
4993  * Transform builtin ffs.
 *
 * ffs(x) = bsf(x) + 1, and 0 for x == 0. The zero case is handled without a
 * branch: Setcc materializes the zero flag, Neg turns it into an all-ones
 * mask, and Or forces the bsf result to 0xFFFFFFFF so the final +1 yields 0.
4995 static ir_node *gen_ffs(ir_node *node)
4997 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4998 ir_node *real = skip_Proj(bsf);
4999 dbg_info *dbgi = get_irn_dbg_info(real);
5000 ir_node *block = get_nodes_block(real);
5001 ir_node *flag, *set, *conv, *neg, *orn, *add;
/* make sure the Bsf node is in mode_T so we can also use its flags output */
5004 if (get_irn_mode(real) != mode_T) {
5005 set_irn_mode(real, mode_T);
5006 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5009 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = 1 iff the input was zero (ZF set by bsf) */
5012 set = new_bd_ia32_Setcc(dbgi, block, flag, ia32_cc_equal);
5013 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the 8-bit Setcc result to 32 bit */
5016 conv = new_bd_ia32_Conv_I2I_8bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5017 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x==0): 0 for non-zero input, all-ones for zero input */
5020 neg = new_bd_ia32_Neg(dbgi, block, conv);
5023 orn = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5024 set_ia32_ls_mode(orn, mode_Iu);
5025 set_ia32_commutative(orn);
/* +1 via Lea with immediate offset (does not clobber flags) */
5028 add = new_bd_ia32_Lea(dbgi, block, orn, noreg_GP);
5029 add_ia32_am_offs_int(add, 1);
5034  * Transform builtin clz.
 *
 * clz(x) = 31 - bsr(x), implemented as bsr(x) XOR 31 (equivalent for the
 * 5-bit result range 0..31).
5036 static ir_node *gen_clz(ir_node *node)
5038 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5039 ir_node *real = skip_Proj(bsr);
5040 dbg_info *dbgi = get_irn_dbg_info(real);
5041 ir_node *block = get_nodes_block(real);
5042 ir_graph *irg = get_Block_irg(block);
5043 ir_node *imm = ia32_create_Immediate(irg, NULL, 0, 31);
5045 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5049  * Transform builtin ctz.
 *
 * ctz maps directly onto bsf (both are undefined for a zero input).
5051 static ir_node *gen_ctz(ir_node *node)
5053 return gen_unop_AM(node, new_bd_ia32_Bsf);
5057  * Transform builtin parity.
5059 static ir_node *gen_parity(ir_node *node)
5061 dbg_info *dbgi = get_irn_dbg_info(node);
5062 ir_node *block = get_nodes_block(node);
5063 ir_node *new_block = be_transform_node(block);
5064 ir_node *param = get_Builtin_param(node, 0);
5065 ir_node *new_param = be_transform_node(param);
5068 /* the x86 parity bit is stupid: it only looks at the lowest byte,
5069  * so we have to do complicated xoring first.
5070  * (we should also better lower this before the backend so we still have a
5071  * chance for CSE, constant folding and other goodies for some of these
/* fold bits 31..16 into 15..0, then (via XorHighLow) the high byte into the
 * low byte, so the parity flag of the last xor covers all 32 bits */
5074 ir_graph *const irg = get_Block_irg(new_block);
5075 ir_node *const count = ia32_create_Immediate(irg, NULL, 0, 16);
5076 ir_node *const shr = new_bd_ia32_Shr(dbgi, new_block, new_param, count);
5077 ir_node *const xorn = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP, nomem, shr, new_param);
5078 ir_node *const xor2 = new_bd_ia32_XorHighLow(dbgi, new_block, xorn);
5081 set_ia32_ls_mode(xorn, mode_Iu);
5082 set_ia32_commutative(xorn);
/* we need the flags output of the XorHighLow, so make it mode_T */
5084 set_irn_mode(xor2, mode_T);
5085 flags = new_r_Proj(xor2, mode_Iu, pn_ia32_XorHighLow_flags);
/* materialize PF (inverted: not_parity gives odd parity = popcount & 1) */
5088 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, ia32_cc_not_parity);
5089 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8-bit Setcc result to 32 bit */
5092 new_node = new_bd_ia32_Conv_I2I_8bit(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, mode_Bu);
5093 SET_IA32_ORIG_NODE(new_node, node);
5098  * Transform builtin popcount
 *
 * Uses the popcnt instruction when available, otherwise the classic
 * divide-and-conquer bit-counting network (Hacker's Delight style):
 * pairwise sums, then nibble, byte and halfword sums.
5100 static ir_node *gen_popcount(ir_node *node)
5102 ir_node *param = get_Builtin_param(node, 0);
5103 dbg_info *dbgi = get_irn_dbg_info(node);
5105 ir_node *block = get_nodes_block(node);
5106 ir_node *new_block = be_transform_node(block);
5109 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5111 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5112 if (ia32_cg_config.use_popcnt) {
5113 ia32_address_mode_t am;
5114 ia32_address_t *addr = &am.addr;
5117 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am | match_upconv);
5119 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5120 set_am_attributes(cnt, &am);
5121 set_ia32_ls_mode(cnt, get_irn_mode(param));
5123 SET_IA32_ORIG_NODE(cnt, node);
5124 return fix_mem_proj(cnt, &am);
5127 new_param = be_transform_node(param);
5129 /* do the standard popcount algo */
5130 /* TODO: This is stupid, we should transform this before the backend,
5131  * to get CSE, localopts, etc. for the operations
5132  * TODO: This is also not the optimal algorithm (it is just the starting
5133  * example in hackers delight, they optimize it more on the following page)
5134  * But I'm too lazy to fix this now, as the code should get lowered before
5135  * the backend anyway.
5137 ir_graph *const irg = get_Block_irg(new_block);
5139 /* m1 = x & 0x55555555 */
5140 imm = ia32_create_Immediate(irg, NULL, 0, 0x55555555);
5141 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
5144 simm = ia32_create_Immediate(irg, NULL, 0, 1);
5145 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
5147 /* m2 = s1 & 0x55555555 */
5148 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2: per-2-bit population counts */
5151 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5153 /* m4 = m3 & 0x33333333 */
5154 imm = ia32_create_Immediate(irg, NULL, 0, 0x33333333);
5155 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
5158 simm = ia32_create_Immediate(irg, NULL, 0, 2);
5159 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
5161 /* m5 = s2 & 0x33333333 */
5162 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5: per-nibble population counts */
5165 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5167 /* m7 = m6 & 0x0F0F0F0F */
5168 imm = ia32_create_Immediate(irg, NULL, 0, 0x0F0F0F0F);
5169 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
5172 simm = ia32_create_Immediate(irg, NULL, 0, 4);
5173 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
5175 /* m8 = s3 & 0x0F0F0F0F */
5176 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8: per-byte population counts */
5179 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5181 /* m10 = m9 & 0x00FF00FF */
5182 imm = ia32_create_Immediate(irg, NULL, 0, 0x00FF00FF);
5183 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
5186 simm = ia32_create_Immediate(irg, NULL, 0, 8);
5187 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
5189 /* m11 = s4 & 0x00FF00FF */
5190 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5192 /* m12 = m10 + m11 */
5193 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5195 /* m13 = m12 & 0x0000FFFF */
5196 imm = ia32_create_Immediate(irg, NULL, 0, 0x0000FFFF);
5197 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5199 /* s5 = m12 >> 16 */
5200 simm = ia32_create_Immediate(irg, NULL, 0, 16);
5201 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
5203 /* res = m13 + s5 */
5204 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5208  * Transform builtin byte swap.
 *
 * 32-bit: bswap if available, otherwise a three-rotate sequence; 16-bit:
 * Bswap16 (always available). Other sizes are rejected.
5210 static ir_node *gen_bswap(ir_node *node)
5212 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5213 dbg_info *dbgi = get_irn_dbg_info(node);
5215 ir_node *block = get_nodes_block(node);
5216 ir_node *new_block = be_transform_node(block);
5217 ir_mode *mode = get_irn_mode(param);
5218 unsigned size = get_mode_size_bits(mode);
5222 if (ia32_cg_config.use_bswap) {
5223 /* swap available */
5224 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* no bswap: rol16 low half, rol16 the word, rol16 the (new) low half
 * — reverses all four bytes of a 32-bit value */
5226 ir_graph *const irg = get_Block_irg(new_block);
5227 ir_node *const i8 = ia32_create_Immediate(irg, NULL, 0, 8);
5228 ir_node *const rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
5229 ir_node *const i16 = ia32_create_Immediate(irg, NULL, 0, 16);
5230 ir_node *const rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
5231 ir_node *const rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
5232 set_ia32_ls_mode(rol1, mode_Hu);
5233 set_ia32_ls_mode(rol2, mode_Iu);
5234 set_ia32_ls_mode(rol3, mode_Hu);
5239 /* swap16 always available */
5240 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5243 panic("Invalid bswap size (%d)", size);
5248  * Transform builtin outport.
 *
 * Param 0 is the I/O port (immediate preferred), param 1 the value to write.
5250 static ir_node *gen_outport(ir_node *node)
5252 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5253 ir_node *oldv = get_Builtin_param(node, 1);
5254 ir_mode *mode = get_irn_mode(oldv);
5255 ir_node *value = be_transform_node(oldv);
5256 ir_node *block = be_transform_node(get_nodes_block(node));
5257 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5258 dbg_info *dbgi = get_irn_dbg_info(node);
5260 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the value mode determines the access width of the out instruction */
5261 set_ia32_ls_mode(res, mode);
5266  * Transform builtin inport.
 *
 * Param 0 is the I/O port; the access width comes from the builtin's
 * declared result type.
5268 static ir_node *gen_inport(ir_node *node)
5270 ir_type *tp = get_Builtin_type(node);
5271 ir_type *rstp = get_method_res_type(tp, 0);
5272 ir_mode *mode = get_type_mode(rstp);
5273 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0));
5274 ir_node *block = be_transform_node(get_nodes_block(node));
5275 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5276 dbg_info *dbgi = get_irn_dbg_info(node);
5278 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5279 set_ia32_ls_mode(res, mode);
5281 /* check for missing Result Proj */
5286  * Transform a builtin inner trampoline
 *
 * Writes an executable 10-byte trampoline at `ptr`:
 *   B9 <env:4>        mov ecx, <env>
 *   E9 <rel32:4>      jmp rel <callee>
 * where rel32 = callee - (ptr + 10), i.e. relative to the end of the
 * trampoline.
5288 static ir_node *gen_inner_trampoline(ir_node *node)
5290 ir_node *ptr = get_Builtin_param(node, 0);
5291 ir_node *callee = get_Builtin_param(node, 1);
5292 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5293 ir_node *mem = get_Builtin_mem(node);
5294 ir_node *block = get_nodes_block(node);
5295 ir_node *new_block = be_transform_node(block);
5299 ir_node *trampoline;
5301 dbg_info *dbgi = get_irn_dbg_info(node);
5302 ia32_address_t addr;
5304 /* construct store address */
5305 memset(&addr, 0, sizeof(addr));
5306 ia32_create_address_mode(&addr, ptr, ia32_create_am_normal);
5308 if (addr.base == NULL) {
5309 addr.base = noreg_GP;
5311 addr.base = be_transform_node(addr.base);
5314 if (addr.index == NULL) {
5315 addr.index = noreg_GP;
5317 addr.index = be_transform_node(addr.index);
5319 addr.mem = be_transform_node(mem);
5321 ir_graph *const irg = get_Block_irg(new_block);
5322 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5323 val = ia32_create_Immediate(irg, NULL, 0, 0xB9);
5324 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5325 set_irn_pinned(store, get_irn_pinned(node));
5326 set_ia32_op_type(store, ia32_AddrModeD);
5327 set_ia32_ls_mode(store, mode_Bu);
5328 set_address(store, &addr);
/* store the 4-byte environment pointer as the mov's immediate */
5332 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5333 addr.index, addr.mem, env);
5334 set_irn_pinned(store, get_irn_pinned(node));
5335 set_ia32_op_type(store, ia32_AddrModeD);
5336 set_ia32_ls_mode(store, mode_Iu);
5337 set_address(store, &addr);
5341 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5342 val = ia32_create_Immediate(irg, NULL, 0, 0xE9);
5343 store = new_bd_ia32_Store_8bit(dbgi, new_block, addr.base, addr.index, addr.mem, val);
5344 set_irn_pinned(store, get_irn_pinned(node));
5345 set_ia32_op_type(store, ia32_AddrModeD);
5346 set_ia32_ls_mode(store, mode_Bu);
5347 set_address(store, &addr);
5351 trampoline = be_transform_node(ptr);
5353 /* the callee is typically an immediate */
/* -10 accounts for the trampoline length: the jump displacement is taken
 * relative to the instruction following the 10-byte trampoline */
5354 if (is_SymConst(callee)) {
5355 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5357 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), noreg_GP);
5358 add_ia32_am_offs_int(rel, -10);
5360 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5362 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5363 addr.index, addr.mem, rel);
5364 set_irn_pinned(store, get_irn_pinned(node));
5365 set_ia32_op_type(store, ia32_AddrModeD);
5366 set_ia32_ls_mode(store, mode_Iu);
5367 set_address(store, &addr);
/* result tuple: memory and the trampoline address */
5372 return new_r_Tuple(new_block, 2, in);
5376  * Transform Builtin node.
 *
 * Dispatches on the builtin kind to the individual transformers above;
 * panics for unimplemented kinds.
5378 static ir_node *gen_Builtin(ir_node *node)
5380 ir_builtin_kind kind = get_Builtin_kind(node);
5384 return gen_trap(node);
5385 case ir_bk_debugbreak:
5386 return gen_debugbreak(node);
5387 case ir_bk_return_address:
5388 return gen_return_address(node);
5389 case ir_bk_frame_address:
5390 return gen_frame_address(node);
5391 case ir_bk_prefetch:
5392 return gen_prefetch(node);
5394 return gen_ffs(node);
5396 return gen_clz(node);
5398 return gen_ctz(node);
5400 return gen_parity(node);
5401 case ir_bk_popcount:
5402 return gen_popcount(node);
5404 return gen_bswap(node);
5406 return gen_outport(node);
5408 return gen_inport(node);
5409 case ir_bk_inner_trampoline:
5410 return gen_inner_trampoline(node);
5412 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
5416  * Transform Proj(Builtin) node.
 *
 * Renumbers the Proj according to how the respective builtin kind was
 * transformed (single result node, proj pair, or a Tuple).
5418 static ir_node *gen_Proj_Builtin(ir_node *proj)
5420 ir_node *node = get_Proj_pred(proj);
5421 ir_node *new_node = be_transform_node(node);
5422 ir_builtin_kind kind = get_Builtin_kind(node);
/* pn_Builtin_max+1 is the first (and here only) result proj number */
5425 case ir_bk_return_address:
5426 case ir_bk_frame_address:
5431 case ir_bk_popcount:
5433 assert(get_Proj_proj(proj) == pn_Builtin_max+1);
5436 case ir_bk_debugbreak:
5437 case ir_bk_prefetch:
5439 assert(get_Proj_proj(proj) == pn_Builtin_M);
5442 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5443 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5445 assert(get_Proj_proj(proj) == pn_Builtin_M);
5446 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5448 case ir_bk_inner_trampoline:
/* inner_trampoline was transformed into a Tuple: 0 = memory, 1 = address */
5449 if (get_Proj_proj(proj) == pn_Builtin_max+1) {
5450 return get_Tuple_pred(new_node, 1);
5452 assert(get_Proj_proj(proj) == pn_Builtin_M);
5453 return get_Tuple_pred(new_node, 0);
5456 panic("Builtin %s not implemented", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it, but record that the stack-pointer
 * adjustment clobbers the flags (it is emitted as an add/sub on esp). */
5459 static ir_node *gen_be_IncSP(ir_node *node)
5461 ir_node *res = be_duplicate_node(node);
5462 arch_add_irn_flags(res, arch_irn_flags_modify_flags);
5468  * Transform the Projs from a be_Call.
 *
 * Maps be_Call proj numbers onto the ia32_Call proj numbers; result projs
 * are matched against the call's out register requirements.
5470 static ir_node *gen_Proj_be_Call(ir_node *node)
5472 ir_node *call = get_Proj_pred(node);
5473 ir_node *new_call = be_transform_node(call);
5474 dbg_info *dbgi = get_irn_dbg_info(node);
5475 long proj = get_Proj_proj(node);
5476 ir_mode *mode = get_irn_mode(node);
5479 if (proj == pn_be_Call_M) {
/* NOTE(review): this uses the input index n_ia32_Call_mem as a proj
 * number and makes the pn_be_Call_M branch below dead — verify the
 * constants coincide in gen_ia32_new_nodes */
5480 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5482 /* transform call modes */
5483 if (mode_is_data(mode)) {
5484 const arch_register_class_t *cls = arch_get_irn_reg_class(node);
5488 /* Map from be_Call to ia32_Call proj number */
5489 if (proj == pn_be_Call_sp) {
5490 proj = pn_ia32_Call_stack;
5491 } else if (proj == pn_be_Call_M) {
5492 proj = pn_ia32_Call_M;
5493 } else if (proj == pn_be_Call_X_except) {
5494 proj = pn_ia32_Call_X_except;
5495 } else if (proj == pn_be_Call_X_regular) {
5496 proj = pn_ia32_Call_X_regular;
/* a register result: find the ia32_Call out slot with the same limited
 * register requirement */
5498 arch_register_req_t const *const req = arch_get_irn_register_req(node);
5500 assert(proj >= pn_be_Call_first_res);
5501 assert(arch_register_req_is(req, limited));
5503 be_foreach_out(new_call, i) {
5504 arch_register_req_t const *const new_req = arch_get_irn_register_req_out(new_call, i);
5505 if (!arch_register_req_is(new_req, limited) ||
5506 new_req->cls != req->cls ||
5507 *new_req->limited != *req->limited)
5513 panic("no matching out requirement found");
5517 res = new_rd_Proj(dbgi, new_call, mode, proj);
5519 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5521 case pn_ia32_Call_stack:
5522 arch_set_irn_register(res, &ia32_registers[REG_ESP]);
5525 case pn_ia32_Call_fpcw:
5526 arch_set_irn_register(res, &ia32_registers[REG_FPCW]);
/* Transform Proj(ASM): the memory proj is mapped onto the last output of
 * the transformed ASM node; register outputs keep their position. */
5533 static ir_node *gen_Proj_ASM(ir_node *node)
5535 ir_mode *mode = get_irn_mode(node);
5536 ir_node *pred = get_Proj_pred(node);
5537 ir_node *new_pred = be_transform_node(pred);
5538 long pos = get_Proj_proj(node);
5540 if (mode == mode_M) {
/* by convention the memory output is the last out of the ia32 ASM node */
5541 pos = arch_get_irn_n_outs(new_pred)-1;
5542 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5544 } else if (mode_is_float(mode)) {
5547 panic("unexpected proj mode at ASM");
5550 return new_r_Proj(new_pred, mode, pos);
5554  * Transform and potentially renumber Proj nodes.
 *
 * Dispatches to the per-predecessor Proj transformers; Projs of other
 * (already ia32 or generic) nodes are duplicated, with gp-mode Projs
 * forced to mode_Iu.
5556 static ir_node *gen_Proj(ir_node *node)
5558 ir_node *pred = get_Proj_pred(node);
5561 switch (get_irn_opcode(pred)) {
5563 return gen_Proj_Load(node);
5565 return gen_Proj_Store(node);
5567 return gen_Proj_ASM(node);
5569 return gen_Proj_Builtin(node);
5571 return gen_Proj_Div(node);
5573 return gen_Proj_Mod(node);
5575 return gen_Proj_CopyB(node);
5577 return gen_Proj_be_SubSP(node);
5579 return gen_Proj_be_AddSP(node);
5581 return gen_Proj_be_Call(node);
5583 proj = get_Proj_proj(node);
5585 case pn_Start_X_initial_exec: {
5586 ir_node *block = get_nodes_block(pred);
5587 ir_node *new_block = be_transform_node(block);
5588 dbg_info *dbgi = get_irn_dbg_info(node);
5589 /* we exchange the ProjX with a jump */
5590 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5598 if (is_ia32_l_FloattoLL(pred)) {
5599 return gen_Proj_l_FloattoLL(node);
5601 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5605 ir_mode *mode = get_irn_mode(node);
5606 if (ia32_mode_needs_gp_reg(mode)) {
/* rebuild the Proj in mode_Iu, keeping the debug node number stable */
5607 ir_node *new_pred = be_transform_node(pred);
5608 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5609 get_Proj_proj(node));
5610 new_proj->node_nr = node->node_nr;
5615 return be_duplicate_node(node);
5619  * Enters all transform functions into the generic pointer
 * of the respective opcodes, so be_transform_graph() can dispatch on them.
5621 static void register_transformers(void)
5623 /* first clear the generic function pointer for all ops */
5624 be_start_transform_setup();
5626 be_set_transform_function(op_Add, gen_Add);
5627 be_set_transform_function(op_And, gen_And);
5628 be_set_transform_function(op_ASM, ia32_gen_ASM);
5629 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5630 be_set_transform_function(op_be_Call, gen_be_Call);
5631 be_set_transform_function(op_be_Copy, gen_be_Copy);
5632 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5633 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5634 be_set_transform_function(op_be_Return, gen_be_Return);
5635 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5636 be_set_transform_function(op_Builtin, gen_Builtin);
5637 be_set_transform_function(op_Cmp, gen_Cmp);
5638 be_set_transform_function(op_Cond, gen_Cond);
5639 be_set_transform_function(op_Const, gen_Const);
5640 be_set_transform_function(op_Conv, gen_Conv);
5641 be_set_transform_function(op_CopyB, ia32_gen_CopyB);
5642 be_set_transform_function(op_Div, gen_Div);
5643 be_set_transform_function(op_Eor, gen_Eor);
5644 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5645 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5646 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5647 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5648 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5649 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5650 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5651 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5652 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5653 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5654 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5655 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5656 be_set_transform_function(op_ia32_NoReg_FP, be_duplicate_node);
5657 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5658 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5659 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5660 be_set_transform_function(op_IJmp, gen_IJmp);
5661 be_set_transform_function(op_Jmp, gen_Jmp);
5662 be_set_transform_function(op_Load, gen_Load);
5663 be_set_transform_function(op_Minus, gen_Minus);
5664 be_set_transform_function(op_Mod, gen_Mod);
5665 be_set_transform_function(op_Mul, gen_Mul);
5666 be_set_transform_function(op_Mulh, gen_Mulh);
5667 be_set_transform_function(op_Mux, gen_Mux);
5668 be_set_transform_function(op_Not, gen_Not);
5669 be_set_transform_function(op_Or, gen_Or);
5670 be_set_transform_function(op_Phi, gen_Phi);
5671 be_set_transform_function(op_Proj, gen_Proj);
5672 be_set_transform_function(op_Rotl, gen_Rotl);
5673 be_set_transform_function(op_Shl, gen_Shl);
5674 be_set_transform_function(op_Shr, gen_Shr);
5675 be_set_transform_function(op_Shrs, gen_Shrs);
5676 be_set_transform_function(op_Store, gen_Store);
5677 be_set_transform_function(op_Sub, gen_Sub);
5678 be_set_transform_function(op_Switch, gen_Switch);
5679 be_set_transform_function(op_SymConst, gen_SymConst);
5680 be_set_transform_function(op_Unknown, ia32_gen_Unknown);
5682 be_set_upper_bits_clean_function(op_Mux, ia32_mux_upper_bits_clean);
5686  * Pre-transform all unknown and noreg nodes.
 *
 * Also caches the graph's NoMem and NoReg_GP nodes in the file-level
 * `nomem`/`noreg_GP` variables used throughout the transformers.
5688 static void ia32_pretransform_node(void)
5690 ir_graph *irg = current_ir_graph;
5691 ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
5693 irg_data->noreg_gp = be_pre_transform_node(irg_data->noreg_gp);
5694 irg_data->noreg_fp = be_pre_transform_node(irg_data->noreg_fp);
5695 irg_data->noreg_xmm = be_pre_transform_node(irg_data->noreg_xmm);
5696 irg_data->get_eip = be_pre_transform_node(irg_data->get_eip);
5697 irg_data->fpu_trunc_mode = be_pre_transform_node(irg_data->fpu_trunc_mode);
5699 nomem = get_irg_no_mem(irg);
5700 noreg_GP = ia32_new_NoReg_gp(irg);
5704  * Post-process all calls if we are in SSE mode.
5705  * The ABI requires that the results are in st0, copy them
5706  * to a xmm register.
 *
 * For every recorded call with a float result: either patch xStore users to
 * store straight from st0 (fst), or spill st0 to the frame and reload it
 * into an XMM register, rerouting the remaining users.
5708 static void postprocess_fp_call_results(void)
5712 for (i = 0, n = ARR_LEN(call_list); i < n; ++i) {
5713 ir_node *call = call_list[i];
5714 ir_type *mtp = call_types[i];
5717 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5718 ir_type *res_tp = get_method_res_type(mtp, j);
5719 ir_node *res, *new_res;
5722 if (! is_atomic_type(res_tp)) {
5723 /* no floating point return */
5726 res_mode = get_type_mode(res_tp);
5727 if (! mode_is_float(res_mode)) {
5728 /* no floating point return */
5732 res = be_get_Proj_for_pn(call, pn_ia32_Call_st0 + j);
5735 /* now patch the users */
5736 foreach_out_edge_safe(res, edge) {
5737 ir_node *succ = get_edge_src_irn(edge);
/* Keeps need no patching; they just keep the value alive */
5740 if (be_is_Keep(succ))
5743 if (is_ia32_xStore(succ)) {
5744 /* an xStore can be patched into an vfst */
5745 dbg_info *db = get_irn_dbg_info(succ);
5746 ir_node *block = get_nodes_block(succ);
5747 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5748 ir_node *idx = get_irn_n(succ, n_ia32_xStore_index);
5749 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5750 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5751 ir_mode *mode = get_ia32_ls_mode(succ);
5753 ir_node *st = new_bd_ia32_fst(db, block, base, idx, mem, value, mode);
5754 //ir_node *mem = new_r_Proj(st, mode_M, pn_ia32_fst_M);
5755 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5756 if (is_ia32_use_frame(succ))
5757 set_ia32_use_frame(st);
5758 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5759 set_irn_pinned(st, get_irn_pinned(succ));
5760 set_ia32_op_type(st, ia32_AddrModeD);
/* exchanging xStore for fst relies on identical proj numbering */
5762 assert((long)pn_ia32_xStore_M == (long)pn_ia32_fst_M);
5763 assert((long)pn_ia32_xStore_X_regular == (long)pn_ia32_fst_X_regular);
5764 assert((long)pn_ia32_xStore_X_except == (long)pn_ia32_fst_X_except);
/* no direct store possible yet: spill st0 to the frame and reload it
 * into an XMM register (built lazily, once per result) */
5771 if (new_res == NULL) {
5772 dbg_info *db = get_irn_dbg_info(call);
5773 ir_node *block = get_nodes_block(call);
5774 ir_node *frame = get_irg_frame(current_ir_graph);
5775 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5776 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5777 ir_node *vfst, *xld, *new_mem;
5780 /* store st(0) on stack */
5781 vfst = new_bd_ia32_fst(db, block, frame, noreg_GP, call_mem,
5783 set_ia32_op_type(vfst, ia32_AddrModeD);
5784 set_ia32_use_frame(vfst);
5785 arch_add_irn_flags(vfst, arch_irn_flags_spill);
5787 vfst_mem = new_r_Proj(vfst, mode_M, pn_ia32_fst_M);
5789 /* load into SSE register */
5790 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst_mem,
5792 set_ia32_op_type(xld, ia32_AddrModeS);
5793 set_ia32_use_frame(xld);
5795 new_res = new_r_Proj(xld, res_mode, pn_ia32_xLoad_res);
5796 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* route old memory users behind the new store/load sequence */
5798 if (old_mem != NULL) {
5799 edges_reroute(old_mem, new_mem);
5803 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5809 /* do the transformation */
/* Entry point of the ia32 transform phase: sets up transformers and helper
 * state, runs be_transform_graph(), then post-processes SSE float-returning
 * calls and tears the helper state down again. */
5810 void ia32_transform_graph(ir_graph *irg)
5814 register_transformers();
5815 initial_fpcw = NULL;
5816 ia32_no_pic_adjust = 0;
5818 old_initial_fpcw = be_get_initial_reg_value(irg, &ia32_registers[REG_FPCW]);
5820 be_timer_push(T_HEIGHTS);
5821 ia32_heights = heights_new(irg);
5822 be_timer_pop(T_HEIGHTS);
5823 ia32_calculate_non_address_mode_nodes(irg);
5825 /* the transform phase is not safe for CSE (yet) because several nodes get
5826  * attributes set after their creation */
5827 cse_last = get_opt_cse();
5830 call_list = NEW_ARR_F(ir_node *, 0);
5831 call_types = NEW_ARR_F(ir_type *, 0);
5832 be_transform_graph(irg, ia32_pretransform_node);
5834 if (ia32_cg_config.use_sse2)
5835 postprocess_fp_call_results();
5836 DEL_ARR_F(call_types);
5837 DEL_ARR_F(call_list);
/* restore the CSE setting that was disabled for the transform phase */
5839 set_opt_cse(cse_last);
5841 ia32_free_non_address_mode_nodes();
5842 heights_free(ia32_heights);
5843 ia32_heights = NULL;
5846 void ia32_init_transform(void)
5848 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");