2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* Hex bit patterns for IEEE-754 sign-bit / absolute-value masks
 * (S = single, D = double precision). */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
/* 2^64 as a decimal string: bias added when converting unsigned 64bit
 * integers to float (see ia32_ULLBIAS below). */
82 #define ULL_BIAS "18446744073709551616"
/* Local-label names for the entities that hold the constants above. */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shorthand for the modes of the x87 (vfp) and SSE (xmm) register classes. */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached, transformed fpcw (floating point control word) ignore-register. */
95 static ir_node *initial_fpcw = NULL;
/* Function-pointer types matching the generated new_bd_ia32_* node
 * constructors; used so generic gen_binop/gen_unop helpers can build
 * different concrete ia32 nodes. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Binary op that additionally consumes an eflags input (e.g. Adc/Sbb). */
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
/* x87 binary op: takes an extra fpcw operand (see get_fpcw()). */
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it is enough to create these singleton nodes once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
/* i.e. the x87 fldz/fld1 instructions can materialize it without a memory
 * load. NOTE(review): the return statements of this function are elided in
 * this dump — presumably "true" after the null/one test and "false"
 * otherwise; confirm against the full source. */
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
/* SSE variant: xorps/xorpd can produce 0 cheaply; with CONSTRUCT_SSE_CONST
 * additional patterns (1.0, doubles whose low 32 bits are zero) are matched.
 * NOTE(review): several condition tails and return statements are elided in
 * this dump — verify the exact accepted set against the full source. */
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double, byte-wise, little endian */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * return NoREG or pic_base in case of PIC.
200 * This is necessary as base address for newly created symbols
/* NOTE(review): the non-PIC branch is elided in this dump — presumably it
 * returns noreg_GP; confirm against the full source. */
202 static ir_node *get_symconst_base(void)
204 if (env_cg->birg->main_env->options->pic) {
205 return arch_code_generator_get_pic_base(env_cg);
212 * Transforms a Const.
/* Firm Const -> ia32 node. Float constants become SSE xZero/xMovd or x87
 * fldz/fld1 where possible, otherwise a load from a constant-pool entity;
 * integer constants become an ia32 Const immediate. NOTE(review): many
 * lines (declarations of load/res/floatent, several closing braces and
 * trailing call arguments) are elided in this dump. */
214 static ir_node *gen_Const(ir_node *node)
216 ir_node *old_block = get_nodes_block(node);
217 ir_node *block = be_transform_node(old_block);
218 dbg_info *dbgi = get_irn_dbg_info(node);
219 ir_mode *mode = get_irn_mode(node);
221 assert(is_Const(node));
223 if (mode_is_float(mode)) {
229 if (ia32_cg_config.use_sse2) {
230 tarval *tv = get_Const_tarval(node);
231 if (tarval_is_null(tv)) {
/* +0.0: xorps reg,reg (xZero) — no memory access needed */
232 load = new_bd_ia32_xZero(dbgi, block);
233 set_ia32_ls_mode(load, mode);
235 #ifdef CONSTRUCT_SSE_CONST
236 } else if (tarval_is_one(tv)) {
/* build 1.0 from all-ones via shift left then logical shift right:
 * 26/55 shifts isolate the exponent pattern of 1.0f / 1.0 */
237 int cnst = mode == mode_F ? 26 : 55;
238 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
239 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
240 ir_node *pslld, *psrld;
242 load = new_bd_ia32_xAllOnes(dbgi, block);
243 set_ia32_ls_mode(load, mode);
244 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
245 set_ia32_ls_mode(pslld, mode);
246 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
247 set_ia32_ls_mode(psrld, mode);
249 #endif /* CONSTRUCT_SSE_CONST */
250 } else if (mode == mode_F) {
251 /* we can place any 32bit constant by using a movd gp, sse */
252 unsigned val = get_tarval_sub_bits(tv, 0) |
253 (get_tarval_sub_bits(tv, 1) << 8) |
254 (get_tarval_sub_bits(tv, 2) << 16) |
255 (get_tarval_sub_bits(tv, 3) << 24);
256 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
257 load = new_bd_ia32_xMovd(dbgi, block, cnst);
258 set_ia32_ls_mode(load, mode);
261 #ifdef CONSTRUCT_SSE_CONST
262 if (mode == mode_D) {
263 unsigned val = get_tarval_sub_bits(tv, 0) |
264 (get_tarval_sub_bits(tv, 1) << 8) |
265 (get_tarval_sub_bits(tv, 2) << 16) |
266 (get_tarval_sub_bits(tv, 3) << 24);
268 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
269 ir_node *cnst, *psllq;
271 /* fine, lower 32bit are zero, produce 32bit value */
272 val = get_tarval_sub_bits(tv, 4) |
273 (get_tarval_sub_bits(tv, 5) << 8) |
274 (get_tarval_sub_bits(tv, 6) << 16) |
275 (get_tarval_sub_bits(tv, 7) << 24);
276 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
277 load = new_bd_ia32_xMovd(dbgi, block, cnst);
278 set_ia32_ls_mode(load, mode);
/* shift the materialized upper half into position */
279 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
280 set_ia32_ls_mode(psllq, mode);
285 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a pool entity (SSE) */
286 floatent = create_float_const_entity(node);
288 base = get_symconst_base();
289 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
291 set_ia32_op_type(load, ia32_AddrModeS);
292 set_ia32_am_sc(load, floatent);
/* the load can be rematerialized instead of spilled */
293 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
294 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for 0 and 1, pool load otherwise */
297 if (is_Const_null(node)) {
298 load = new_bd_ia32_vfldz(dbgi, block);
300 set_ia32_ls_mode(load, mode);
301 } else if (is_Const_one(node)) {
302 load = new_bd_ia32_vfld1(dbgi, block);
304 set_ia32_ls_mode(load, mode);
309 floatent = create_float_const_entity(node);
310 /* create_float_const_ent is smart and sometimes creates
312 ls_mode = get_type_mode(get_entity_type(floatent));
313 base = get_symconst_base();
314 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
316 set_ia32_op_type(load, ia32_AddrModeS);
317 set_ia32_am_sc(load, floatent);
318 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
319 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
322 #ifdef CONSTRUCT_SSE_CONST
324 #endif /* CONSTRUCT_SSE_CONST */
325 SET_IA32_ORIG_NODE(load, node);
327 be_dep_on_frame(load);
329 } else { /* non-float mode */
331 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned before extracting the long value */
334 tv = tarval_convert_to(tv, mode_Iu);
336 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
338 panic("couldn't convert constant tarval (%+F)", node);
340 val = get_tarval_long(tv);
342 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
343 SET_IA32_ORIG_NODE(cnst, node);
345 be_dep_on_frame(cnst);
351 * Transforms a SymConst.
/* Float SymConsts become a load of the entity (SSE xLoad or x87 vfld with
 * mode_E); address SymConsts become an ia32 Const carrying the entity.
 * NOTE(review): declarations of cnst/entity and the returns are elided in
 * this dump. */
353 static ir_node *gen_SymConst(ir_node *node)
355 ir_node *old_block = get_nodes_block(node);
356 ir_node *block = be_transform_node(old_block);
357 dbg_info *dbgi = get_irn_dbg_info(node);
358 ir_mode *mode = get_irn_mode(node);
361 if (mode_is_float(mode)) {
362 if (ia32_cg_config.use_sse2)
363 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
366 set_ia32_am_sc(cnst, get_SymConst_entity(node));
367 set_ia32_use_frame(cnst);
/* only entity addresses are supported as SymConst kinds here */
371 if (get_SymConst_kind(node) != symconst_addr_ent) {
372 panic("backend only support symconst_addr_ent (at %+F)", node);
374 entity = get_SymConst_entity(node);
375 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
378 SET_IA32_ORIG_NODE(cnst, node);
380 be_dep_on_frame(cnst);
385 * Create a float type for the given mode and cache it.
387 * @param mode the mode for the float type (might be integer mode for SSE2 types)
388 * @param align alignment
/* One static cache array per supported mode, indexed by the requested
 * alignment (0..15); types are created lazily on first request. */
390 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
396 if (mode == mode_Iu) {
397 static ir_type *int_Iu[16] = {NULL, };
399 if (int_Iu[align] == NULL) {
400 int_Iu[align] = tp = new_type_primitive(mode);
401 /* set the specified alignment */
402 set_type_alignment_bytes(tp, align);
404 return int_Iu[align];
405 } else if (mode == mode_Lu) {
406 static ir_type *int_Lu[16] = {NULL, };
408 if (int_Lu[align] == NULL) {
409 int_Lu[align] = tp = new_type_primitive(mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, align);
413 return int_Lu[align];
414 } else if (mode == mode_F) {
415 static ir_type *float_F[16] = {NULL, };
417 if (float_F[align] == NULL) {
418 float_F[align] = tp = new_type_primitive(mode);
419 /* set the specified alignment */
420 set_type_alignment_bytes(tp, align);
422 return float_F[align];
423 } else if (mode == mode_D) {
424 static ir_type *float_D[16] = {NULL, };
426 if (float_D[align] == NULL) {
427 float_D[align] = tp = new_type_primitive(mode);
428 /* set the specified alignment */
429 set_type_alignment_bytes(tp, align);
431 return float_D[align];
/* fallback: extended precision (mode_E) */
433 static ir_type *float_E[16] = {NULL, };
435 if (float_E[align] == NULL) {
436 float_E[align] = tp = new_type_primitive(mode);
437 /* set the specified alignment */
438 set_type_alignment_bytes(tp, align);
440 return float_E[align];
445 * Create a float[2] array type for the given atomic type.
447 * @param tp the atomic type
/* Cached like ia32_create_float_type: one static array per element mode,
 * indexed by alignment. Used for the two-element ULL-bias table. */
449 static ir_type *ia32_create_float_array(ir_type *tp)
451 ir_mode *mode = get_type_mode(tp);
452 unsigned align = get_type_alignment_bytes(tp);
457 if (mode == mode_F) {
458 static ir_type *float_F[16] = {NULL, };
460 if (float_F[align] != NULL)
461 return float_F[align];
462 arr = float_F[align] = new_type_array(1, tp);
463 } else if (mode == mode_D) {
464 static ir_type *float_D[16] = {NULL, };
466 if (float_D[align] != NULL)
467 return float_D[align];
468 arr = float_D[align] = new_type_array(1, tp);
470 static ir_type *float_E[16] = {NULL, };
472 if (float_E[align] != NULL)
473 return float_E[align];
474 arr = float_E[align] = new_type_array(1, tp);
/* array layout: 2 elements, fixed size, same alignment as the element */
476 set_type_alignment_bytes(arr, align);
477 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
478 set_type_state(arr, layout_fixed);
482 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
483 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* Table-driven: each known constant has a label name, a value string, a
 * mode selector (0=Iu, 1=Lu, other=F) and an alignment. Entities are
 * created on first use and memoized in ent_cache. */
485 static const struct {
486 const char *ent_name;
487 const char *cnst_str;
490 } names [ia32_known_const_max] = {
491 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
492 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
493 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
494 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
495 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
497 static ir_entity *ent_cache[ia32_known_const_max];
499 const char *ent_name, *cnst_str;
505 ent_name = names[kct].ent_name;
506 if (! ent_cache[kct]) {
507 cnst_str = names[kct].cnst_str;
509 switch (names[kct].mode) {
510 case 0: mode = mode_Iu; break;
511 case 1: mode = mode_Lu; break;
512 default: mode = mode_F; break;
514 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
515 tp = ia32_create_float_type(mode, names[kct].align);
/* ULLBIAS is stored as a float[2]: { 0.0, 2^64 } */
517 if (kct == ia32_ULLBIAS)
518 tp = ia32_create_float_array(tp);
519 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
521 set_entity_ld_ident(ent, get_entity_ident(ent));
522 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
523 set_entity_visibility(ent, ir_visibility_local);
525 if (kct == ia32_ULLBIAS) {
526 ir_initializer_t *initializer = create_initializer_compound(2);
528 set_initializer_compound_value(initializer, 0,
529 create_initializer_tarval(get_mode_null(mode)));
530 set_initializer_compound_value(initializer, 1,
531 create_initializer_tarval(tv));
533 set_entity_initializer(ent, initializer);
535 set_entity_initializer(ent, create_initializer_tarval(tv));
538 /* cache the entry */
539 ent_cache[kct] = ent;
542 return ent_cache[kct];
546 * return true if the node is a Proj(Load) and could be used in source address
547 * mode for another node. Will return only true if the @p other node is not
548 * dependent on the memory of the Load (for binary operations use the other
549 * input here, for unary operations use NULL).
/* NOTE(review): several return statements and a guard (Proj check before
 * get_Proj_pred) are elided in this dump. */
551 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
552 ir_node *other, ir_node *other2, match_flags_t flags)
557 /* float constants are always available */
558 if (is_Const(node)) {
559 ir_mode *mode = get_irn_mode(node);
560 if (mode_is_float(mode)) {
561 if (ia32_cg_config.use_sse2) {
562 if (is_simple_sse_Const(node))
565 if (is_simple_x87_Const(node))
568 if (get_irn_n_edges(node) > 1)
576 load = get_Proj_pred(node);
577 pn = get_Proj_proj(node);
578 if (!is_Load(load) || pn != pn_Load_res)
580 if (get_nodes_block(load) != block)
582 /* we only use address mode if we're the only user of the load */
583 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
585 /* in some edge cases with address mode we might reach the load normally
586 * and through some AM sequence, if it is already materialized then we
587 * can't create an AM node from it */
588 if (be_is_transformed(node))
591 /* don't do AM if other node inputs depend on the load (via mem-proj) */
592 if (other != NULL && prevents_AM(block, load, other))
595 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result record of match_arguments(): the matched address (am.addr), the
 * transformed operands, the operation kind (Normal vs AddrModeS) and flag
 * bits. NOTE(review): several members (addr, ls_mode, new_op1/new_op2,
 * mem_proj, pinned) are elided in this dump. */
601 typedef struct ia32_address_mode_t ia32_address_mode_t;
602 struct ia32_address_mode_t {
607 ia32_op_type_t op_type;
611 unsigned commutative : 1;
612 unsigned ins_permuted : 1;
615 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
617 /* construct load address */
618 memset(addr, 0, sizeof(addr[0]));
619 ia32_create_address_mode(addr, ptr, 0);
621 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
622 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
623 addr->mem = be_transform_node(mem);
/* Fill am->addr (and ls_mode/pinned/mem_proj) for source address mode of
 * @p node: float Consts become a constant-pool reference, Proj(Load)s are
 * decomposed into their address parts. NOTE(review): declarations of
 * load/ptr/mem/new_mem, an early return and the addr->mem assignment are
 * elided in this dump. */
626 static void build_address(ia32_address_mode_t *am, ir_node *node,
627 ia32_create_am_flags_t flags)
629 ia32_address_t *addr = &am->addr;
635 /* floating point immediates */
636 if (is_Const(node)) {
637 ir_entity *entity = create_float_const_entity(node);
638 addr->base = get_symconst_base();
639 addr->index = noreg_GP;
641 addr->symconst_ent = entity;
643 am->ls_mode = get_type_mode(get_entity_type(entity));
644 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load): take address/mem from the Load */
648 load = get_Proj_pred(node);
649 ptr = get_Load_ptr(load);
650 mem = get_Load_mem(load);
651 new_mem = be_transform_node(mem);
652 am->pinned = get_irn_pinned(load);
653 am->ls_mode = get_Load_mode(load);
654 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
657 /* construct load address */
658 ia32_create_address_mode(addr, ptr, flags);
660 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
661 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the attributes of @p addr (scale, symconst, offset, frame info)
 * onto the ia32 node @p node. NOTE(review): the guard before the
 * use_frame/frame_ent lines (presumably "if (addr->use_frame)") is elided
 * in this dump — confirm against the full source. */
665 static void set_address(ir_node *node, const ia32_address_t *addr)
667 set_ia32_am_scale(node, addr->scale);
668 set_ia32_am_sc(node, addr->symconst_ent);
669 set_ia32_am_offs_int(node, addr->offset);
670 if (addr->symconst_sign)
671 set_ia32_am_sc_sign(node);
673 set_ia32_use_frame(node);
674 set_ia32_frame_ent(node, addr->frame_entity);
678 * Apply attributes of a given address mode to a node.
/* NOTE(review): the guard before set_ia32_commutative (presumably
 * "if (am->commutative)") is elided in this dump. */
680 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
682 set_address(node, &am->addr);
684 set_ia32_op_type(node, am->op_type);
685 set_ia32_ls_mode(node, am->ls_mode);
686 if (am->pinned == op_pin_state_pinned) {
687 /* beware: some nodes are already pinned and did not allow to change the state */
688 if (get_irn_pinned(node) != op_pin_state_pinned)
689 set_irn_pinned(node, op_pin_state_pinned);
692 set_ia32_commutative(node);
696 * Check, if a given node is a Down-Conv, ie. a integer Conv
697 * from a mode with a mode with more bits to a mode with lesser bits.
698 * Moreover, we return only true if the node has not more than 1 user.
700 * @param node the node
701 * @return non-zero if node is a Down-Conv
/* NOTE(review): the is_Conv() guard, local declarations and return
 * statements are elided in this dump. */
703 static int is_downconv(const ir_node *node)
711 /* we only want to skip the conv when we're the only user
712 * (not optimal but for now...)
714 if (get_irn_n_edges(node) > 1)
717 src_mode = get_irn_mode(get_Conv_op(node));
718 dest_mode = get_irn_mode(node);
/* both modes must be GP-register modes and dest must not be wider */
720 ia32_mode_needs_gp_reg(src_mode) &&
721 ia32_mode_needs_gp_reg(dest_mode) &&
722 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
725 /** Skip all Down-Conv's on a given node and return the resulting node. */
/* NOTE(review): the final "return node;" is elided in this dump. */
726 ir_node *ia32_skip_downconv(ir_node *node)
728 while (is_downconv(node))
729 node = get_Conv_op(node);
/* Returns true for an integer Conv between two GP-register modes of the
 * same bit width, i.e. a pure signedness change. NOTE(review): the
 * is_Conv() guard, user-count check and return statements are elided in
 * this dump. */
734 static bool is_sameconv(ir_node *node)
742 src_mode = get_irn_mode(get_Conv_op(node));
743 dest_mode = get_irn_mode(node);
745 ia32_mode_needs_gp_reg(src_mode) &&
746 ia32_mode_needs_gp_reg(dest_mode) &&
747 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
750 /** Skip all signedness convs */
/* NOTE(review): the final "return node;" is elided in this dump. */
751 static ir_node *ia32_skip_sameconv(ir_node *node)
753 while (is_sameconv(node))
754 node = get_Conv_op(node);
/* Widen @p node to 32 bit (signed or unsigned mode_Is/mode_Iu depending
 * on the source mode's signedness) via an I2I Conv. NOTE(review): the
 * tgt_mode selection lines are elided in this dump. */
759 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
761 ir_mode *mode = get_irn_mode(node);
766 if (mode_is_signed(mode)) {
771 block = get_nodes_block(node);
772 dbgi = get_irn_dbg_info(node);
774 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
778 * matches operands of a node into ia32 addressing/operand modes. This covers
779 * usage of source address mode, immediates, operations with non 32-bit modes,
781 * The resulting data is filled into the @p am struct. block is the block
782 * of the node whose arguments are matched. op1, op2 are the first and second
783 * input that are matched (op1 may be NULL). other_op is another unrelated
784 * input that is not matched! but which is needed sometimes to check if AM
785 * for op1/op2 is legal.
786 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): several lines (use_am/use_immediate declarations, guard
 * bodies, the am->addr.mem default and parts of the commutative-AM branch)
 * are elided in this dump. */
788 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
789 ir_node *op1, ir_node *op2, ir_node *other_op,
792 ia32_address_t *addr = &am->addr;
793 ir_mode *mode = get_irn_mode(op2);
794 int mode_bits = get_mode_size_bits(mode);
795 ir_node *new_op1, *new_op2;
797 unsigned commutative;
798 int use_am_and_immediates;
801 memset(am, 0, sizeof(am[0]));
803 commutative = (flags & match_commutative) != 0;
804 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
805 use_am = (flags & match_am) != 0;
806 use_immediate = (flags & match_immediate) != 0;
807 assert(!use_am_and_immediates || use_immediate);
810 assert(!commutative || op1 != NULL);
811 assert(use_am || !(flags & match_8bit_am));
812 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit AM is only allowed when the matching flag is set */
814 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
815 (mode_bits == 16 && !(flags & match_16bit_am))) {
819 /* we can simply skip downconvs for mode neutral nodes: the upper bits
820 * can be random for these operations */
821 if (flags & match_mode_neutral) {
822 op2 = ia32_skip_downconv(op2);
824 op1 = ia32_skip_downconv(op1);
827 op2 = ia32_skip_sameconv(op2);
829 op1 = ia32_skip_sameconv(op1);
833 /* match immediates. firm nodes are normalized: constants are always on the
836 if (!(flags & match_try_am) && use_immediate) {
837 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first */
840 if (new_op2 == NULL &&
841 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
842 build_address(am, op2, 0);
843 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
844 if (mode_is_float(mode)) {
845 new_op2 = ia32_new_NoReg_vfp(env_cg);
849 am->op_type = ia32_AddrModeS;
/* commutative ops may instead take op1 from memory (operands swapped) */
850 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
852 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
854 build_address(am, op1, 0);
856 if (mode_is_float(mode)) {
857 noreg = ia32_new_NoReg_vfp(env_cg);
862 if (new_op2 != NULL) {
865 new_op1 = be_transform_node(op2);
867 am->ins_permuted = 1;
869 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register/immediate operation */
872 am->op_type = ia32_Normal;
874 if (flags & match_try_am) {
880 mode = get_irn_mode(op2);
881 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
882 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
884 new_op2 = create_upconv(op2, NULL);
885 am->ls_mode = mode_Iu;
887 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
889 new_op2 = be_transform_node(op2);
890 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill in defaults for unset address parts */
893 if (addr->base == NULL)
894 addr->base = noreg_GP;
895 if (addr->index == NULL)
896 addr->index = noreg_GP;
897 if (addr->mem == NULL)
900 am->new_op1 = new_op1;
901 am->new_op2 = new_op2;
902 am->commutative = commutative;
906 * "Fixes" a node that uses address mode by turning it into mode_T
907 * and returning a pn_ia32_res Proj.
909 * @param node the node
910 * @param am its address mode
912 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): local declarations, the early return of @p node and the
 * final return are elided in this dump. */
915 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
920 if (am->mem_proj == NULL)
923 /* we have to create a mode_T so the old MemProj can attach to us */
924 mode = get_irn_mode(node);
925 load = get_Proj_pred(am->mem_proj);
/* redirect lookups of the old Load to the new AM node */
927 be_set_transformed_node(load, node);
929 if (mode != mode_T) {
930 set_irn_mode(node, mode_T);
931 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
938 * Construct a standard binary operation, set AM and immediate if required.
940 * @param node The original node for which the binop is created
941 * @param op1 The first operand
942 * @param op2 The second operand
943 * @param func The node constructor function
944 * @return The constructed ia32 node.
946 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
947 construct_binop_func *func, match_flags_t flags)
950 ir_node *block, *new_block, *new_node;
951 ia32_address_mode_t am;
952 ia32_address_t *addr = &am.addr;
954 block = get_nodes_block(node);
955 match_arguments(&am, block, op1, op2, NULL, flags);
957 dbgi = get_irn_dbg_info(node);
958 new_block = be_transform_node(block);
959 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
960 am.new_op1, am.new_op2);
961 set_am_attributes(new_node, &am);
962 /* we can't use source address mode anymore when using immediates */
963 if (!(flags & match_am_and_immediates) &&
964 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
965 set_ia32_am_support(new_node, ia32_am_none);
966 SET_IA32_ORIG_NODE(new_node, node);
/* attach the old mem Proj (if any) and return the result value */
968 new_node = fix_mem_proj(new_node, &am);
974 * Generic names for the inputs of an ia32 binary op.
/* The compile-time asserts below guarantee these generic indices line up
 * with the generated Adc and Sbb node input positions, so gen_binop_flags
 * can read inputs by the generic names. */
977 n_ia32_l_binop_left, /**< ia32 left input */
978 n_ia32_l_binop_right, /**< ia32 right input */
979 n_ia32_l_binop_eflags /**< ia32 eflags input */
981 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
982 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
983 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
984 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
985 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
986 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
989 * Construct a binary operation which also consumes the eflags.
991 * @param node The node to transform
992 * @param func The node constructor function
993 * @param flags The match flags
994 * @return The constructor ia32 node
996 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
999 ir_node *src_block = get_nodes_block(node);
1000 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1001 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1002 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1004 ir_node *block, *new_node, *new_eflags;
1005 ia32_address_mode_t am;
1006 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a matched Load */
1008 match_arguments(&am, src_block, op1, op2, eflags, flags);
1010 dbgi = get_irn_dbg_info(node);
1011 block = be_transform_node(src_block);
1012 new_eflags = be_transform_node(eflags);
1013 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1014 am.new_op1, am.new_op2, new_eflags);
1015 set_am_attributes(new_node, &am);
1016 /* we can't use source address mode anymore when using immediates */
1017 if (!(flags & match_am_and_immediates) &&
1018 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1019 set_ia32_am_support(new_node, ia32_am_none);
1020 SET_IA32_ORIG_NODE(new_node, node);
1022 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed and cached) x87 floating point control
 * word ignore-register node; used as extra input for x87 binops. */
1027 static ir_node *get_fpcw(void)
1030 if (initial_fpcw != NULL)
1031 return initial_fpcw;
1033 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
1034 &ia32_fp_cw_regs[REG_FPCW]);
1035 initial_fpcw = be_transform_node(fpcw);
1037 return initial_fpcw;
1041 * Construct a standard binary operation, set AM and immediate if required.
1043 * @param op1 The first operand
1044 * @param op2 The second operand
1045 * @param func The node constructor function
1046 * @return The constructed ia32 node.
/* x87 variant: feeds the fpcw as extra input and records whether the
 * operands were swapped (ins_permuted) so emission can pick the reverse
 * instruction form. NOTE(review): the is_Div check guarding
 * get_divop_resmod and the "flags |= match_am" line appear elided. */
1048 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1049 construct_binop_float_func *func)
1051 ir_mode *mode = get_irn_mode(node);
1053 ir_node *block, *new_block, *new_node;
1054 ia32_address_mode_t am;
1055 ia32_address_t *addr = &am.addr;
1056 ia32_x87_attr_t *attr;
1057 /* All operations are considered commutative, because there are reverse
1059 match_flags_t flags = match_commutative;
1061 /* happens for div nodes... */
1063 mode = get_divop_resmod(node);
1065 /* cannot use address mode with long double on x87 */
1066 if (get_mode_size_bits(mode) <= 64)
1069 block = get_nodes_block(node);
1070 match_arguments(&am, block, op1, op2, NULL, flags);
1072 dbgi = get_irn_dbg_info(node);
1073 new_block = be_transform_node(block);
1074 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1075 am.new_op1, am.new_op2, get_fpcw());
1076 set_am_attributes(new_node, &am);
1078 attr = get_ia32_x87_attr(new_node);
1079 attr->attr.data.ins_permuted = am.ins_permuted;
1081 SET_IA32_ORIG_NODE(new_node, node);
1083 new_node = fix_mem_proj(new_node, &am);
1089 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1091 * @param op1 The first operand
1092 * @param op2 The second operand
1093 * @param func The node constructor function
1094 * @return The constructed ia32 node.
/* NOTE(review): the loop body skipping the Conv (op2 = op) and the final
 * return are elided in this dump. */
1096 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1097 construct_shift_func *func,
1098 match_flags_t flags)
1101 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1103 assert(! mode_is_float(get_irn_mode(node)));
1104 assert(flags & match_immediate);
1105 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1107 if (flags & match_mode_neutral) {
1108 op1 = ia32_skip_downconv(op1);
1109 new_op1 = be_transform_node(op1);
1110 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1111 new_op1 = create_upconv(op1, node);
1113 new_op1 = be_transform_node(op1);
1116 /* the shift amount can be any mode that is bigger than 5 bits, since all
1117 * other bits are ignored anyway */
1118 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1119 ir_node *const op = get_Conv_op(op2);
1120 if (mode_is_float(get_irn_mode(op)))
1123 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1125 new_op2 = create_immediate_or_transform(op2, 0);
1127 dbgi = get_irn_dbg_info(node);
1128 block = get_nodes_block(node);
1129 new_block = be_transform_node(block);
1130 new_node = func(dbgi, new_block, new_op1, new_op2);
1131 SET_IA32_ORIG_NODE(new_node, node);
1133 /* lowered shift instruction may have a dependency operand, handle it here */
1134 if (get_irn_arity(node) == 3) {
1135 /* we have a dependency */
1136 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1137 add_irn_dep(new_node, new_dep);
1145 * Construct a standard unary operation, set AM and immediate if required.
1147 * @param op The operand
1148 * @param func The node constructor function
1149 * @return The constructed ia32 node.
/* NOTE(review): the final return of new_node is elided in this dump. */
1151 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1152 match_flags_t flags)
1155 ir_node *block, *new_block, *new_op, *new_node;
1157 assert(flags == 0 || flags == match_mode_neutral);
1158 if (flags & match_mode_neutral) {
1159 op = ia32_skip_downconv(op);
1162 new_op = be_transform_node(op);
1163 dbgi = get_irn_dbg_info(node);
1164 block = get_nodes_block(node);
1165 new_block = be_transform_node(block);
1166 new_node = func(dbgi, new_block, new_op);
1168 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a parsed address: transform base/index
 * (substituting noreg_GP when absent) and copy the address attributes.
 * NOTE(review): the base extraction, the NULL-base/index assignments and
 * the final return are elided in this dump. */
1173 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1174 ia32_address_t *addr)
1176 ir_node *base, *index, *res;
1182 base = be_transform_node(base);
1185 index = addr->index;
1186 if (index == NULL) {
1189 index = be_transform_node(index);
1192 res = new_bd_ia32_Lea(dbgi, block, base, index);
1193 set_address(res, addr);
1199 * Returns non-zero if a given address mode has a symbolic or
1200 * numerical offset != 0.
1202 static int am_has_immediates(const ia32_address_t *addr)
1204 return addr->offset != 0 || addr->symconst_ent != NULL
1205 || addr->frame_entity || addr->use_frame;
1209 * Creates an ia32 Add.
1211 * @return the created ia32 Add node
/* Strategy (see comment at line 1238 below): fold to a Const, a Lea, an
 * Add with source AM, or a plain Lea — in that order. NOTE(review):
 * several lines (the 64bit-mode branch, declarations, returns and some
 * closing braces) are elided in this dump. */
1213 static ir_node *gen_Add(ir_node *node)
1215 ir_mode *mode = get_irn_mode(node);
1216 ir_node *op1 = get_Add_left(node);
1217 ir_node *op2 = get_Add_right(node);
1219 ir_node *block, *new_block, *new_node, *add_immediate_op;
1220 ia32_address_t addr;
1221 ia32_address_mode_t am;
1223 if (mode_is_float(mode)) {
1224 if (ia32_cg_config.use_sse2)
1225 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1226 match_commutative | match_am);
1228 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1231 ia32_mark_non_am(node);
1233 op2 = ia32_skip_downconv(op2);
1234 op1 = ia32_skip_downconv(op1);
1238 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1239 * 1. Add with immediate -> Lea
1240 * 2. Add with possible source address mode -> Add
1241 * 3. Otherwise -> Lea
1243 memset(&addr, 0, sizeof(addr));
1244 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1245 add_immediate_op = NULL;
1247 dbgi = get_irn_dbg_info(node);
1248 block = get_nodes_block(node);
1249 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> single Const */
1252 if (addr.base == NULL && addr.index == NULL) {
1253 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1254 addr.symconst_sign, 0, addr.offset);
1255 be_dep_on_frame(new_node);
1256 SET_IA32_ORIG_NODE(new_node, node);
1259 /* add with immediate? */
1260 if (addr.index == NULL) {
1261 add_immediate_op = addr.base;
1262 } else if (addr.base == NULL && addr.scale == 0) {
1263 add_immediate_op = addr.index;
1266 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself */
1267 if (!am_has_immediates(&addr)) {
1268 #ifdef DEBUG_libfirm
1269 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1272 return be_transform_node(add_immediate_op);
1275 new_node = create_lea_from_address(dbgi, new_block, &addr);
1276 SET_IA32_ORIG_NODE(new_node, node);
1280 /* test if we can use source address mode */
1281 match_arguments(&am, block, op1, op2, NULL, match_commutative
1282 | match_mode_neutral | match_am | match_immediate | match_try_am);
1284 /* construct an Add with source address mode */
1285 if (am.op_type == ia32_AddrModeS) {
1286 ia32_address_t *am_addr = &am.addr;
1287 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1288 am_addr->index, am_addr->mem, am.new_op1,
1290 set_am_attributes(new_node, &am);
1291 SET_IA32_ORIG_NODE(new_node, node);
1293 new_node = fix_mem_proj(new_node, &am);
1298 /* otherwise construct a lea */
1299 new_node = create_lea_from_address(dbgi, new_block, &addr);
1300 SET_IA32_ORIG_NODE(new_node, node);
1305 * Creates an ia32 Mul.
1307 * @return the created ia32 Mul node
1309 static ir_node *gen_Mul(ir_node *node)
1311 ir_node *op1 = get_Mul_left(node);
1312 ir_node *op2 = get_Mul_right(node);
1313 ir_mode *mode = get_irn_mode(node);
/* Float multiplies: SSE xMul or x87 vfmul, depending on configuration. */
1315 if (mode_is_float(mode)) {
1316 if (ia32_cg_config.use_sse2)
1317 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1318 match_commutative | match_am);
1320 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* Integer multiply: IMul supports source AM and immediates. */
1322 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1323 match_commutative | match_am | match_mode_neutral |
1324 match_immediate | match_am_and_immediates);
1328 * Creates an ia32 Mulh.
1329 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1330 * this result while Mul returns the lower 32 bit.
1332 * @return the created ia32 Mulh node
1334 static ir_node *gen_Mulh(ir_node *node)
1336 dbg_info *dbgi = get_irn_dbg_info(node);
1337 ir_node *op1 = get_Mulh_left(node);
1338 ir_node *op2 = get_Mulh_right(node);
1339 ir_mode *mode = get_irn_mode(node);
1341 ir_node *proj_res_high;
1343 if (get_mode_size_bits(mode) != 32) {
1344 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* Signed -> one-operand IMul; unsigned -> Mul.  Either way project out
 * the high half of the 64-bit product. */
1347 if (mode_is_signed(mode)) {
1348 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1349 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1351 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1352 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1354 return proj_res_high;
1358 * Creates an ia32 And.
1360 * @return The created ia32 And node
1362 static ir_node *gen_And(ir_node *node)
1364 ir_node *op1 = get_And_left(node);
1365 ir_node *op2 = get_And_right(node);
1366 assert(! mode_is_float(get_irn_mode(node)));
1368 /* is it a zero extension? */
1369 if (is_Const(op2)) {
1370 tarval *tv = get_Const_tarval(op2);
1371 long v = get_tarval_long(tv);
/* x & 0xFF / x & 0xFFFF is a zero-extension from 8/16 bit; emit a
 * Conv instead of an And (elided lines presumably pick src_mode). */
1373 if (v == 0xFF || v == 0xFFFF) {
1374 dbg_info *dbgi = get_irn_dbg_info(node);
1375 ir_node *block = get_nodes_block(node);
1382 assert(v == 0xFFFF);
1385 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* General case: plain ia32 And with AM/immediate support. */
1390 return gen_binop(node, op1, op2, new_bd_ia32_And,
1391 match_commutative | match_mode_neutral | match_am | match_immediate);
1397 * Creates an ia32 Or.
1399 * @return The created ia32 Or node
1401 static ir_node *gen_Or(ir_node *node)
1403 ir_node *op1 = get_Or_left(node);
1404 ir_node *op2 = get_Or_right(node);
1406 assert (! mode_is_float(get_irn_mode(node)));
1407 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1408 | match_mode_neutral | match_am | match_immediate);
1414 * Creates an ia32 Eor.
1416 * @return The created ia32 Eor node
1418 static ir_node *gen_Eor(ir_node *node)
1420 ir_node *op1 = get_Eor_left(node);
1421 ir_node *op2 = get_Eor_right(node);
1423 assert(! mode_is_float(get_irn_mode(node)));
/* Firm Eor == x86 Xor. */
1424 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1425 | match_mode_neutral | match_am | match_immediate);
1430 * Creates an ia32 Sub.
1432 * @return The created ia32 Sub node
1434 static ir_node *gen_Sub(ir_node *node)
1436 ir_node *op1 = get_Sub_left(node);
1437 ir_node *op2 = get_Sub_right(node);
1438 ir_mode *mode = get_irn_mode(node);
1440 if (mode_is_float(mode)) {
1441 if (ia32_cg_config.use_sse2)
1442 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1444 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with constant should have been normalized to Add(-C) earlier;
 * warn so the middleend normalization gap is visible. */
1447 if (is_Const(op2)) {
1448 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1452 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1453 | match_am | match_immediate);
/* Merge the memory input of a node that consumed an address-mode load
 * (am_mem) with the node's original memory input (src_mem), avoiding
 * memory cycles through the folded load.  Returns the memory node to use. */
1456 static ir_node *transform_AM_mem(ir_node *const block,
1457 ir_node *const src_val,
1458 ir_node *const src_mem,
1459 ir_node *const am_mem)
1461 if (is_NoMem(am_mem)) {
1462 return be_transform_node(src_mem);
1463 } else if (is_Proj(src_val) &&
1465 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1466 /* avoid memory loop */
1468 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the pred that would create a
 * loop back through the folded load. */
1469 ir_node *const ptr_pred = get_Proj_pred(src_val);
1470 int const arity = get_Sync_n_preds(src_mem);
1475 NEW_ARR_A(ir_node*, ins, arity + 1);
1477 /* NOTE: This sometimes produces dead-code because the old sync in
1478 * src_mem might not be used anymore, we should detect this case
1479 * and kill the sync... */
1480 for (i = arity - 1; i >= 0; --i) {
1481 ir_node *const pred = get_Sync_pred(src_mem, i);
1483 /* avoid memory loop */
1484 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1487 ins[n++] = be_transform_node(pred);
1492 return new_r_Sync(block, n, ins);
/* Fallback: sync the transformed src_mem with am_mem (2-input Sync). */
1496 ins[0] = be_transform_node(src_mem);
1498 return new_r_Sync(block, 2, ins);
1503 * Create a 32bit to 64bit signed extension.
1505 * @param dbgi debug info
1506 * @param block the block where node nodes should be placed
1507 * @param val the value to extend
1508 * @param orig the original node
1510 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1511 ir_node *val, const ir_node *orig)
/* Either a short cltd (sign-extend eax into edx) or the generic
 * "sar val, 31" pattern, chosen by architecture configuration. */
1516 if (ia32_cg_config.use_short_sex_eax) {
1517 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1518 be_dep_on_frame(pval);
1519 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1521 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1522 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1524 SET_IA32_ORIG_NODE(res, orig);
1529 * Generates an ia32 DivMod with additional infrastructure for the
1530 * register allocator if needed.
1532 static ir_node *create_Div(ir_node *node)
1534 dbg_info *dbgi = get_irn_dbg_info(node);
1535 ir_node *block = get_nodes_block(node);
1536 ir_node *new_block = be_transform_node(block);
1543 ir_node *sign_extension;
1544 ia32_address_mode_t am;
1545 ia32_address_t *addr = &am.addr;
1547 /* the upper bits have random contents for smaller modes */
/* Shared for Div, Mod and DivMod: pick out operands per opcode. */
1548 switch (get_irn_opcode(node)) {
1550 op1 = get_Div_left(node);
1551 op2 = get_Div_right(node);
1552 mem = get_Div_mem(node);
1553 mode = get_Div_resmode(node);
1556 op1 = get_Mod_left(node);
1557 op2 = get_Mod_right(node);
1558 mem = get_Mod_mem(node);
1559 mode = get_Mod_resmode(node);
1562 op1 = get_DivMod_left(node);
1563 op2 = get_DivMod_right(node);
1564 mem = get_DivMod_mem(node);
1565 mode = get_DivMod_resmode(node);
1568 panic("invalid divmod node %+F", node);
1571 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1573 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1574 is the memory of the consumed address. We can have only the second op as address
1575 in Div nodes, so check only op2. */
1576 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* Signed division needs edx = sign-extension of the dividend (cdq);
 * unsigned division needs edx = 0. */
1578 if (mode_is_signed(mode)) {
1579 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1580 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1581 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1583 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1584 be_dep_on_frame(sign_extension);
1586 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1587 addr->index, new_mem, am.new_op2,
1588 am.new_op1, sign_extension);
1591 set_irn_pinned(new_node, get_irn_pinned(node));
1593 set_am_attributes(new_node, &am);
1594 SET_IA32_ORIG_NODE(new_node, node);
1596 new_node = fix_mem_proj(new_node, &am);
1602 * Generates an ia32 Mod.
1604 static ir_node *gen_Mod(ir_node *node)
1606 return create_Div(node);
1610 * Generates an ia32 Div.
1612 static ir_node *gen_Div(ir_node *node)
1614 return create_Div(node);
1618 * Generates an ia32 DivMod.
1620 static ir_node *gen_DivMod(ir_node *node)
1622 return create_Div(node);
1628 * Creates an ia32 floating Div.
1630 * @return The created ia32 xDiv node
1632 static ir_node *gen_Quot(ir_node *node)
1634 ir_node *op1 = get_Quot_left(node);
1635 ir_node *op2 = get_Quot_right(node);
/* SSE divide or x87 vfdiv, by configuration. */
1637 if (ia32_cg_config.use_sse2) {
1638 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1640 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1646 * Creates an ia32 Shl.
1648 * @return The created ia32 Shl node
1650 static ir_node *gen_Shl(ir_node *node)
1652 ir_node *left = get_Shl_left(node);
1653 ir_node *right = get_Shl_right(node);
1655 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1656 match_mode_neutral | match_immediate);
1660 * Creates an ia32 Shr.
1662 * @return The created ia32 Shr node
1664 static ir_node *gen_Shr(ir_node *node)
1666 ir_node *left = get_Shr_left(node);
1667 ir_node *right = get_Shr_right(node);
/* No match_mode_neutral: logical right shift depends on the exact width. */
1669 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1675 * Creates an ia32 Sar.
1677 * @return The created ia32 Shrs node
1679 static ir_node *gen_Shrs(ir_node *node)
1681 ir_node *left = get_Shrs_left(node);
1682 ir_node *right = get_Shrs_right(node);
/* Special case: Shrs by a constant (presumably 31, elided here) is a
 * full sign extension -> use cltd/sar-31 helper. */
1684 if (is_Const(right)) {
1685 tarval *tv = get_Const_tarval(right);
1686 long val = get_tarval_long(tv);
1688 /* this is a sign extension */
1689 dbg_info *dbgi = get_irn_dbg_info(node);
1690 ir_node *block = be_transform_node(get_nodes_block(node));
1691 ir_node *new_op = be_transform_node(left);
1693 return create_sex_32_64(dbgi, block, new_op, node);
1697 /* 8 or 16 bit sign extension? */
1698 if (is_Const(right) && is_Shl(left)) {
/* Shrs(Shl(x, C), C) with C == 16 or 24 is a sign-extension from
 * 16 or 8 bit -> emit an I2I Conv instead of two shifts. */
1699 ir_node *shl_left = get_Shl_left(left);
1700 ir_node *shl_right = get_Shl_right(left);
1701 if (is_Const(shl_right)) {
1702 tarval *tv1 = get_Const_tarval(right);
1703 tarval *tv2 = get_Const_tarval(shl_right);
1704 if (tv1 == tv2 && tarval_is_long(tv1)) {
1705 long val = get_tarval_long(tv1);
1706 if (val == 16 || val == 24) {
1707 dbg_info *dbgi = get_irn_dbg_info(node);
1708 ir_node *block = get_nodes_block(node);
1718 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1727 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1733 * Creates an ia32 Rol.
1735 * @param op1 The first operator
1736 * @param op2 The second operator
1737 * @return The created ia32 RotL node
1739 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1741 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1747 * Creates an ia32 Ror.
1748 * NOTE: There is no RotR with immediate because this would always be a RotL
1749 * "imm-mode_size_bits" which can be pre-calculated.
1751 * @param op1 The first operator
1752 * @param op2 The second operator
1753 * @return The created ia32 RotR node
1755 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1757 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1763 * Creates an ia32 RotR or RotL (depending on the found pattern).
1765 * @return The created ia32 RotL or RotR node
1767 static ir_node *gen_Rotl(ir_node *node)
1769 ir_node *rotate = NULL;
1770 ir_node *op1 = get_Rotl_left(node);
1771 ir_node *op2 = get_Rotl_right(node);
1773 /* Firm has only RotL, so we are looking for a right (op2)
1774 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1775 that means we can create a RotR instead of an Add and a RotL */
/* Pattern: Rotl(x, Add(Minus(e), bits)) == Rotr(x, e). */
1779 ir_node *left = get_Add_left(add);
1780 ir_node *right = get_Add_right(add);
1781 if (is_Const(right)) {
1782 tarval *tv = get_Const_tarval(right);
1783 ir_mode *mode = get_irn_mode(node);
1784 long bits = get_mode_size_bits(mode);
1786 if (is_Minus(left) &&
1787 tarval_is_long(tv) &&
1788 get_tarval_long(tv) == bits &&
1791 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1792 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* No pattern matched: emit a plain RotL. */
1797 if (rotate == NULL) {
1798 rotate = gen_Rol(node, op1, op2);
1807 * Transforms a Minus node.
1809 * @return The created ia32 Minus node
1811 static ir_node *gen_Minus(ir_node *node)
1813 ir_node *op = get_Minus_op(node);
1814 ir_node *block = be_transform_node(get_nodes_block(node));
1815 dbg_info *dbgi = get_irn_dbg_info(node);
1816 ir_mode *mode = get_irn_mode(node);
1821 if (mode_is_float(mode)) {
1822 ir_node *new_op = be_transform_node(op);
1823 if (ia32_cg_config.use_sse2) {
1824 /* TODO: non-optimal... if we have many xXors, then we should
1825 * rather create a load for the const and use that instead of
1826 * several AM nodes... */
/* SSE negate: xor the sign bit with a constant from memory
 * (SSIGN/DSIGN entity, selected by operand size). */
1827 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1829 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1830 noreg_GP, nomem, new_op, noreg_xmm);
1832 size = get_mode_size_bits(mode);
1833 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1835 set_ia32_am_sc(new_node, ent);
1836 set_ia32_op_type(new_node, ia32_AddrModeS);
1837 set_ia32_ls_mode(new_node, mode);
1839 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* Integer negate. */
1842 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1845 SET_IA32_ORIG_NODE(new_node, node);
1851 * Transforms a Not node.
1853 * @return The created ia32 Not node
1855 static ir_node *gen_Not(ir_node *node)
1857 ir_node *op = get_Not_op(node);
1859 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1860 assert (! mode_is_float(get_irn_mode(node)));
1862 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1868 * Transforms an Abs node.
1870 * @return The created ia32 Abs node
1872 static ir_node *gen_Abs(ir_node *node)
1874 ir_node *block = get_nodes_block(node);
1875 ir_node *new_block = be_transform_node(block);
1876 ir_node *op = get_Abs_op(node);
1877 dbg_info *dbgi = get_irn_dbg_info(node);
1878 ir_mode *mode = get_irn_mode(node);
1884 if (mode_is_float(mode)) {
1885 new_op = be_transform_node(op);
1887 if (ia32_cg_config.use_sse2) {
/* SSE abs: mask off the sign bit with the SABS/DABS constant. */
1888 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1889 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1890 noreg_GP, nomem, new_op, noreg_fp);
1892 size = get_mode_size_bits(mode);
1893 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1895 set_ia32_am_sc(new_node, ent);
1897 SET_IA32_ORIG_NODE(new_node, node);
1899 set_ia32_op_type(new_node, ia32_AddrModeS);
1900 set_ia32_ls_mode(new_node, mode);
1902 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1903 SET_IA32_ORIG_NODE(new_node, node);
/* Integer abs via the branchless sequence:
 * s = x >> 31; result = (x ^ s) - s. */
1906 ir_node *xor, *sign_extension;
1908 if (get_mode_size_bits(mode) == 32) {
1909 new_op = be_transform_node(op);
1911 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1914 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1916 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1917 nomem, new_op, sign_extension);
1918 SET_IA32_ORIG_NODE(xor, node);
1920 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1921 nomem, xor, sign_extension);
1922 SET_IA32_ORIG_NODE(new_node, node);
1929 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1931 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1933 dbg_info *dbgi = get_irn_dbg_info(cmp);
1934 ir_node *block = get_nodes_block(cmp);
1935 ir_node *new_block = be_transform_node(block);
1936 ir_node *op1 = be_transform_node(x);
1937 ir_node *op2 = be_transform_node(n);
1939 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1943 * Transform a node returning a "flag" result.
1945 * @param node the node to transform
1946 * @param pnc_out the compare mode to use
1948 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1955 /* we have a Cmp as input */
1956 if (is_Proj(node)) {
1957 ir_node *pred = get_Proj_pred(node);
1959 pn_Cmp pnc = get_Proj_proj(node);
/* Try to turn Cmp(x & (1 << n), 0) (or == shl) into a Bt, which sets
 * the carry flag; the pnc is remapped so a Jc/Jnc is emitted. */
1960 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1961 ir_node *l = get_Cmp_left(pred);
1962 ir_node *r = get_Cmp_right(pred);
1964 ir_node *la = get_And_left(l);
1965 ir_node *ra = get_And_right(l);
1967 ir_node *c = get_Shl_left(la);
1968 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1969 /* (1 << n) & ra) */
1970 ir_node *n = get_Shl_right(la);
1971 flags = gen_bt(pred, ra, n);
1972 /* we must generate a Jc/Jnc jump */
1973 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1976 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Symmetric case: the shift is the right And operand. */
1981 ir_node *c = get_Shl_left(ra);
1982 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1983 /* la & (1 << n)) */
1984 ir_node *n = get_Shl_right(ra);
1985 flags = gen_bt(pred, la, n);
1986 /* we must generate a Jc/Jnc jump */
1987 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1990 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1996 /* add ia32 compare flags */
1998 ir_node *l = get_Cmp_left(pred);
1999 ir_mode *mode = get_irn_mode(l);
2000 if (mode_is_float(mode))
2001 pnc |= ia32_pn_Cmp_float;
2002 else if (! mode_is_signed(mode))
2003 pnc |= ia32_pn_Cmp_unsigned;
2006 flags = be_transform_node(pred);
2011 /* a mode_b value, we have to compare it against 0 */
2012 dbgi = get_irn_dbg_info(node);
2013 new_block = be_transform_node(get_nodes_block(node));
2014 new_op = be_transform_node(node);
2015 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2016 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2017 *pnc_out = pn_Cmp_Lg;
2022 * Transforms a Load.
2024 * @return the created ia32 Load node
2026 static ir_node *gen_Load(ir_node *node)
2028 ir_node *old_block = get_nodes_block(node);
2029 ir_node *block = be_transform_node(old_block);
2030 ir_node *ptr = get_Load_ptr(node);
2031 ir_node *mem = get_Load_mem(node);
2032 ir_node *new_mem = be_transform_node(mem);
2035 dbg_info *dbgi = get_irn_dbg_info(node);
2036 ir_mode *mode = get_Load_mode(node);
2038 ia32_address_t addr;
2040 /* construct load address */
2041 memset(&addr, 0, sizeof(addr));
2042 ia32_create_address_mode(&addr, ptr, 0);
2049 base = be_transform_node(base);
2052 if (index == NULL) {
2055 index = be_transform_node(index);
/* Pick the load flavour: SSE xLoad, x87 vfld, or integer Load. */
2058 if (mode_is_float(mode)) {
2059 if (ia32_cg_config.use_sse2) {
2060 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2063 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2067 assert(mode != mode_b);
2069 /* create a conv node with address mode for smaller modes */
2070 if (get_mode_size_bits(mode) < 32) {
2071 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2072 new_mem, noreg_GP, mode);
2074 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2078 set_irn_pinned(new_node, get_irn_pinned(node));
2079 set_ia32_op_type(new_node, ia32_AddrModeS);
2080 set_ia32_ls_mode(new_node, mode);
2081 set_address(new_node, &addr);
/* Floating (unpinned) loads may be rematerialized by the spiller. */
2083 if (get_irn_pinned(node) == op_pin_state_floats) {
2084 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2085 && pn_ia32_vfld_res == pn_ia32_Load_res
2086 && pn_ia32_Load_res == pn_ia32_res);
2087 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2090 SET_IA32_ORIG_NODE(new_node, node);
2092 be_dep_on_frame(new_node);
/* Decide whether a Load feeding this operation can be folded into a
 * destination-address-mode node (read-modify-write on memory).  Returns
 * non-zero if folding is safe. */
2096 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2097 ir_node *ptr, ir_node *other)
2104 /* we only use address mode if we're the only user of the load */
2105 if (get_irn_n_edges(node) > 1)
2108 load = get_Proj_pred(node);
2111 if (get_nodes_block(load) != block)
2114 /* store should have the same pointer as the load */
2115 if (get_Load_ptr(load) != ptr)
2118 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2119 if (other != NULL &&
2120 get_nodes_block(other) == block &&
2121 heights_reachable_in_block(heights, other, load)) {
2125 if (prevents_AM(block, load, mem))
2127 /* Store should be attached to the load via mem */
2128 assert(heights_reachable_in_block(heights, mem, load));
/* Build a binary destination-address-mode node (e.g. AddMem) for a
 * Store(op(Load(p), x), p) pattern.  Folds whichever operand's load
 * qualifies (trying op2 as well when the operation is commutative);
 * func8bit is used for 8-bit modes. */
2133 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2134 ir_node *mem, ir_node *ptr, ir_mode *mode,
2135 construct_binop_dest_func *func,
2136 construct_binop_dest_func *func8bit,
2137 match_flags_t flags)
2139 ir_node *src_block = get_nodes_block(node);
2147 ia32_address_mode_t am;
2148 ia32_address_t *addr = &am.addr;
2149 memset(&am, 0, sizeof(am));
2151 assert(flags & match_immediate); /* there is no destam node without... */
2152 commutative = (flags & match_commutative) != 0;
2154 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2155 build_address(&am, op1, ia32_create_am_double_use);
2156 new_op = create_immediate_or_transform(op2, 0);
2157 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2158 build_address(&am, op2, ia32_create_am_double_use);
2159 new_op = create_immediate_or_transform(op1, 0);
/* Fill in NoReg defaults for unused address parts. */
2164 if (addr->base == NULL)
2165 addr->base = noreg_GP;
2166 if (addr->index == NULL)
2167 addr->index = noreg_GP;
2168 if (addr->mem == NULL)
2171 dbgi = get_irn_dbg_info(node);
2172 block = be_transform_node(src_block);
2173 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2175 if (get_mode_size_bits(mode) == 8) {
2176 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2178 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2180 set_address(new_node, addr);
2181 set_ia32_op_type(new_node, ia32_AddrModeD);
2182 set_ia32_ls_mode(new_node, mode);
2183 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute the folded load's mem-proj users to the new node. */
2185 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2186 mem_proj = be_transform_node(am.mem_proj);
2187 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a unary destination-address-mode node (e.g. NotMem, NegMem) for
 * a Store(op(Load(p)), p) pattern; returns NULL-ish fallthrough when
 * folding is not possible (early-out elided in this excerpt). */
2192 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2193 ir_node *ptr, ir_mode *mode,
2194 construct_unop_dest_func *func)
2196 ir_node *src_block = get_nodes_block(node);
2202 ia32_address_mode_t am;
2203 ia32_address_t *addr = &am.addr;
2205 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2208 memset(&am, 0, sizeof(am));
2209 build_address(&am, op, ia32_create_am_double_use);
2211 dbgi = get_irn_dbg_info(node);
2212 block = be_transform_node(src_block);
2213 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2214 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2215 set_address(new_node, addr);
2216 set_ia32_op_type(new_node, ia32_AddrModeD);
2217 set_ia32_ls_mode(new_node, mode);
2218 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute the folded load's mem-proj users to the new node. */
2220 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2221 mem_proj = be_transform_node(am.mem_proj);
2222 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negate a pn_Cmp, honouring the ia32 float-compare flag (float compares
 * negate differently because of unordered results). */
2227 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2229 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2230 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(cond, 0/1, 1/0)) into a SetccMem (setCC directly
 * to memory).  Only 8-bit stores qualify; returns the new node or falls
 * through (failure paths elided in this excerpt). */
2233 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2235 ir_mode *mode = get_irn_mode(node);
2236 ir_node *mux_true = get_Mux_true(node);
2237 ir_node *mux_false = get_Mux_false(node);
2246 ia32_address_t addr;
2248 if (get_mode_size_bits(mode) != 8)
/* Accept Mux(c, 1, 0) directly; Mux(c, 0, 1) by negating the condition. */
2251 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2253 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2259 cond = get_Mux_sel(node);
2260 flags = get_flags_node(cond, &pnc);
2261 /* we can't handle the float special cases with SetM */
2262 if (pnc & ia32_pn_Cmp_float)
2265 pnc = ia32_get_negated_pnc(pnc);
2267 build_address_ptr(&addr, ptr, mem);
2269 dbgi = get_irn_dbg_info(node);
2270 block = get_nodes_block(node);
2271 new_block = be_transform_node(block);
2272 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2273 addr.index, addr.mem, flags, pnc);
2274 set_address(new_node, &addr);
2275 set_ia32_op_type(new_node, ia32_AddrModeD);
2276 set_ia32_ls_mode(new_node, mode);
2277 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store into a destination-address-mode operation
 * (read-modify-write: AddMem, SubMem, shift-mem, SetccMem, ...).
 * Returns the created node or NULL-equivalent fallthrough (elided). */
2282 static ir_node *try_create_dest_am(ir_node *node)
2284 ir_node *val = get_Store_value(node);
2285 ir_node *mem = get_Store_mem(node);
2286 ir_node *ptr = get_Store_ptr(node);
2287 ir_mode *mode = get_irn_mode(val);
2288 unsigned bits = get_mode_size_bits(mode);
2293 /* handle only GP modes for now... */
2294 if (!ia32_mode_needs_gp_reg(mode))
2298 /* store must be the only user of the val node */
2299 if (get_irn_n_edges(val) > 1)
2301 /* skip pointless convs */
2303 ir_node *conv_op = get_Conv_op(val);
2304 ir_mode *pred_mode = get_irn_mode(conv_op);
2305 if (!ia32_mode_needs_gp_reg(pred_mode))
2307 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2315 /* value must be in the same block */
2316 if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the stored value's opcode to the matching *Mem node. */
2319 switch (get_irn_opcode(val)) {
2321 op1 = get_Add_left(val);
2322 op2 = get_Add_right(val);
/* Prefer inc/dec for +-1 when the architecture config allows it. */
2323 if (ia32_cg_config.use_incdec) {
2324 if (is_Const_1(op2)) {
2325 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2327 } else if (is_Const_Minus_1(op2)) {
2328 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2332 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2333 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2334 match_commutative | match_immediate);
2337 op1 = get_Sub_left(val);
2338 op2 = get_Sub_right(val);
2339 if (is_Const(op2)) {
2340 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2342 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2343 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2347 op1 = get_And_left(val);
2348 op2 = get_And_right(val);
2349 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2350 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2351 match_commutative | match_immediate);
2354 op1 = get_Or_left(val);
2355 op2 = get_Or_right(val);
2356 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2357 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2358 match_commutative | match_immediate);
2361 op1 = get_Eor_left(val);
2362 op2 = get_Eor_right(val);
2363 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2364 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2365 match_commutative | match_immediate);
2368 op1 = get_Shl_left(val);
2369 op2 = get_Shl_right(val);
2370 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2371 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2375 op1 = get_Shr_left(val);
2376 op2 = get_Shr_right(val);
2377 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2378 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2382 op1 = get_Shrs_left(val);
2383 op2 = get_Shrs_right(val);
2384 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2385 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2389 op1 = get_Rotl_left(val);
2390 op2 = get_Rotl_right(val);
2391 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2392 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2395 /* TODO: match ROR patterns... */
2397 new_node = try_create_SetMem(val, ptr, mem);
2401 op1 = get_Minus_op(val);
2402 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2405 /* should be lowered already */
2406 assert(mode != mode_b);
2407 op1 = get_Not_op(val);
2408 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Inherit the original Store's pinned state when stricter. */
2414 if (new_node != NULL) {
2415 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2416 get_irn_pinned(node) == op_pin_state_pinned) {
2417 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if an integer mode can be the target of a float-to-int store via
 * fist: must be signed and 16 or 32 bits wide. */
2424 static bool possible_int_mode_for_fp(ir_mode *mode)
2428 if (!mode_is_signed(mode))
2430 size = get_mode_size_bits(mode);
2431 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to a fist-compatible int mode. */
2436 static int is_float_to_int_conv(const ir_node *node)
2438 ir_mode *mode = get_irn_mode(node);
2442 if (!possible_int_mode_for_fp(mode))
2447 conv_op = get_Conv_op(node);
2448 conv_mode = get_irn_mode(conv_op);
2450 if (!mode_is_float(conv_mode))
2457 * Transform a Store(floatConst) into a sequence of
2460 * @return the created ia32 Store node
2462 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2464 ir_mode *mode = get_irn_mode(cns);
2465 unsigned size = get_mode_size_bytes(mode);
2466 tarval *tv = get_Const_tarval(cns);
2467 ir_node *block = get_nodes_block(node);
2468 ir_node *new_block = be_transform_node(block);
2469 ir_node *ptr = get_Store_ptr(node);
2470 ir_node *mem = get_Store_mem(node);
2471 dbg_info *dbgi = get_irn_dbg_info(node);
2475 ia32_address_t addr;
/* The constant is emitted as one 32-bit integer store per 4 bytes. */
2477 assert(size % 4 == 0);
2480 build_address_ptr(&addr, ptr, mem);
/* Assemble 4 bytes of the tarval into a little-endian 32-bit immediate. */
2484 get_tarval_sub_bits(tv, ofs) |
2485 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2486 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2487 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2488 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2490 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2491 addr.index, addr.mem, imm);
2493 set_irn_pinned(new_node, get_irn_pinned(node));
2494 set_ia32_op_type(new_node, ia32_AddrModeD);
2495 set_ia32_ls_mode(new_node, mode_Iu);
2496 set_address(new_node, &addr);
2497 SET_IA32_ORIG_NODE(new_node, node);
2500 ins[i++] = new_node;
2505 } while (size != 0);
/* Join the partial stores' memory outputs with a Sync. */
2508 return new_rd_Sync(dbgi, new_block, i, ins);
2515 * Generate a vfist or vfisttp instruction.
2517 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2518 ir_node *mem, ir_node *val, ir_node **fist)
2522 if (ia32_cg_config.use_fisttp) {
2523 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2524 if other users exists */
2525 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2526 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* The Keep ensures the popped value stays live for other users. */
2527 be_new_Keep(block, 1, &value);
2529 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* Plain fist needs the FPU control word set to truncation mode. */
2532 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2535 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2541 * Transforms a general (no special case) Store.
2543 * @return the created ia32 Store node
2545 static ir_node *gen_general_Store(ir_node *node)
2547 ir_node *val = get_Store_value(node);
2548 ir_mode *mode = get_irn_mode(val);
2549 ir_node *block = get_nodes_block(node);
2550 ir_node *new_block = be_transform_node(block);
2551 ir_node *ptr = get_Store_ptr(node);
2552 ir_node *mem = get_Store_mem(node);
2553 dbg_info *dbgi = get_irn_dbg_info(node);
2554 ir_node *new_val, *new_node, *store;
2555 ia32_address_t addr;
2557 /* check for destination address mode */
2558 new_node = try_create_dest_am(node);
2559 if (new_node != NULL)
2562 /* construct store address */
2563 memset(&addr, 0, sizeof(addr));
2564 ia32_create_address_mode(&addr, ptr, 0);
2566 if (addr.base == NULL) {
2567 addr.base = noreg_GP;
2569 addr.base = be_transform_node(addr.base);
2572 if (addr.index == NULL) {
2573 addr.index = noreg_GP;
2575 addr.index = be_transform_node(addr.index);
2577 addr.mem = be_transform_node(mem);
2579 if (mode_is_float(mode)) {
2580 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2582 while (is_Conv(val) && mode == get_irn_mode(val)) {
2583 ir_node *op = get_Conv_op(val);
2584 if (!mode_is_float(get_irn_mode(op)))
2588 new_val = be_transform_node(val);
2589 if (ia32_cg_config.use_sse2) {
2590 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2591 addr.index, addr.mem, new_val);
2593 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2594 addr.index, addr.mem, new_val, mode);
/* x87 path: Store(Conv(float->int)) becomes a single fist. */
2597 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2598 val = get_Conv_op(val);
2600 /* TODO: is this optimisation still necessary at all (middleend)? */
2601 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2602 while (is_Conv(val)) {
2603 ir_node *op = get_Conv_op(val);
2604 if (!mode_is_float(get_irn_mode(op)))
2606 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2610 new_val = be_transform_node(val);
2611 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* Integer store: value may be an immediate; 8-bit has its own node. */
2613 new_val = create_immediate_or_transform(val, 0);
2614 assert(mode != mode_b);
2616 if (get_mode_size_bits(mode) == 8) {
2617 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2618 addr.index, addr.mem, new_val);
2620 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2621 addr.index, addr.mem, new_val);
2626 set_irn_pinned(store, get_irn_pinned(node));
2627 set_ia32_op_type(store, ia32_AddrModeD);
2628 set_ia32_ls_mode(store, mode);
2630 set_address(store, &addr);
2631 SET_IA32_ORIG_NODE(store, node);
2637 * Transforms a Store.
2639 * @return the created ia32 Store node
2641 static ir_node *gen_Store(ir_node *node)
2643 ir_node *val = get_Store_value(node);
2644 ir_mode *mode = get_irn_mode(val);
2646 if (mode_is_float(mode) && is_Const(val)) {
2647 /* We can transform every floating const store
2648 into a sequence of integer stores.
2649 If the constant is already in a register,
2650 it would be better to use it, but we don't
2651 have this information here. */
2652 return gen_float_const_Store(node, val);
2654 return gen_general_Store(node);
2658 * Transforms a Switch.
2660 * @return the created ia32 SwitchJmp node
2662 static ir_node *create_Switch(ir_node *node)
2664 dbg_info *dbgi = get_irn_dbg_info(node);
2665 ir_node *block = be_transform_node(get_nodes_block(node));
2666 ir_node *sel = get_Cond_selector(node);
2667 ir_node *new_sel = be_transform_node(sel);
2668 long switch_min = LONG_MAX;
2669 long switch_max = LONG_MIN;
2670 long default_pn = get_Cond_default_proj(node);
2672 const ir_edge_t *edge;
/* SwitchJmp uses a 32bit jump-table index. */
2674 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2676 /* determine the smallest and largest switch case value (default excluded) */
2677 foreach_out_edge(node, edge) {
2678 ir_node *proj = get_edge_src_irn(edge);
2679 long pn = get_Proj_proj(proj);
2680 if (pn == default_pn)
2683 if (pn < switch_min)
2685 if (pn > switch_max)
/* Refuse degenerate jump tables; the range bounds the table size. */
2689 if ((unsigned long) (switch_max - switch_min) > 128000) {
2690 panic("Size of switch %+F bigger than 128000", node);
2693 if (switch_min != 0) {
2694 /* if smallest switch case is not 0 we need an additional sub
 * (implemented as a Lea with a negative offset) */
2695 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2696 add_ia32_am_offs_int(new_sel, -switch_min);
2697 set_ia32_op_type(new_sel, ia32_AddrModeS);
2699 SET_IA32_ORIG_NODE(new_sel, node);
2702 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2703 SET_IA32_ORIG_NODE(new_node, node);
2709 * Transform a Cond node.
2711 static ir_node *gen_Cond(ir_node *node)
/* mode_b selectors become a Jcc fed by a flags-producing node;
 * anything else is a Switch. */
2713 ir_node *block = get_nodes_block(node);
2714 ir_node *new_block = be_transform_node(block);
2715 dbg_info *dbgi = get_irn_dbg_info(node);
2716 ir_node *sel = get_Cond_selector(node);
2717 ir_mode *sel_mode = get_irn_mode(sel);
2718 ir_node *flags = NULL;
2722 if (sel_mode != mode_b) {
2723 return create_Switch(node);
2726 /* we get flags from a Cmp; pnc receives the condition code */
2727 flags = get_flags_node(sel, &pnc);
2729 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2730 SET_IA32_ORIG_NODE(new_node, node);
2736 * Transform a be_Copy.
2738 static ir_node *gen_be_Copy(ir_node *node)
/* Duplicate the backend Copy; GP-register values are normalized to
 * mode_Iu so all integer copies share one mode. */
2740 ir_node *new_node = be_duplicate_node(node);
2741 ir_mode *mode = get_irn_mode(new_node);
2743 if (ia32_mode_needs_gp_reg(mode)) {
2744 set_irn_mode(new_node, mode_Iu);
2750 static ir_node *create_Fucom(ir_node *node)
/* x87 float compare. Prefer fucomi (writes EFLAGS directly); otherwise
 * use ftst/fucom + fnstsw and transfer the status word via sahf. */
2752 dbg_info *dbgi = get_irn_dbg_info(node);
2753 ir_node *block = get_nodes_block(node);
2754 ir_node *new_block = be_transform_node(block);
2755 ir_node *left = get_Cmp_left(node);
2756 ir_node *new_left = be_transform_node(left);
2757 ir_node *right = get_Cmp_right(node);
2761 if (ia32_cg_config.use_fucomi) {
2762 new_right = be_transform_node(right);
2763 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2765 set_ia32_commutative(new_node);
2766 SET_IA32_ORIG_NODE(new_node, node);
/* ftst compares against 0.0 without needing the right operand. */
2768 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2769 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2771 new_right = be_transform_node(right);
2772 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2775 set_ia32_commutative(new_node);
2777 SET_IA32_ORIG_NODE(new_node, node);
/* sahf copies AH (the fnstsw result) into the flags register. */
2779 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2780 SET_IA32_ORIG_NODE(new_node, node);
2786 static ir_node *create_Ucomi(ir_node *node)
/* SSE float compare via ucomiss/ucomisd; one operand may come from
 * memory (address mode matching). */
2788 dbg_info *dbgi = get_irn_dbg_info(node);
2789 ir_node *src_block = get_nodes_block(node);
2790 ir_node *new_block = be_transform_node(src_block);
2791 ir_node *left = get_Cmp_left(node);
2792 ir_node *right = get_Cmp_right(node);
2794 ia32_address_mode_t am;
2795 ia32_address_t *addr = &am.addr;
2797 match_arguments(&am, src_block, left, right, NULL,
2798 match_commutative | match_am);
2800 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2801 addr->mem, am.new_op1, am.new_op2,
2803 set_am_attributes(new_node, &am);
2805 SET_IA32_ORIG_NODE(new_node, node);
/* Re-route a possible memory Proj if address mode was used. */
2807 new_node = fix_mem_proj(new_node, &am);
2813 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2814 * to fold an and into a test node
2816 static bool can_fold_test_and(ir_node *node)
/* Returns true iff every Proj of the Cmp is Eq or Lg; only then may an
 * And operand be folded into a Test instruction (which only produces a
 * meaningful zero flag). */
2818 const ir_edge_t *edge;
2820 /* we can only have eq and lg projs */
2821 foreach_out_edge(node, edge) {
2822 ir_node *proj = get_edge_src_irn(edge);
2823 pn_Cmp pnc = get_Proj_proj(proj);
2824 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2832 * returns true if it is assured, that the upper bits of a node are "clean"
2833 * which means for a 16 or 8 bit value, that the upper bits in the register
2834 * are 0 for unsigned and a copy of the last significant bit for signed
2837 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2839 assert(ia32_mode_needs_gp_reg(mode));
/* 32bit values fill the whole register; nothing above them to check. */
2840 if (get_mode_size_bits(mode) >= 32)
/* Look through Projs at the producing node. */
2843 if (is_Proj(transformed_node))
2844 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2846 switch (get_ia32_irn_opcode(transformed_node)) {
2847 case iro_ia32_Conv_I2I:
2848 case iro_ia32_Conv_I2I8Bit: {
/* A conversion cleans the upper bits iff it extends from a mode that
 * is no wider and has matching signedness. */
2849 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2850 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2852 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2859 if (mode_is_signed(mode)) {
2860 return false; /* TODO handle signed modes */
/* Shr by a large enough constant zeroes the upper bits itself. */
2862 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2863 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2864 const ia32_immediate_attr_t *attr
2865 = get_ia32_immediate_attr_const(right);
2866 if (attr->symconst == 0 &&
2867 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2871 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2875 /* TODO too conservative if shift amount is constant */
2876 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* For unsigned modes one clean And operand suffices (And can only
 * clear bits); for signed both are needed. */
2879 if (!mode_is_signed(mode)) {
2881 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2882 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2884 /* TODO if one is known to be zero extended, then || is sufficient */
2889 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2890 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2892 case iro_ia32_Const:
2893 case iro_ia32_Immediate: {
/* A constant is clean if shifting out the value bits leaves only the
 * sign extension (signed) or zero (unsigned). */
2894 const ia32_immediate_attr_t *attr =
2895 get_ia32_immediate_attr_const(transformed_node);
2896 if (mode_is_signed(mode)) {
2897 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2898 return shifted == 0 || shifted == -1;
2900 unsigned long shifted = (unsigned long)attr->offset;
2901 shifted >>= get_mode_size_bits(mode);
2902 return shifted == 0;
2912 * Generate code for a Cmp.
2914 static ir_node *gen_Cmp(ir_node *node)
/* Transform a Cmp into Ucomi/Fucom (float), Test (x & y ==/!= 0)
 * or Cmp (general integer compare), producing a flags value. */
2916 dbg_info *dbgi = get_irn_dbg_info(node);
2917 ir_node *block = get_nodes_block(node);
2918 ir_node *new_block = be_transform_node(block);
2919 ir_node *left = get_Cmp_left(node);
2920 ir_node *right = get_Cmp_right(node);
2921 ir_mode *cmp_mode = get_irn_mode(left);
2923 ia32_address_mode_t am;
2924 ia32_address_t *addr = &am.addr;
2927 if (mode_is_float(cmp_mode)) {
2928 if (ia32_cg_config.use_sse2) {
2929 return create_Ucomi(node);
2931 return create_Fucom(node);
2935 assert(ia32_mode_needs_gp_reg(cmp_mode));
2937 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2938 cmp_unsigned = !mode_is_signed(cmp_mode);
2939 if (is_Const_0(right) &&
2941 get_irn_n_edges(left) == 1 &&
2942 can_fold_test_and(node)) {
2943 /* Test(and_left, and_right) */
2944 ir_node *and_left = get_And_left(left);
2945 ir_node *and_right = get_And_right(left);
2947 /* matze: code here used mode instead of cmd_mode, I think it is always
2948 * the same as cmp_mode, but I leave this here to see if this is really
2951 assert(get_irn_mode(and_left) == cmp_mode);
2953 match_arguments(&am, block, and_left, and_right, NULL,
2955 match_am | match_8bit_am | match_16bit_am |
2956 match_am_and_immediates | match_immediate);
2958 /* use 32bit compare mode if possible since the opcode is smaller */
2959 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2960 upper_bits_clean(am.new_op2, cmp_mode)) {
2961 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2964 if (get_mode_size_bits(cmp_mode) == 8) {
2965 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2966 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2969 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2970 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2973 /* Cmp(left, right) */
2974 match_arguments(&am, block, left, right, NULL,
2975 match_commutative | match_am | match_8bit_am |
2976 match_16bit_am | match_am_and_immediates |
2978 /* use 32bit compare mode if possible since the opcode is smaller */
2979 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2980 upper_bits_clean(am.new_op2, cmp_mode)) {
2981 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2984 if (get_mode_size_bits(cmp_mode) == 8) {
2985 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2986 addr->index, addr->mem, am.new_op1,
2987 am.new_op2, am.ins_permuted,
2990 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2991 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Finalize address-mode attributes and record the compare mode. */
2994 set_am_attributes(new_node, &am);
2995 set_ia32_ls_mode(new_node, cmp_mode);
2997 SET_IA32_ORIG_NODE(new_node, node);
2999 new_node = fix_mem_proj(new_node, &am);
3004 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
/* Build a CMovcc from a Mux; requires cmov support and GP-register
 * operands. flags is the untransformed condition source,
 * new_flags its transformed counterpart. */
3007 dbg_info *dbgi = get_irn_dbg_info(node);
3008 ir_node *block = get_nodes_block(node);
3009 ir_node *new_block = be_transform_node(block);
3010 ir_node *val_true = get_Mux_true(node);
3011 ir_node *val_false = get_Mux_false(node);
3013 ia32_address_mode_t am;
3014 ia32_address_t *addr;
3016 assert(ia32_cg_config.use_cmov);
3017 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3021 match_arguments(&am, block, val_false, val_true, flags,
3022 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If matching swapped the operands, the condition must be negated. */
3024 if (am.ins_permuted)
3025 pnc = ia32_get_negated_pnc(pnc);
3027 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3028 addr->mem, am.new_op1, am.new_op2, new_flags,
3030 set_am_attributes(new_node, &am);
3032 SET_IA32_ORIG_NODE(new_node, node);
3034 new_node = fix_mem_proj(new_node, &am);
3040 * Creates a ia32 Setcc instruction.
3042 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3043 ir_node *flags, pn_Cmp pnc,
/* Materialize a condition as a 0/1 value: Setcc produces an 8bit
 * result which is zero-extended if the original mode is wider. */
3046 ir_mode *mode = get_irn_mode(orig_node);
3049 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3050 SET_IA32_ORIG_NODE(new_node, orig_node);
3052 /* we might need to conv the result up */
3053 if (get_mode_size_bits(mode) > 8) {
3054 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3055 nomem, new_node, mode_Bu);
3056 SET_IA32_ORIG_NODE(new_node, orig_node);
3063 * Create instruction for an unsigned Difference or Zero.
3065 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
/* "Difference or zero": max(a-b, 0) for unsigned values, built as
 * sub; sbb 0; not; and — i.e. (a-b) & ~(borrow mask). */
3067 ir_mode *mode = get_irn_mode(psi);
3077 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3078 match_mode_neutral | match_am | match_immediate | match_two_users);
3080 block = get_nodes_block(new_node);
3082 if (is_Proj(new_node)) {
3083 sub = get_Proj_pred(new_node);
3084 assert(is_ia32_Sub(sub));
/* The Sub must produce both the result and the flags (carry/borrow). */
3087 set_irn_mode(sub, mode_T);
3088 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3090 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3092 dbgi = get_irn_dbg_info(psi);
/* sbb 0 yields -1 on borrow, 0 otherwise; not inverts to the mask. */
3093 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3094 not = new_bd_ia32_Not(dbgi, block, sbb);
3096 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3097 set_ia32_commutative(new_node);
3102 * Create an const array of two float consts.
3104 * @param c0 the first constant
3105 * @param c1 the second constant
3106 * @param new_mode IN/OUT for the mode of the constants, if NULL
3107 * smallest possible mode will be used
3109 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3112 ir_mode *mode = *new_mode;
3114 ir_initializer_t *initializer;
3115 tarval *tv0 = get_Const_tarval(c0);
3116 tarval *tv1 = get_Const_tarval(c1);
3119 /* detect the best (smallest) mode that holds both constants losslessly */
3120 mode = get_tarval_mode(tv0);
3122 if (mode != mode_F) {
3123 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3124 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3126 tv0 = tarval_convert_to(tv0, mode);
3127 tv1 = tarval_convert_to(tv1, mode);
3128 } else if (mode != mode_D) {
3129 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3130 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3132 tv0 = tarval_convert_to(tv0, mode);
3133 tv1 = tarval_convert_to(tv1, mode);
/* Build a local, constant, 2-element float array entity in the
 * global type and attach an initializer with both values. */
3140 tp = ia32_create_float_type(mode, 4);
3141 tp = ia32_create_float_array(tp);
3143 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3145 set_entity_ld_ident(ent, get_entity_ident(ent));
3146 set_entity_visibility(ent, ir_visibility_local);
3147 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3149 initializer = create_initializer_compound(2);
3151 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3152 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3154 set_entity_initializer(ent, initializer);
3161 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc-based Mux lowering may emit (ADD, LEA,
 * SHL, NEG, AND, SET, ...). */
3163 enum setcc_transform_insn {
/* Recipe computed by find_const_transform: a short sequence of steps
 * that turns the 0/1 Setcc result into the requested constants. */
3176 typedef struct setcc_transform {
3178 unsigned permutate_cmp_ins;
3181 enum setcc_transform_insn transform;
3185 } setcc_transform_t;
3188 * Setcc can only handle 0 and 1 result.
3189 * Find a transformation that creates 0 and 1 from
3192 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3193 setcc_transform_t *res)
3198 res->permutate_cmp_ins = 0;
/* Normalize so that t is nonzero and the larger value; the condition
 * is negated whenever true/false are swapped. */
3200 if (tarval_is_null(t)) {
3204 pnc = ia32_get_negated_pnc(pnc);
3205 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3206 /* now, t is the bigger one */
3210 pnc = ia32_get_negated_pnc(pnc);
/* Reduce to the f == 0 case by subtracting f and adding it back later. */
3214 if (! tarval_is_null(f)) {
3215 tarval *t_sub = tarval_sub(t, f, NULL);
3218 res->steps[step].transform = SETCC_TR_ADD;
3220 if (t == tarval_bad)
3221 panic("constant subtract failed");
3222 if (! tarval_is_long(f))
3223 panic("tarval is not long");
3225 res->steps[step].val = get_tarval_long(f);
3227 f = tarval_sub(f, f, NULL);
3228 assert(tarval_is_null(f));
/* t == 1: the Setcc result is already the wanted value. */
3231 if (tarval_is_one(t)) {
3232 res->steps[step].transform = SETCC_TR_SET;
3233 res->num_steps = ++step;
/* t == -1: negate the 0/1 Setcc result. */
3237 if (tarval_is_minus_one(t)) {
3238 res->steps[step].transform = SETCC_TR_NEG;
3240 res->steps[step].transform = SETCC_TR_SET;
3241 res->num_steps = ++step;
3244 if (tarval_is_long(t)) {
3245 long v = get_tarval_long(t);
3247 res->steps[step].val = 0;
/* Small multiples of the 0/1 value map onto Lea/Shl addressing
 * tricks: scale 3/2/1 encode (a << scale) with optional +a. */
3250 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3252 res->steps[step].transform = SETCC_TR_LEAxx;
3253 res->steps[step].scale = 3; /* (a << 3) + a */
3256 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3258 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3259 res->steps[step].scale = 3; /* (a << 3) */
3262 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3264 res->steps[step].transform = SETCC_TR_LEAxx;
3265 res->steps[step].scale = 2; /* (a << 2) + a */
3268 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3270 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3271 res->steps[step].scale = 2; /* (a << 2) */
3274 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3276 res->steps[step].transform = SETCC_TR_LEAxx;
3277 res->steps[step].scale = 1; /* (a << 1) + a */
3280 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3282 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3283 res->steps[step].scale = 1; /* (a << 1) */
3286 res->num_steps = step;
/* General case: mask with the constant (or shift when t is a single
 * bit) after negating the 0/1 into a 0/-1 mask. */
3289 if (! tarval_is_single_bit(t)) {
3290 res->steps[step].transform = SETCC_TR_AND;
3291 res->steps[step].val = v;
3293 res->steps[step].transform = SETCC_TR_NEG;
3295 int v = get_tarval_lowest_bit(t);
3298 res->steps[step].transform = SETCC_TR_SHL;
3299 res->steps[step].scale = v;
3303 res->steps[step].transform = SETCC_TR_SET;
3304 res->num_steps = ++step;
3307 panic("tarval is not long");
3311 * Transforms a Mux node into some code sequence.
3313 * @return The transformed node.
3315 static ir_node *gen_Mux(ir_node *node)
3317 dbg_info *dbgi = get_irn_dbg_info(node);
3318 ir_node *block = get_nodes_block(node);
3319 ir_node *new_block = be_transform_node(block);
3320 ir_node *mux_true = get_Mux_true(node);
3321 ir_node *mux_false = get_Mux_false(node);
3322 ir_node *cond = get_Mux_sel(node);
3323 ir_mode *mode = get_irn_mode(node);
3328 assert(get_irn_mode(cond) == mode_b);
3330 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3331 if (mode_is_float(mode)) {
3332 ir_node *cmp = get_Proj_pred(cond);
3333 ir_node *cmp_left = get_Cmp_left(cmp);
3334 ir_node *cmp_right = get_Cmp_right(cmp);
3335 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE2: recognize min/max patterns Mux(a<=b, a, b) etc. */
3337 if (ia32_cg_config.use_sse2) {
3338 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3339 if (cmp_left == mux_true && cmp_right == mux_false) {
3340 /* Mux(a <= b, a, b) => MIN */
3341 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3342 match_commutative | match_am | match_two_users);
3343 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3344 /* Mux(a <= b, b, a) => MAX */
3345 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3346 match_commutative | match_am | match_two_users);
3348 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3349 if (cmp_left == mux_true && cmp_right == mux_false) {
3350 /* Mux(a >= b, a, b) => MAX */
3351 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3352 match_commutative | match_am | match_two_users);
3353 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3354 /* Mux(a >= b, b, a) => MIN */
3355 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3356 match_commutative | match_am | match_two_users);
/* Float Mux of two constants: place both in a const array and index
 * it with the scaled 0/1 Setcc result. */
3361 if (is_Const(mux_true) && is_Const(mux_false)) {
3362 ia32_address_mode_t am;
3367 flags = get_flags_node(cond, &pnc);
3368 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3370 if (ia32_cg_config.use_sse2) {
3371 /* cannot load from different mode on SSE */
3374 /* x87 can load any mode */
3378 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 index by the element size (Lea for 4/8, Add for 2). */
3380 switch (get_mode_size_bytes(new_mode)) {
3390 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3391 set_ia32_am_scale(new_node, 2);
3396 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3397 set_ia32_am_scale(new_node, 1);
3400 /* arg, shift 16 NOT supported */
3402 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3405 panic("Unsupported constant size");
3408 am.ls_mode = new_mode;
3409 am.addr.base = get_symconst_base();
3410 am.addr.index = new_node;
3411 am.addr.mem = nomem;
3413 am.addr.scale = scale;
3414 am.addr.use_frame = 0;
3415 am.addr.frame_entity = NULL;
3416 am.addr.symconst_sign = 0;
3417 am.mem_proj = am.addr.mem;
3418 am.op_type = ia32_AddrModeS;
3421 am.pinned = op_pin_state_floats;
3423 am.ins_permuted = 0;
3425 if (ia32_cg_config.use_sse2)
3426 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3428 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3429 set_am_attributes(load, &am);
3431 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3433 panic("cannot transform floating point Mux");
/* From here on: integer Mux. */
3436 assert(ia32_mode_needs_gp_reg(mode));
3438 if (is_Proj(cond)) {
3439 ir_node *cmp = get_Proj_pred(cond);
3441 ir_node *cmp_left = get_Cmp_left(cmp);
3442 ir_node *cmp_right = get_Cmp_right(cmp);
3443 pn_Cmp pnc = get_Proj_proj(cond);
3445 /* check for unsigned Doz first */
3446 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3447 is_Const_0(mux_false) && is_Sub(mux_true) &&
3448 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3449 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3450 return create_doz(node, cmp_left, cmp_right);
3451 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3452 is_Const_0(mux_true) && is_Sub(mux_false) &&
3453 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3454 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3455 return create_doz(node, cmp_left, cmp_right);
3460 flags = get_flags_node(cond, &pnc);
3462 if (is_Const(mux_true) && is_Const(mux_false)) {
3463 /* both are const, good: compute a Setcc-based recipe and replay
3464 its steps back-to-front */
3464 tarval *tv_true = get_Const_tarval(mux_true);
3465 tarval *tv_false = get_Const_tarval(mux_false);
3466 setcc_transform_t res;
3469 find_const_transform(pnc, tv_true, tv_false, &res);
3471 if (res.permutate_cmp_ins) {
3472 ia32_attr_t *attr = get_ia32_attr(flags);
3473 attr->data.ins_permuted ^= 1;
3475 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3478 switch (res.steps[step].transform) {
3480 imm = ia32_immediate_from_long(res.steps[step].val);
3481 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3483 case SETCC_TR_ADDxx:
3484 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3487 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3488 set_ia32_am_scale(new_node, res.steps[step].scale);
3489 set_ia32_am_offs_int(new_node, res.steps[step].val);
3491 case SETCC_TR_LEAxx:
3492 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3493 set_ia32_am_scale(new_node, res.steps[step].scale);
3494 set_ia32_am_offs_int(new_node, res.steps[step].val);
3497 imm = ia32_immediate_from_long(res.steps[step].scale);
3498 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3501 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3504 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3507 imm = ia32_immediate_from_long(res.steps[step].val);
3508 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3511 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3514 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3517 panic("unknown setcc transform");
/* Non-constant operands: fall back to a conditional move. */
3521 new_node = create_CMov(node, cond, flags, pnc);
3529 * Create a conversion from x87 state register to general purpose.
3531 static ir_node *gen_x87_fp_to_gp(ir_node *node)
/* x87 float -> integer: fist the value onto the frame, then load the
 * integer back through a GP Load. */
3533 ir_node *block = be_transform_node(get_nodes_block(node));
3534 ir_node *op = get_Conv_op(node);
3535 ir_node *new_op = be_transform_node(op);
3536 ir_graph *irg = current_ir_graph;
3537 dbg_info *dbgi = get_irn_dbg_info(node);
3538 ir_mode *mode = get_irn_mode(node);
3539 ir_node *fist, *load, *mem;
3541 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3542 set_irn_pinned(fist, op_pin_state_floats);
3543 set_ia32_use_frame(fist);
3544 set_ia32_op_type(fist, ia32_AddrModeD);
3546 assert(get_mode_size_bits(mode) <= 32);
3547 /* exception: we can only store signed 32 bit integers, so for unsigned
3548 we store a 64bit (signed) integer and load the lower bits */
3549 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3550 set_ia32_ls_mode(fist, mode_Ls);
3552 set_ia32_ls_mode(fist, mode_Is);
3554 SET_IA32_ORIG_NODE(fist, node);
/* Load the (low 32 bits of the) stored integer back from the frame. */
3557 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3559 set_irn_pinned(load, op_pin_state_floats);
3560 set_ia32_use_frame(load);
3561 set_ia32_op_type(load, ia32_AddrModeS);
3562 set_ia32_ls_mode(load, mode_Is);
/* Tell the stack-slot allocator how big the spill slot must be. */
3563 if (get_ia32_ls_mode(fist) == mode_Ls) {
3564 ia32_attr_t *attr = get_ia32_attr(load);
3565 attr->data.need_64bit_stackent = 1;
3567 ia32_attr_t *attr = get_ia32_attr(load);
3568 attr->data.need_32bit_stackent = 1;
3570 SET_IA32_ORIG_NODE(load, node);
3572 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3576 * Creates a x87 strict Conv by placing a Store and a Load
3578 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
/* Force x87 rounding to tgt_mode precision by storing the value to the
 * frame in tgt_mode and immediately loading it back. */
3580 ir_node *block = get_nodes_block(node);
3581 ir_graph *irg = get_Block_irg(block);
3582 dbg_info *dbgi = get_irn_dbg_info(node);
3583 ir_node *frame = get_irg_frame(irg);
3584 ir_node *store, *load;
3587 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3588 set_ia32_use_frame(store);
3589 set_ia32_op_type(store, ia32_AddrModeD);
3590 SET_IA32_ORIG_NODE(store, node);
/* The load is sequenced after the store through the store's memory. */
3592 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3593 set_ia32_use_frame(load);
3594 set_ia32_op_type(load, ia32_AddrModeS);
3595 SET_IA32_ORIG_NODE(load, node);
3597 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
3601 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3602 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
/* Create an integer-to-integer Conv, picking the 8bit variant when the
 * target mode is 8 bits wide. */
3604 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3606 func = get_mode_size_bits(mode) == 8 ?
3607 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3608 return func(dbgi, block, base, index, mem, val, mode);
3612 * Create a conversion from general purpose to x87 register
3614 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
/* Integer -> x87 float: prefer fild straight from memory (source AM);
 * otherwise spill the integer to the frame and fild from there. */
3616 ir_node *src_block = get_nodes_block(node);
3617 ir_node *block = be_transform_node(src_block);
3618 ir_graph *irg = get_Block_irg(block);
3619 dbg_info *dbgi = get_irn_dbg_info(node);
3620 ir_node *op = get_Conv_op(node);
3621 ir_node *new_op = NULL;
3623 ir_mode *store_mode;
3628 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3629 if (possible_int_mode_for_fp(src_mode)) {
3630 ia32_address_mode_t am;
3632 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3633 if (am.op_type == ia32_AddrModeS) {
3634 ia32_address_t *addr = &am.addr;
3636 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3637 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3639 set_am_attributes(fild, &am);
3640 SET_IA32_ORIG_NODE(fild, node);
3642 fix_mem_proj(fild, &am);
3647 if (new_op == NULL) {
3648 new_op = be_transform_node(op);
3651 mode = get_irn_mode(op);
3653 /* first convert to 32 bit signed if necessary */
3654 if (get_mode_size_bits(src_mode) < 32) {
3655 if (!upper_bits_clean(new_op, src_mode)) {
3656 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3657 SET_IA32_ORIG_NODE(new_op, node);
3662 assert(get_mode_size_bits(mode) == 32);
/* Spill the 32bit integer to the frame so fild can read it. */
3665 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3667 set_ia32_use_frame(store);
3668 set_ia32_op_type(store, ia32_AddrModeD);
3669 set_ia32_ls_mode(store, mode_Iu);
3671 /* exception for 32bit unsigned: do a 64bit spill+load since fild only
 * knows signed operands — store a zero high word at offset 4 */
3672 if (!mode_is_signed(mode)) {
3675 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3677 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3678 noreg_GP, nomem, zero_const);
3680 set_ia32_use_frame(zero_store);
3681 set_ia32_op_type(zero_store, ia32_AddrModeD);
3682 add_ia32_am_offs_int(zero_store, 4);
3683 set_ia32_ls_mode(zero_store, mode_Iu);
/* Both stores must complete before the fild reads the slot. */
3688 store = new_rd_Sync(dbgi, block, 2, in);
3689 store_mode = mode_Ls;
3691 store_mode = mode_Is;
3695 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3697 set_ia32_use_frame(fild);
3698 set_ia32_op_type(fild, ia32_AddrModeS);
3699 set_ia32_ls_mode(fild, store_mode);
3701 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3707 * Create a conversion from one integer mode into another one
3709 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3710 dbg_info *dbgi, ir_node *block, ir_node *op,
3713 ir_node *new_block = be_transform_node(block);
3715 ir_mode *smaller_mode;
3716 ia32_address_mode_t am;
3717 ia32_address_t *addr = &am.addr;
/* Only the smaller of the two modes matters: the Conv extends from it. */
3720 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3721 smaller_mode = src_mode;
3723 smaller_mode = tgt_mode;
3726 #ifdef DEBUG_libfirm
3728 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3733 match_arguments(&am, block, NULL, op, NULL,
3734 match_am | match_8bit_am | match_16bit_am);
3736 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3737 /* unnecessary conv. in theory it shouldn't have been AM */
3738 assert(is_ia32_NoReg_GP(addr->base));
3739 assert(is_ia32_NoReg_GP(addr->index));
3740 assert(is_NoMem(addr->mem));
3741 assert(am.addr.offset == 0);
3742 assert(am.addr.symconst_ent == NULL);
3746 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3747 addr->mem, am.new_op2, smaller_mode);
3748 set_am_attributes(new_node, &am);
3749 /* match_arguments assumes that out-mode = in-mode, this isn't true here
 * so fix the load/store mode to the smaller mode explicitly */
3751 set_ia32_ls_mode(new_node, smaller_mode);
3752 SET_IA32_ORIG_NODE(new_node, node);
3753 new_node = fix_mem_proj(new_node, &am);
3758  * Transforms a Conv node.
3760  * @return The created ia32 Conv node
/* Dispatches on (source mode, target mode): no-op conversions are killed,
 * float<->float and float<->int go through SSE or x87 helpers, int->int
 * falls through to create_I2I_Conv.  Only modes up to 32 bit integers are
 * expected here (64bit was lowered earlier). */
3762 static ir_node *gen_Conv(ir_node *node)
3764 ir_node *block = get_nodes_block(node);
3765 ir_node *new_block = be_transform_node(block);
3766 ir_node *op = get_Conv_op(node);
3767 ir_node *new_op = NULL;
3768 dbg_info *dbgi = get_irn_dbg_info(node);
3769 ir_mode *src_mode = get_irn_mode(op);
3770 ir_mode *tgt_mode = get_irn_mode(node);
3771 int src_bits = get_mode_size_bits(src_mode);
3772 int tgt_bits = get_mode_size_bits(tgt_mode);
3773 ir_node *res = NULL;
3775 assert(!mode_is_int(src_mode) || src_bits <= 32);
3776 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3778 /* modeB -> X should already be lowered by the lower_mode_b pass */
3779 if (src_mode == mode_b) {
3780 panic("ConvB not lowered %+F", node);
/* Same source and target mode: only a strict x87 conv may still matter. */
3783 if (src_mode == tgt_mode) {
3784 if (get_Conv_strict(node)) {
3785 if (ia32_cg_config.use_sse2) {
3786 /* when we are in SSE mode, we can kill all strict no-op conversion */
3787 return be_transform_node(op);
3790 /* this should be optimized already, but who knows... */
3791 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3792 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3793 return be_transform_node(op);
3797 if (mode_is_float(src_mode)) {
3798 new_op = be_transform_node(op);
3799 /* we convert from float ... */
3800 if (mode_is_float(tgt_mode)) {
3802 if (ia32_cg_config.use_sse2) {
3803 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3804 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3806 set_ia32_ls_mode(res, tgt_mode);
3808 if (get_Conv_strict(node)) {
3809 /* if fp_no_float_fold is not set then we assume that we
3810 * don't have any float operations in a non
3811 * mode_float_arithmetic mode and can skip strict upconvs */
3812 if (src_bits < tgt_bits
3813 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3814 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3817 res = gen_x87_strict_conv(tgt_mode, new_op);
3818 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3822 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* ... float to integer */
3827 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3828 if (ia32_cg_config.use_sse2) {
3829 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3831 set_ia32_ls_mode(res, src_mode);
3833 return gen_x87_fp_to_gp(node);
3837 /* we convert from int ... */
3838 if (mode_is_float(tgt_mode)) {
3840 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3841 if (ia32_cg_config.use_sse2) {
3842 new_op = be_transform_node(op);
3843 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3845 set_ia32_ls_mode(res, tgt_mode);
/* x87 path: fild is exact only while the integer fits the mantissa */
3847 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3848 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3849 res = gen_x87_gp_to_fp(node, src_mode);
3851 /* we need a strict-Conv, if the int mode has more bits than the
3853 if (float_mantissa < int_mantissa) {
3854 res = gen_x87_strict_conv(tgt_mode, res);
3855 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3859 } else if (tgt_mode == mode_b) {
3860 /* mode_b lowering already took care that we only have 0/1 values */
3861 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3862 src_mode, tgt_mode));
3863 return be_transform_node(op);
/* int -> int of the same width is a no-op on 32bit registers */
3866 if (src_bits == tgt_bits) {
3867 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3868 src_mode, tgt_mode));
3869 return be_transform_node(op);
3872 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3880 static ir_node *create_immediate_or_transform(ir_node *node,
3881 char immediate_constraint_type)
3883 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3884 if (new_node == NULL) {
3885 new_node = be_transform_node(node);
3891 * Transforms a FrameAddr into an ia32 Add.
3893 static ir_node *gen_be_FrameAddr(ir_node *node)
3895 ir_node *block = be_transform_node(get_nodes_block(node));
3896 ir_node *op = be_get_FrameAddr_frame(node);
3897 ir_node *new_op = be_transform_node(op);
3898 dbg_info *dbgi = get_irn_dbg_info(node);
3901 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3902 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3903 set_ia32_use_frame(new_node);
3905 SET_IA32_ORIG_NODE(new_node, node);
3911  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Only kicks in for SSE2 code returning a single primitive float result:
 * the SSE value is spilled to the frame, re-loaded into an x87 register
 * (calling convention returns floats in st0), and the Barrier feeding the
 * Return is rebuilt with the new value/memory inputs.  All other Returns
 * are just duplicated. */
3913 static ir_node *gen_be_Return(ir_node *node)
3915 ir_graph *irg = current_ir_graph;
3916 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3917 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3918 ir_entity *ent = get_irg_entity(irg);
3919 ir_type *tp = get_entity_type(ent);
3924 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3925 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3927 int pn_ret_val, pn_ret_mem, arity, i;
3929 assert(ret_val != NULL);
3930 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3931 return be_duplicate_node(node);
3934 res_type = get_method_res_type(tp, 0);
3936 if (! is_Primitive_type(res_type)) {
3937 return be_duplicate_node(node);
3940 mode = get_type_mode(res_type);
3941 if (! mode_is_float(mode)) {
3942 return be_duplicate_node(node);
3945 assert(get_method_n_ress(tp) == 1);
3947 pn_ret_val = get_Proj_proj(ret_val);
3948 pn_ret_mem = get_Proj_proj(ret_mem);
3950 /* get the Barrier */
3951 barrier = get_Proj_pred(ret_val);
3953 /* get result input of the Barrier */
3954 ret_val = get_irn_n(barrier, pn_ret_val);
3955 new_ret_val = be_transform_node(ret_val);
3957 /* get memory input of the Barrier */
3958 ret_mem = get_irn_n(barrier, pn_ret_mem);
3959 new_ret_mem = be_transform_node(ret_mem);
3961 frame = get_irg_frame(irg);
3963 dbgi = get_irn_dbg_info(barrier);
3964 block = be_transform_node(get_nodes_block(barrier));
3966 /* store xmm0 onto stack */
3967 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3968 new_ret_mem, new_ret_val);
3969 set_ia32_ls_mode(sse_store, mode);
3970 set_ia32_op_type(sse_store, ia32_AddrModeD);
3971 set_ia32_use_frame(sse_store);
3973 /* load into x87 register */
3974 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3975 set_ia32_op_type(fld, ia32_AddrModeS);
3976 set_ia32_use_frame(fld);
3978 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3979 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3981 /* create a new barrier */
3982 arity = get_irn_arity(barrier);
3983 in = ALLOCAN(ir_node*, arity);
3984 for (i = 0; i < arity; ++i) {
/* value/memory inputs are replaced by the fld result and its memory proj;
 * every other input is transformed normally */
3987 if (i == pn_ret_val) {
3989 } else if (i == pn_ret_mem) {
3992 ir_node *in = get_irn_n(barrier, i);
3993 new_in = be_transform_node(in);
3998 new_barrier = new_ir_node(dbgi, irg, block,
3999 get_irn_op(barrier), get_irn_mode(barrier),
4001 copy_node_attr(irg, barrier, new_barrier);
4002 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return below picks up the new barrier */
4003 be_set_transformed_node(barrier, new_barrier);
4005 /* transform normally */
4006 return be_duplicate_node(node);
4010 * Transform a be_AddSP into an ia32_SubSP.
4012 static ir_node *gen_be_AddSP(ir_node *node)
4014 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4015 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4017 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4018 match_am | match_immediate);
4022 * Transform a be_SubSP into an ia32_AddSP
4024 static ir_node *gen_be_SubSP(ir_node *node)
4026 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4027 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4029 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4030 match_am | match_immediate);
4034  * Change some phi modes
/* Rebuilds a Phi with the proper backend register requirement on its
 * single output (gp for integers/pointers, xmm or vfp for floats,
 * no requirement otherwise).  The operands are deliberately left
 * untransformed and fixed up later, since Phis may sit on loops. */
4036 static ir_node *gen_Phi(ir_node *node)
4038 const arch_register_req_t *req;
4039 ir_node *block = be_transform_node(get_nodes_block(node));
4040 ir_graph *irg = current_ir_graph;
4041 dbg_info *dbgi = get_irn_dbg_info(node);
4042 ir_mode *mode = get_irn_mode(node);
4045 if (ia32_mode_needs_gp_reg(mode)) {
4046 /* we shouldn't have any 64bit stuff around anymore */
4047 assert(get_mode_size_bits(mode) <= 32);
4048 /* all integer operations are on 32bit registers now */
4050 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4051 } else if (mode_is_float(mode)) {
4052 if (ia32_cg_config.use_sse2) {
4054 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4057 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4060 req = arch_no_register_req;
4063 /* phi nodes allow loops, so we use the old arguments for now
4064 * and fix this later */
4065 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4066 get_irn_in(node) + 1);
4067 copy_node_attr(irg, node, phi);
4068 be_duplicate_deps(node, phi);
4070 arch_set_out_register_req(phi, 0, req);
/* queue the (old) predecessors so they get transformed eventually */
4072 be_enqueue_preds(node);
4077 static ir_node *gen_Jmp(ir_node *node)
4079 ir_node *block = get_nodes_block(node);
4080 ir_node *new_block = be_transform_node(block);
4081 dbg_info *dbgi = get_irn_dbg_info(node);
4084 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4085 SET_IA32_ORIG_NODE(new_node, node);
4093 static ir_node *gen_IJmp(ir_node *node)
4095 ir_node *block = get_nodes_block(node);
4096 ir_node *new_block = be_transform_node(block);
4097 dbg_info *dbgi = get_irn_dbg_info(node);
4098 ir_node *op = get_IJmp_target(node);
4100 ia32_address_mode_t am;
4101 ia32_address_t *addr = &am.addr;
4103 assert(get_irn_mode(op) == mode_P);
4105 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4107 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4108 addr->mem, am.new_op2);
4109 set_am_attributes(new_node, &am);
4110 SET_IA32_ORIG_NODE(new_node, node);
4112 new_node = fix_mem_proj(new_node, &am);
4118  * Transform a Bound node.
/* Only the lower-bound == 0 case is supported (the common case emitted
 * for Java array checks): index < upper is tested with a Sub whose flags
 * feed an unsigned-less Jcc, which also catches negative indices. */
4120 static ir_node *gen_Bound(ir_node *node)
4123 ir_node *lower = get_Bound_lower(node);
4124 dbg_info *dbgi = get_irn_dbg_info(node);
4126 if (is_Const_0(lower)) {
4127 /* typical case for Java */
4128 ir_node *sub, *res, *flags, *block;
4130 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4131 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4133 block = get_nodes_block(res);
4134 if (! is_Proj(res)) {
/* make the Sub multi-out so we can attach a flags Proj below */
4136 set_irn_mode(sub, mode_T);
4137 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4139 sub = get_Proj_pred(res);
/* unsigned compare: a negative index wraps around and also fails */
4141 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4142 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4143 SET_IA32_ORIG_NODE(new_node, node);
4145 panic("generic Bound not supported in ia32 Backend");
4151 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4153 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4154 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4156 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4157 match_immediate | match_mode_neutral);
/* Transform a lowered l_ShrDep (logical shift-right with an extra
 * scheduling dependency) into an ia32 Shr.  NOTE(review): the match-flag
 * argument of gen_shift_binop continues on a line not visible here. */
4160 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4162 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4163 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4164 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Transform a lowered l_SarDep (arithmetic shift-right with an extra
 * scheduling dependency) into an ia32 Sar.  NOTE(review): the match-flag
 * argument of gen_shift_binop continues on a line not visible here. */
4168 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4170 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4171 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4172 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Transform a lowered l_Add (low word of a 64bit add) into an ia32 Add
 * whose mode is forced to mode_T so the carry flag can be projected by
 * the consuming l_Adc. */
4176 static ir_node *gen_ia32_l_Add(ir_node *node)
4178 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4179 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4180 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4181 match_commutative | match_am | match_immediate |
4182 match_mode_neutral);
/* gen_binop may have returned a result Proj; step back to the Add itself */
4184 if (is_Proj(lowered)) {
4185 lowered = get_Proj_pred(lowered);
4187 assert(is_ia32_Add(lowered));
4188 set_irn_mode(lowered, mode_T);
4194 static ir_node *gen_ia32_l_Adc(ir_node *node)
4196 return gen_binop_flags(node, new_bd_ia32_Adc,
4197 match_commutative | match_am | match_immediate |
4198 match_mode_neutral);
4202 * Transforms a l_MulS into a "real" MulS node.
4204 * @return the created ia32 Mul node
4206 static ir_node *gen_ia32_l_Mul(ir_node *node)
4208 ir_node *left = get_binop_left(node);
4209 ir_node *right = get_binop_right(node);
4211 return gen_binop(node, left, right, new_bd_ia32_Mul,
4212 match_commutative | match_am | match_mode_neutral);
4216 * Transforms a l_IMulS into a "real" IMul1OPS node.
4218 * @return the created ia32 IMul1OP node
4220 static ir_node *gen_ia32_l_IMul(ir_node *node)
4222 ir_node *left = get_binop_left(node);
4223 ir_node *right = get_binop_right(node);
4225 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4226 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered l_Sub (low word of a 64bit subtract) into an ia32
 * Sub whose mode is forced to mode_T so the borrow flag can be projected
 * by the consuming l_Sbb. */
4229 static ir_node *gen_ia32_l_Sub(ir_node *node)
4231 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4232 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4233 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4234 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have returned a result Proj; step back to the Sub itself */
4236 if (is_Proj(lowered)) {
4237 lowered = get_Proj_pred(lowered);
4239 assert(is_ia32_Sub(lowered));
4240 set_irn_mode(lowered, mode_T);
4246 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4248 return gen_binop_flags(node, new_bd_ia32_Sbb,
4249 match_am | match_immediate | match_mode_neutral);
4253  * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4254  * op1 - target to be shifted
4255  * op2 - contains bits to be shifted into target
4257  * Only op3 can be an immediate.
/* Skips integer Convs on the count operand (only the low 5 bits matter
 * on x86), optionally encodes the count as an immediate, and emits
 * ShlD or ShrD depending on the lowered node's opcode. */
4259 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4260 ir_node *low, ir_node *count)
4262 ir_node *block = get_nodes_block(node);
4263 ir_node *new_block = be_transform_node(block);
4264 dbg_info *dbgi = get_irn_dbg_info(node);
4265 ir_node *new_high = be_transform_node(high);
4266 ir_node *new_low = be_transform_node(low);
4270 /* the shift amount can be any mode that is bigger than 5 bits, since all
4271 * other bits are ignored anyway */
4272 while (is_Conv(count) &&
4273 get_irn_n_edges(count) == 1 &&
4274 mode_is_int(get_irn_mode(count))) {
4275 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4276 count = get_Conv_op(count);
4278 new_count = create_immediate_or_transform(count, 0);
4280 if (is_ia32_l_ShlD(node)) {
4281 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4284 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4287 SET_IA32_ORIG_NODE(new_node, node);
4292 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4294 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4295 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4296 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4297 return gen_lowered_64bit_shifts(node, high, low, count);
4300 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4302 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4303 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4304 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4305 return gen_lowered_64bit_shifts(node, high, low, count);
/* Converts a lowered 64bit integer (given as low/high 32bit words) to a
 * float using the x87 fild instruction: both words are stored to a frame
 * slot, fild loads them as one 64bit integer.  fild is signed, so for an
 * unsigned source a correction constant (2^64) is conditionally added,
 * selected by the sign bit of the high word via a scaled address mode. */
4308 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4310 ir_node *src_block = get_nodes_block(node);
4311 ir_node *block = be_transform_node(src_block);
4312 ir_graph *irg = current_ir_graph;
4313 dbg_info *dbgi = get_irn_dbg_info(node);
4314 ir_node *frame = get_irg_frame(irg);
4315 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4316 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4317 ir_node *new_val_low = be_transform_node(val_low);
4318 ir_node *new_val_high = be_transform_node(val_high);
4320 ir_node *sync, *fild, *res;
4321 ir_node *store_low, *store_high;
4323 if (ia32_cg_config.use_sse2) {
4324 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both halves to the (64bit) frame slot, high word at offset 4 */
4328 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4330 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4332 SET_IA32_ORIG_NODE(store_low, node);
4333 SET_IA32_ORIG_NODE(store_high, node);
4335 set_ia32_use_frame(store_low);
4336 set_ia32_use_frame(store_high);
4337 set_ia32_op_type(store_low, ia32_AddrModeD);
4338 set_ia32_op_type(store_high, ia32_AddrModeD);
4339 set_ia32_ls_mode(store_low, mode_Iu);
4340 set_ia32_ls_mode(store_high, mode_Is);
4341 add_ia32_am_offs_int(store_high, 4);
4345 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the 64bit integer into an x87 register */
4348 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4350 set_ia32_use_frame(fild);
4351 set_ia32_op_type(fild, ia32_AddrModeS);
4352 set_ia32_ls_mode(fild, mode_Ls);
4354 SET_IA32_ORIG_NODE(fild, node);
4356 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4358 if (! mode_is_signed(get_irn_mode(val_high))) {
4359 ia32_address_mode_t am;
/* index = sign bit of the high word (0 or 1), scale selects either
 * 0.0 or the 2^64 bias constant from a known-constant table */
4361 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4364 am.addr.base = get_symconst_base();
4365 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4366 am.addr.mem = nomem;
4369 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4370 am.addr.use_frame = 0;
4371 am.addr.frame_entity = NULL;
4372 am.addr.symconst_sign = 0;
4373 am.ls_mode = mode_F;
4374 am.mem_proj = nomem;
4375 am.op_type = ia32_AddrModeS;
4377 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4378 am.pinned = op_pin_state_floats;
4380 am.ins_permuted = 0;
4382 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4383 am.new_op1, am.new_op2, get_fpcw());
4384 set_am_attributes(fadd, &am);
4386 set_irn_mode(fadd, mode_T);
4387 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Converts a float to a lowered 64bit integer by storing it with an x87
 * fist(p) into a 64bit frame slot; the two 32bit halves are read back by
 * gen_Proj_l_FloattoLL. */
4392 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4394 ir_node *src_block = get_nodes_block(node);
4395 ir_node *block = be_transform_node(src_block);
4396 ir_graph *irg = get_Block_irg(block);
4397 dbg_info *dbgi = get_irn_dbg_info(node);
4398 ir_node *frame = get_irg_frame(irg);
4399 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4400 ir_node *new_val = be_transform_node(val);
4401 ir_node *fist, *mem;
4403 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4404 SET_IA32_ORIG_NODE(fist, node);
4405 set_ia32_use_frame(fist);
4406 set_ia32_op_type(fist, ia32_AddrModeD);
4407 set_ia32_ls_mode(fist, mode_Ls);
4413  * the BAD transformer.
/* Registered for opcodes that must never reach the transformation phase;
 * aborts with a diagnostic naming the offending node. */
4415 static ir_node *bad_transform(ir_node *node)
4417 panic("No transform function for %+F available.", node);
/* Reads one 32bit half of the 64bit integer that gen_ia32_l_FloattoLL
 * stored into the frame: a plain Load at offset 0 (low half) or 4
 * (high half), selected by the Proj number. */
4420 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4422 ir_node *block = be_transform_node(get_nodes_block(node));
4423 ir_graph *irg = get_Block_irg(block);
4424 ir_node *pred = get_Proj_pred(node);
4425 ir_node *new_pred = be_transform_node(pred);
4426 ir_node *frame = get_irg_frame(irg);
4427 dbg_info *dbgi = get_irn_dbg_info(node);
4428 long pn = get_Proj_proj(node);
4433 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4434 SET_IA32_ORIG_NODE(load, node);
4435 set_ia32_use_frame(load);
4436 set_ia32_op_type(load, ia32_AddrModeS);
4437 set_ia32_ls_mode(load, mode_Iu);
4438 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4439 * 32 bit from it with this particular load */
4440 attr = get_ia32_attr(load);
4441 attr->data.need_64bit_stackent = 1;
4443 if (pn == pn_ia32_l_FloattoLL_res_high) {
4444 add_ia32_am_offs_int(load, 4);
4446 assert(pn == pn_ia32_l_FloattoLL_res_low);
4449 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4455  * Transform the Projs of an AddSP.
/* A be_AddSP was transformed to an ia32 SubSP (x86 stack grows down),
 * so the Proj numbers are renumbered onto SubSP outputs; the stack-pointer
 * result is pinned to %esp. */
4457 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4459 ir_node *pred = get_Proj_pred(node);
4460 ir_node *new_pred = be_transform_node(pred);
4461 dbg_info *dbgi = get_irn_dbg_info(node);
4462 long proj = get_Proj_proj(node);
4464 if (proj == pn_be_AddSP_sp) {
4465 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4466 pn_ia32_SubSP_stack);
4467 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4469 } else if (proj == pn_be_AddSP_res) {
4470 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4471 pn_ia32_SubSP_addr);
4472 } else if (proj == pn_be_AddSP_M) {
4473 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4476 panic("No idea how to transform proj->AddSP");
4480  * Transform the Projs of a SubSP.
/* A be_SubSP was transformed to an ia32 AddSP; renumber the Projs
 * accordingly and pin the stack-pointer result to %esp. */
4482 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4484 ir_node *pred = get_Proj_pred(node);
4485 ir_node *new_pred = be_transform_node(pred);
4486 dbg_info *dbgi = get_irn_dbg_info(node);
4487 long proj = get_Proj_proj(node);
4489 if (proj == pn_be_SubSP_sp) {
4490 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4491 pn_ia32_AddSP_stack);
4492 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4494 } else if (proj == pn_be_SubSP_M) {
4495 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4498 panic("No idea how to transform proj->SubSP");
4502  * Transform and renumber the Projs from a Load.
/* The Load may have become an ia32 Load, xLoad (SSE), vfld (x87) or even
 * an ia32 Conv when source address mode folded it away; each case maps
 * the firm Proj numbers onto the respective ia32 output numbers. */
4504 static ir_node *gen_Proj_Load(ir_node *node)
4507 ir_node *block = be_transform_node(get_nodes_block(node));
4508 ir_node *pred = get_Proj_pred(node);
4509 dbg_info *dbgi = get_irn_dbg_info(node);
4510 long proj = get_Proj_proj(node);
4512 /* loads might be part of source address mode matches, so we don't
4513 * transform the ProjMs yet (with the exception of loads whose result is
4516 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4519 /* this is needed, because sometimes we have loops that are only
4520 reachable through the ProjM */
4521 be_enqueue_preds(node);
4522 /* do it in 2 steps, to silence firm verifier */
4523 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4524 set_Proj_proj(res, pn_ia32_mem);
4528 /* renumber the proj */
4529 new_pred = be_transform_node(pred);
4530 if (is_ia32_Load(new_pred)) {
4533 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4535 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4536 case pn_Load_X_regular:
4537 return new_rd_Jmp(dbgi, block);
4538 case pn_Load_X_except:
4539 /* This Load might raise an exception. Mark it. */
4540 set_ia32_exc_label(new_pred, 1);
4541 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load was folded into a Conv with address mode */
4545 } else if (is_ia32_Conv_I2I(new_pred) ||
4546 is_ia32_Conv_I2I8Bit(new_pred)) {
4547 set_irn_mode(new_pred, mode_T);
4548 if (proj == pn_Load_res) {
4549 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4550 } else if (proj == pn_Load_M) {
4551 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4553 } else if (is_ia32_xLoad(new_pred)) {
4556 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4558 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4559 case pn_Load_X_regular:
4560 return new_rd_Jmp(dbgi, block);
4561 case pn_Load_X_except:
4562 /* This Load might raise an exception. Mark it. */
4563 set_ia32_exc_label(new_pred, 1);
4564 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4568 } else if (is_ia32_vfld(new_pred)) {
4571 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4573 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4574 case pn_Load_X_regular:
4575 return new_rd_Jmp(dbgi, block);
4576 case pn_Load_X_except:
4577 /* This Load might raise an exception. Mark it. */
4578 set_ia32_exc_label(new_pred, 1);
4579 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4584 /* can happen for ProJMs when source address mode happened for the
4587 /* however it should not be the result proj, as that would mean the
4588 load had multiple users and should not have been used for
4590 if (proj != pn_Load_M) {
4591 panic("internal error: transformed node not a Load");
4593 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4596 panic("No idea how to transform proj");
4600  * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map to one ia32 Div/IDiv with separate div and
 * mod result outputs; the switch on the ORIGINAL opcode decides how the
 * firm Proj numbers translate. */
4602 static ir_node *gen_Proj_DivMod(ir_node *node)
4604 ir_node *block = be_transform_node(get_nodes_block(node));
4605 ir_node *pred = get_Proj_pred(node);
4606 ir_node *new_pred = be_transform_node(pred);
4607 dbg_info *dbgi = get_irn_dbg_info(node);
4608 long proj = get_Proj_proj(node);
4610 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4612 switch (get_irn_opcode(pred)) {
4616 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4618 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4619 case pn_Div_X_regular:
4620 return new_rd_Jmp(dbgi, block);
4621 case pn_Div_X_except:
/* division may trap (e.g. divide by zero); mark for exception label */
4622 set_ia32_exc_label(new_pred, 1);
4623 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4631 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4633 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4634 case pn_Mod_X_except:
4635 set_ia32_exc_label(new_pred, 1);
4636 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4644 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4645 case pn_DivMod_res_div:
4646 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4647 case pn_DivMod_res_mod:
4648 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4649 case pn_DivMod_X_regular:
4650 return new_rd_Jmp(dbgi, block);
4651 case pn_DivMod_X_except:
4652 set_ia32_exc_label(new_pred, 1);
4653 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4662 panic("No idea how to transform proj->DivMod");
4666  * Transform and renumber the Projs from a CopyB.
/* A CopyB became either CopyB_i (fixed size, immediate count) or the
 * generic CopyB; route the memory Proj to the matching output. */
4668 static ir_node *gen_Proj_CopyB(ir_node *node)
4670 ir_node *pred = get_Proj_pred(node);
4671 ir_node *new_pred = be_transform_node(pred);
4672 dbg_info *dbgi = get_irn_dbg_info(node);
4673 long proj = get_Proj_proj(node);
4676 case pn_CopyB_M_regular:
4677 if (is_ia32_CopyB_i(new_pred)) {
4678 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4679 } else if (is_ia32_CopyB(new_pred)) {
4680 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4687 panic("No idea how to transform proj->CopyB");
4691  * Transform and renumber the Projs from a Quot.
/* A float Quot became either an SSE xDiv or an x87 vfdiv; route memory
 * and result Projs to the matching output of whichever was created. */
4693 static ir_node *gen_Proj_Quot(ir_node *node)
4695 ir_node *pred = get_Proj_pred(node);
4696 ir_node *new_pred = be_transform_node(pred);
4697 dbg_info *dbgi = get_irn_dbg_info(node);
4698 long proj = get_Proj_proj(node);
4702 if (is_ia32_xDiv(new_pred)) {
4703 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4704 } else if (is_ia32_vfdiv(new_pred)) {
4705 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4709 if (is_ia32_xDiv(new_pred)) {
4710 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4711 } else if (is_ia32_vfdiv(new_pred)) {
4712 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* float division does not raise firm-visible exceptions here */
4715 case pn_Quot_X_regular:
4716 case pn_Quot_X_except:
4721 panic("No idea how to transform proj->Quot");
/* Transforms a be_Call into an ia32 Call.  The call target may be folded
 * as address mode or immediate; register parameters are routed to the
 * fixed eax/ecx/edx inputs; calls returning floats enable the x87
 * simulator, and SSE2 calls are remembered for later fixup. */
4724 static ir_node *gen_be_Call(ir_node *node)
4726 dbg_info *const dbgi = get_irn_dbg_info(node);
4727 ir_node *const src_block = get_nodes_block(node);
4728 ir_node *const block = be_transform_node(src_block);
4729 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4730 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4731 ir_node *const sp = be_transform_node(src_sp);
4732 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4733 ia32_address_mode_t am;
4734 ia32_address_t *const addr = &am.addr;
4739 ir_node * eax = noreg_GP;
4740 ir_node * ecx = noreg_GP;
4741 ir_node * edx = noreg_GP;
4742 unsigned const pop = be_Call_get_pop(node);
4743 ir_type *const call_tp = be_Call_get_type(node);
4744 int old_no_pic_adjust;
4746 /* Run the x87 simulator if the call returns a float value */
4747 if (get_method_n_ress(call_tp) > 0) {
4748 ir_type *const res_type = get_method_res_type(call_tp, 0);
4749 ir_mode *const res_mode = get_type_mode(res_type);
4751 if (res_mode != NULL && mode_is_float(res_mode)) {
4752 env_cg->do_x87_sim = 1;
4756 /* We do not want be_Call direct calls */
4757 assert(be_Call_get_entity(node) == NULL);
4759 /* special case for PIC trampoline calls */
4760 old_no_pic_adjust = no_pic_adjust;
4761 no_pic_adjust = env_cg->birg->main_env->options->pic;
4763 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4764 match_am | match_immediate);
4766 no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; walk the remaining register parameters */
4768 i = get_irn_arity(node) - 1;
4769 fpcw = be_transform_node(get_irn_n(node, i--));
4770 for (; i >= be_pos_Call_first_arg; --i) {
4771 arch_register_req_t const *const req = arch_get_register_req(node, i);
4772 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4774 assert(req->type == arch_register_req_type_limited);
4775 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4777 switch (*req->limited) {
4778 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4779 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4780 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4781 default: panic("Invalid GP register for register parameter");
4785 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4786 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4787 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4788 set_am_attributes(call, &am);
4789 call = fix_mem_proj(call, &am);
4791 if (get_irn_pinned(node) == op_pin_state_pinned)
4792 set_irn_pinned(call, op_pin_state_pinned);
4794 SET_IA32_ORIG_NODE(call, node);
4796 if (ia32_cg_config.use_sse2) {
4797 /* remember this call for post-processing */
4798 ARR_APP1(ir_node *, call_list, call);
4799 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4806 * Transform Builtin trap
4808 static ir_node *gen_trap(ir_node *node)
4810 dbg_info *dbgi = get_irn_dbg_info(node);
4811 ir_node *block = be_transform_node(get_nodes_block(node));
4812 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4814 return new_bd_ia32_UD2(dbgi, block, mem);
4818 * Transform Builtin debugbreak
4820 static ir_node *gen_debugbreak(ir_node *node)
4822 dbg_info *dbgi = get_irn_dbg_info(node);
4823 ir_node *block = be_transform_node(get_nodes_block(node));
4824 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4826 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4830  * Transform Builtin return_address
/* For a non-zero frame level a ClimbFrame walks the frame-pointer chain
 * first; the return address is then loaded from its known slot in the
 * resulting frame (frame entity supplied by the backend). */
4832 static ir_node *gen_return_address(ir_node *node)
4834 ir_node *param = get_Builtin_param(node, 0);
4835 ir_node *frame = get_Builtin_param(node, 1);
4836 dbg_info *dbgi = get_irn_dbg_info(node);
4837 tarval *tv = get_Const_tarval(param);
4838 unsigned long value = get_tarval_long(tv);
4840 ir_node *block = be_transform_node(get_nodes_block(node));
4841 ir_node *ptr = be_transform_node(frame);
4845 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4846 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4847 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4850 /* load the return address from this frame */
4851 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4853 set_irn_pinned(load, get_irn_pinned(node));
4854 set_ia32_op_type(load, ia32_AddrModeS);
4855 set_ia32_ls_mode(load, mode_Iu);
4857 set_ia32_am_offs_int(load, 0);
4858 set_ia32_use_frame(load);
4859 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4861 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerializing relies on all load variants sharing the res proj number */
4862 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4863 && pn_ia32_vfld_res == pn_ia32_Load_res
4864 && pn_ia32_Load_res == pn_ia32_res);
4865 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4868 SET_IA32_ORIG_NODE(load, node);
4869 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4873  * Transform Builtin frame_address
/* Analogous to gen_return_address, but loads the saved frame pointer
 * instead of the return address. */
4875 static ir_node *gen_frame_address(ir_node *node)
4877 ir_node *param = get_Builtin_param(node, 0);
4878 ir_node *frame = get_Builtin_param(node, 1);
4879 dbg_info *dbgi = get_irn_dbg_info(node);
4880 tarval *tv = get_Const_tarval(param);
4881 unsigned long value = get_tarval_long(tv);
4883 ir_node *block = be_transform_node(get_nodes_block(node));
4884 ir_node *ptr = be_transform_node(frame);
4889 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4890 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4891 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4894 /* load the frame address from this frame */
4895 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4897 set_irn_pinned(load, get_irn_pinned(node));
4898 set_ia32_op_type(load, ia32_AddrModeS);
4899 set_ia32_ls_mode(load, mode_Iu);
4901 ent = ia32_get_frame_address_entity();
4903 set_ia32_am_offs_int(load, 0);
4904 set_ia32_use_frame(load);
4905 set_ia32_frame_ent(load, ent);
4907 /* will fail anyway, but gcc does this: */
4908 set_ia32_am_offs_int(load, 0);
4911 if (get_irn_pinned(node) == op_pin_state_floats) {
/* rematerializing relies on all load variants sharing the res proj number */
4912 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4913 && pn_ia32_vfld_res == pn_ia32_Load_res
4914 && pn_ia32_Load_res == pn_ia32_res);
4915 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4918 SET_IA32_ORIG_NODE(load, node);
4919 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4923  * Transform Builtin frame_address
/* NOTE(review): the comment above looks copy-pasted; this function
 * transforms the prefetch Builtin.  It picks PrefetchW (3DNow!) for
 * writes, one of the SSE prefetch hints (NTA/T0/T1/T2) based on the
 * locality parameter, the plain 3DNow! Prefetch otherwise, or routes
 * memory through unchanged when no prefetch instruction is available. */
4925 static ir_node *gen_prefetch(ir_node *node)
4928 ir_node *ptr, *block, *mem, *base, *index;
4929 ir_node *param, *new_node;
4932 ia32_address_t addr;
4934 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4935 /* no prefetch at all, route memory */
4936 return be_transform_node(get_Builtin_mem(node));
/* param 1: 0 = read prefetch, 1 = write prefetch */
4939 param = get_Builtin_param(node, 1);
4940 tv = get_Const_tarval(param);
4941 rw = get_tarval_long(tv);
4943 /* construct load address */
4944 memset(&addr, 0, sizeof(addr));
4945 ptr = get_Builtin_param(node, 0);
4946 ia32_create_address_mode(&addr, ptr, 0);
4953 base = be_transform_node(base);
4956 if (index == NULL) {
4959 index = be_transform_node(index);
4962 dbgi = get_irn_dbg_info(node);
4963 block = be_transform_node(get_nodes_block(node));
4964 mem = be_transform_node(get_Builtin_mem(node));
4966 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4967 /* we have 3DNow!, this was already checked above */
4968 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4969 } else if (ia32_cg_config.use_sse_prefetch) {
4970 /* note: rw == 1 is IGNORED in that case */
4971 param = get_Builtin_param(node, 2);
4972 tv = get_Const_tarval(param);
4973 locality = get_tarval_long(tv);
4975 /* SSE style prefetch */
4978 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4981 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4984 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4987 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4991 assert(ia32_cg_config.use_3dnow_prefetch);
4992 /* 3DNow! style prefetch */
4993 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4996 set_irn_pinned(new_node, get_irn_pinned(node));
4997 set_ia32_op_type(new_node, ia32_AddrModeS);
4998 set_ia32_ls_mode(new_node, mode_Bu);
4999 set_address(new_node, &addr);
5001 SET_IA32_ORIG_NODE(new_node, node);
5003 be_dep_on_frame(new_node);
5004 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5008 * Transform bsf like node
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	/* helper: transform a one-operand builtin (Bsf/Bsr/Popcnt style) where
	 * the operand may be folded into an address mode; func constructs the
	 * concrete ia32 node */
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* allow the operand to be matched as a memory operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));
	SET_IA32_ORIG_NODE(cnt, node);
	/* if a memory operand was folded, route the memory Proj correctly */
	return fix_mem_proj(cnt, &am);
5033 * Transform builtin ffs.
static ir_node *gen_ffs(ir_node *node)
	/* ffs(x) = bsf(x) + 1, and 0 for x == 0; implemented as
	 * (bsf(x) | -(x == 0)) + 1, since -1 + 1 == 0 */
	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node *real = skip_Proj(bsf);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *flag, *set, *conv, *neg, *or;
	/* make sure we can take the flags output of the Bsf as well */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
	/* ZF is set iff the Bsf input was zero */
	flag = new_r_Proj(real, mode_b, pn_ia32_flags);
	/* sete: 1 if input was 0, else 0 */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);
	/* zero-extend the 8 bit setcc result to 32 bit */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);
	/* neg: 0xFFFFFFFF if input was 0, else 0 */
	neg = new_bd_ia32_Neg(dbgi, block, conv);
	/* force the (undefined-for-zero) bsf result to -1 when input was 0 */
	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);
	/* +1: yields bsf+1 for nonzero input, 0 for zero input */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5071 * Transform builtin clz.
static ir_node *gen_clz(ir_node *node)
	ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
	ir_node *real = skip_Proj(bsr);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
	/* bsr yields the index of the highest set bit, so for a 32 bit value
	 * clz == 31 - bsr, which equals bsr ^ 31 because 0 <= bsr <= 31.
	 * NOTE(review): like hardware bsr this is undefined for input 0 --
	 * matches the builtin's contract, verify callers rely on that */
	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5085 * Transform builtin ctz.
static ir_node *gen_ctz(ir_node *node)
	/* ctz is exactly what bsf computes (both undefined for input 0) */
	return gen_unop_AM(node, new_bd_ia32_Bsf);
5093 * Transform builtin parity.
static ir_node *gen_parity(ir_node *node)
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *imm, *cmp, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* cmp param, 0 computes the flags of the operand itself */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);
	SET_IA32_ORIG_NODE(cmp, node);
	cmp = fix_mem_proj(cmp, &am);
	/* setp materializes the parity flag.
	 * NOTE(review): the x86 parity flag only reflects the lowest byte of
	 * the result, while __builtin_parity covers the whole operand --
	 * confirm only byte-sized values reach this transformer */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);
	/* zero-extend the 8 bit setcc result to 32 bit */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
5133 * Transform builtin popcount
5135 static ir_node *gen_popcount(ir_node *node)
5137 ir_node *param = get_Builtin_param(node, 0);
5138 dbg_info *dbgi = get_irn_dbg_info(node);
5140 ir_node *block = get_nodes_block(node);
5141 ir_node *new_block = be_transform_node(block);
5144 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5146 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5147 if (ia32_cg_config.use_popcnt) {
5148 ia32_address_mode_t am;
5149 ia32_address_t *addr = &am.addr;
5152 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5154 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5155 set_am_attributes(cnt, &am);
5156 set_ia32_ls_mode(cnt, get_irn_mode(param));
5158 SET_IA32_ORIG_NODE(cnt, node);
5159 return fix_mem_proj(cnt, &am);
5162 new_param = be_transform_node(param);
5164 /* do the standard popcount algo */
5166 /* m1 = x & 0x55555555 */
5167 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5168 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5171 simm = ia32_create_Immediate(NULL, 0, 1);
5172 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5174 /* m2 = s1 & 0x55555555 */
5175 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5178 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5180 /* m4 = m3 & 0x33333333 */
5181 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5182 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5185 simm = ia32_create_Immediate(NULL, 0, 2);
5186 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5188 /* m5 = s2 & 0x33333333 */
5189 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5192 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5194 /* m7 = m6 & 0x0F0F0F0F */
5195 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5196 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5199 simm = ia32_create_Immediate(NULL, 0, 4);
5200 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5202 /* m8 = s3 & 0x0F0F0F0F */
5203 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5206 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5208 /* m10 = m9 & 0x00FF00FF */
5209 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5210 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5213 simm = ia32_create_Immediate(NULL, 0, 8);
5214 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5216 /* m11 = s4 & 0x00FF00FF */
5217 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5219 /* m12 = m10 + m11 */
5220 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5222 /* m13 = m12 & 0x0000FFFF */
5223 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5224 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5226 /* s5 = m12 >> 16 */
5227 simm = ia32_create_Immediate(NULL, 0, 16);
5228 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5230 /* res = m13 + s5 */
5231 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5235 * Transform builtin byte swap.
5237 static ir_node *gen_bswap(ir_node *node)
5239 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5240 dbg_info *dbgi = get_irn_dbg_info(node);
5242 ir_node *block = get_nodes_block(node);
5243 ir_node *new_block = be_transform_node(block);
5244 ir_mode *mode = get_irn_mode(param);
5245 unsigned size = get_mode_size_bits(mode);
5246 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5250 if (ia32_cg_config.use_i486) {
5251 /* swap available */
5252 return new_bd_ia32_Bswap(dbgi, new_block, param);
5254 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5255 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5257 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5258 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5260 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5262 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5263 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5265 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5266 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5269 /* swap16 always available */
5270 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5273 panic("Invalid bswap size (%d)", size);
5278 * Transform builtin outport.
static ir_node *gen_outport(ir_node *node)
	/* the port number may be folded into an immediate */
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *oldv = get_Builtin_param(node, 1);
	ir_mode *mode = get_irn_mode(oldv);
	ir_node *value = be_transform_node(oldv);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	/* out instruction: write the value to the given I/O port */
	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	/* the access size is taken from the mode of the written value */
	set_ia32_ls_mode(res, mode);
5296 * Transform builtin inport.
static ir_node *gen_inport(ir_node *node)
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	/* the port number may be folded into an immediate */
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	/* in instruction: read from the given I/O port */
	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	/* the access size comes from the builtin's first result type */
	set_ia32_ls_mode(res, mode);
	/* check for missing Result Proj */
5316 * Transform a builtin inner trampoline
static ir_node *gen_inner_trampoline(ir_node *node)
	/* builds a 10 byte trampoline at *ptr:
	 *   B9 <env:imm32>    mov ecx, <env>
	 *   E9 <rel32>        jmp <callee>   (displacement relative to end) */
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = get_Builtin_param(node, 1);
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *trampoline;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;
	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);
	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);
	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);
	/* mov ecx, <env> */
	/* 0xB9 is the opcode byte of "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	/* store the environment pointer as the mov's 32 bit immediate */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);
	/* jmp rel <callee> */
	/* 0xE9 is the opcode byte of "jmp rel32" */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	trampoline = be_transform_node(ptr);
	/* the callee is typically an immediate */
	/* offset -10: the jmp displacement is relative to the end of the
	 * 10 byte trampoline, so callee - (trampoline + 10) is stored */
	if (is_SymConst(callee)) {
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
	/* store the computed displacement */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);
	/* result tuple: memory and the trampoline address */
	return new_r_Tuple(new_block, 2, in);
5406 * Transform Builtin node.
static ir_node *gen_Builtin(ir_node *node)
	/* dispatch each builtin kind to its dedicated transformer */
	ir_builtin_kind kind = get_Builtin_kind(node);
	return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
		return gen_ffs(node);
		return gen_clz(node);
		return gen_ctz(node);
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
		return gen_bswap(node);
		return gen_outport(node);
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	/* any kind not handled above is unsupported on this backend */
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5446 * Transform Proj(Builtin) node.
static ir_node *gen_Proj_Builtin(ir_node *proj)
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);
	/* builtins transformed to a single value: pass the new node through */
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	/* builtins transformed to a pure memory effect */
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		assert(get_Proj_proj(proj) == pn_Builtin_M);
	/* inport has both a result and a memory output */
	if (get_Proj_proj(proj) == pn_Builtin_1_result) {
		return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
	assert(get_Proj_proj(proj) == pn_Builtin_M);
	return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
	/* inner_trampoline was transformed to a Tuple (mem, trampoline addr) */
	case ir_bk_inner_trampoline:
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
static ir_node *gen_be_IncSP(ir_node *node)
	ir_node *res = be_duplicate_node(node);
	/* a stack pointer adjustment (add/sub esp) clobbers the flags */
	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5498 * Transform the Projs from a be_Call.
static ir_node *gen_Proj_be_Call(ir_node *node)
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);
	if (proj == pn_be_Call_M_regular) {
		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
	/* transform call modes */
	/* data results must use the mode of their ia32 register class */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
		arch_register_req_t const *const req = arch_get_register_req_out(node);
		int const n_outs = arch_irn_get_n_outs(new_call);
		assert(proj >= pn_be_Call_first_res);
		assert(req->type & arch_register_req_type_limited);
		/* locate the new call's output whose register constraint matches
		 * the old proj's (same class, same limited register set) */
		for (i = 0; i < n_outs; ++i) {
			arch_register_req_t const *const new_req
				= arch_get_out_register_req(new_call, i);
			if (!(new_req->type & arch_register_req_type_limited) ||
			    new_req->cls != req->cls ||
			    *new_req->limited != *req->limited)
	res = new_rd_Proj(dbgi, new_call, mode, proj);
	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	/* pin the fixed-register outputs to their registers */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5563 * Transform the Projs from a Cmp.
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* Cmp projs must have been consumed by the mode_b lowering pass;
	 * reaching this transformer indicates a pipeline bug */
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5573 * Transform the Projs from a Bound.
static ir_node *gen_Proj_Bound(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	/* the bound check is transformed to a Jcc: X_regular maps to the
	 * in-range branch, X_except to the out-of-range branch */
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
static ir_node *gen_Proj_ASM(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	long pos = get_Proj_proj(node);
	if (mode == mode_M) {
		/* the memory output is always the last output of the new ASM node */
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");
	return new_r_Proj(new_pred, mode, pos);
5617 * Transform and potentially renumber Proj nodes.
static ir_node *gen_Proj(ir_node *node)
	ir_node *pred = get_Proj_pred(node);
	/* dispatch on the opcode of the Proj's predecessor */
	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		/* a Store only produces memory */
		if (proj == pn_Store_M) {
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_Builtin(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
		case pn_Start_X_initial_exec: {
			ir_node *block = get_nodes_block(pred);
			ir_node *new_block = be_transform_node(block);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, new_block);
		case pn_Start_P_tls:
			return gen_Proj_tls(node);
		if (is_ia32_l_FloattoLL(pred)) {
			return gen_Proj_l_FloattoLL(node);
		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		/* default: renumber nothing, just retransform in mode_Iu if the
		 * value lives in a GP register */
		ir_mode *mode = get_irn_mode(node);
		if (ia32_mode_needs_gp_reg(mode)) {
			ir_node *new_pred = be_transform_node(pred);
			ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
			                               get_Proj_proj(node));
			/* keep the old node number for debugging */
			new_proj->node_nr = node->node_nr;
	return be_duplicate_node(node);
5696 * Enters all transform functions into the generic pointer
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();
	/* GEN installs the matching gen_* transformer for an opcode;
	 * BAD marks opcodes that must not occur anymore at this stage */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) { op_##a->ops.generic = (op_func)bad_transform; }
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat)
	GEN(ia32_l_FloattoLL)
	/* we should never see these nodes */
	/* handle builtins */
	/* handle generic backend nodes */
5795 * Pre-transform all unknown and noreg nodes.
static void ia32_pretransform_node(void)
	ia32_code_gen_t *cg = env_cg;
	/* pre-transform the virtual NoReg nodes so later transformers can
	 * reference the transformed versions directly */
	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
	/* cache frequently used nodes in the file-scope shortcuts */
	nomem = get_irg_no_mem(current_ir_graph);
	noreg_GP = ia32_new_NoReg_gp(cg);
5812 * Walker, checks if all ia32 nodes producing more than one result have their
5813 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
static void add_missing_keep_walker(ir_node *node, void *data)
	/* bitset of output numbers for which a Proj was found */
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);
	if (!is_ia32_irn(node))
	n_outs = arch_irn_get_n_outs(node);
	if (is_ia32_SwitchJmp(node))
	/* found_projs is an unsigned bitset, so the out count must fit */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	/* collect which outputs already have Projs */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		/* The node could be kept */
		/* memory outputs need no keep */
		if (get_irn_mode(proj) == mode_M)
		pn = get_Proj_proj(proj);
		assert(pn < n_outs);
		found_projs |= 1 << pn;
	/* are keeps missing? */
	for (i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;
		if (found_projs & (1 << i)) {
		req = arch_get_out_register_req(node, i);
		/* flag outputs are not kept */
		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
		/* create a Proj for the unused output and keep it alive */
		block = get_nodes_block(node);
		in[0] = new_r_Proj(node, arch_register_class_mode(cls), i);
		if (last_keep != NULL) {
			/* reuse one Keep node per original node */
			be_Keep_add_node(last_keep, cls, in[0]);
		last_keep = be_new_Keep(block, 1, in);
		if (sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
5887 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
void ia32_add_missing_keeps(ia32_code_gen_t *cg)
	ir_graph *irg = be_get_birg_irg(cg->birg);
	/* walk the whole graph and let the walker insert Projs/Keeps */
	irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5897 * Post-process all calls if we are in SSE mode.
5898 * The ABI requires that the results are in st0, copy them
5899 * to a xmm register.
static void postprocess_fp_call_results(void)
	/* iterate the calls collected during transformation, newest first */
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];
		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;
			if (! is_atomic_type(res_tp)) {
				/* no floating point return */
			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */
			/* the ABI returns floats in st0; fetch that result proj */
			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);
				/* The node could be kept */
				if (be_is_Keep(succ))
				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);
					/* build an x87 store with the same address attributes */
					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);
				/* lazily build the st0 -> xmm copy once per result:
				 * spill st(0) to the frame, then reload it as SSE value */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;
					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);
					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);
					new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
					/* serialize memory users behind the new load */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);
				/* redirect the user to the xmm-resident copy */
				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5990 /* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg)
	register_transformers();
	initial_fpcw = NULL;
	/* height information is needed for address mode matching */
	be_timer_push(T_HEIGHTS);
	heights = heights_new(cg->irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(cg->birg);
	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();
	/* collect float-returning calls for the SSE result fixup below */
	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->irg, ia32_pretransform_node);
	/* even with SSE2 the ABI returns floats in st0: copy them to xmm */
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);
	/* restore the caller's CSE setting */
	set_opt_cse(cse_last);
	ia32_free_non_address_mode_nodes();
	heights_free(heights);
6026 void ia32_init_transform(void)
6028 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");