2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* String bit patterns used to build tarvals for the float sign/abs masks
 * (32- and 64-bit IEEE) and the unsigned-long-long conversion bias. */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
/* Linker-local label names for the entities that hold the constants above. */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * floating point register files. */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle (only present in debug builds). */
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached transformed node for the initial x87 control word; see get_fpcw(). */
95 static ir_node *initial_fpcw = NULL;
/* Constructor-function signatures for the generated ia32 node builders.
 * They are passed to the generic gen_binop/gen_unop/... helpers below so
 * one transformation routine can serve many opcodes.
 * NOTE(review): several typedefs appear truncated in this extract (missing
 * trailing parameters/semicolons) — confirm against the upstream file. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it's enough to have those once per transformed graph */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
/* x87 variant: fldz/fld1 can materialize 0.0 and 1.0 directly.
 * NOTE(review): the return statements of this function are missing from this
 * extract — presumably `return true;` after the check and `return false;` at
 * the end; confirm against upstream. */
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
/* SSE variant: only 0.0 is trivially constructible (xorps); with
 * CONSTRUCT_SSE_CONST defined, more patterns are matched. */
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double from its byte representation */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
/* Lowers a firm Const node to ia32:
 * - SSE float constants: xZero for 0.0, movd gp->xmm for 32-bit patterns,
 *   otherwise a rematerializable xLoad from a float-constant entity;
 * - x87 float constants: vfldz/vfld1 for 0.0/1.0, otherwise a vfld from a
 *   float-constant entity (PIC base added when position-independent);
 * - integer constants: an ia32 Const with the tarval converted to mode_Iu.
 * NOTE(review): many lines (declarations, braces, else-branches) are missing
 * from this extract — confirm details against upstream before relying on
 * the exact control flow shown here. */
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
/* 0.0 is free via xorps (xZero) */
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
/* build 1.0 by shifting an all-ones register left then right */
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the 32-bit pattern into the high half of the xmm register */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a global float entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
293 floatent = create_float_const_entity(node);
294 /* create_float_const_ent is smart and sometimes creates
295 smaller entities, so use the entity's mode for the load */
296 ls_mode = get_type_mode(get_entity_type(floatent));
298 if (env_cg->birg->main_env->options->pic) {
299 base = arch_code_generator_get_pic_base(env_cg);
304 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 set_ia32_op_type(load, ia32_AddrModeS);
307 set_ia32_am_sc(load, floatent);
308 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
309 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 SET_IA32_ORIG_NODE(load, node);
317 be_dep_on_frame(load);
319 } else { /* non-float mode */
321 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned before extracting the value */
324 tv = tarval_convert_to(tv, mode_Iu);
326 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
328 panic("couldn't convert constant tarval (%+F)", node);
330 val = get_tarval_long(tv);
332 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
333 SET_IA32_ORIG_NODE(cnst, node);
335 be_dep_on_frame(cnst);
341 * Transforms a SymConst.
/* Float-mode SymConsts become a load (SSE xLoad or x87 vfld) from the
 * referenced entity; address SymConsts become an ia32 Const carrying the
 * entity. Only symconst_addr_ent is supported. */
343 static ir_node *gen_SymConst(ir_node *node)
345 ir_node *old_block = get_nodes_block(node);
346 ir_node *block = be_transform_node(old_block);
347 dbg_info *dbgi = get_irn_dbg_info(node);
348 ir_mode *mode = get_irn_mode(node);
351 if (mode_is_float(mode)) {
352 if (ia32_cg_config.use_sse2)
353 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
355 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
356 set_ia32_am_sc(cnst, get_SymConst_entity(node));
357 set_ia32_use_frame(cnst);
361 if (get_SymConst_kind(node) != symconst_addr_ent) {
362 panic("backend only support symconst_addr_ent (at %+F)", node);
364 entity = get_SymConst_entity(node);
365 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
368 SET_IA32_ORIG_NODE(cnst, node);
370 be_dep_on_frame(cnst);
375 * Create a float type for the given mode and cache it.
377 * @param mode the mode for the float type (might be integer mode for SSE2 types)
378 * @param align alignment
/* One static cache array per supported mode, indexed by alignment (0..15).
 * The same primitive type is reused for repeated (mode, align) requests. */
380 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
386 if (mode == mode_Iu) {
387 static ir_type *int_Iu[16] = {NULL, };
389 if (int_Iu[align] == NULL) {
390 int_Iu[align] = tp = new_type_primitive(mode);
391 /* set the specified alignment */
392 set_type_alignment_bytes(tp, align);
394 return int_Iu[align];
395 } else if (mode == mode_Lu) {
396 static ir_type *int_Lu[16] = {NULL, };
398 if (int_Lu[align] == NULL) {
399 int_Lu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Lu[align];
404 } else if (mode == mode_F) {
405 static ir_type *float_F[16] = {NULL, };
407 if (float_F[align] == NULL) {
408 float_F[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return float_F[align];
413 } else if (mode == mode_D) {
414 static ir_type *float_D[16] = {NULL, };
416 if (float_D[align] == NULL) {
417 float_D[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_D[align];
/* fallback: extended precision (mode_E) */
423 static ir_type *float_E[16] = {NULL, };
425 if (float_E[align] == NULL) {
426 float_E[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_E[align];
435 * Create a float[2] array type for the given atomic type.
437 * @param tp the atomic type
/* Caches one array type per (element mode, alignment); used e.g. for the
 * two-element ULL_BIAS table. The array gets a fixed layout of 2 elements. */
439 static ir_type *ia32_create_float_array(ir_type *tp)
441 ir_mode *mode = get_type_mode(tp);
442 unsigned align = get_type_alignment_bytes(tp);
447 if (mode == mode_F) {
448 static ir_type *float_F[16] = {NULL, };
450 if (float_F[align] != NULL)
451 return float_F[align];
452 arr = float_F[align] = new_type_array(1, tp);
453 } else if (mode == mode_D) {
454 static ir_type *float_D[16] = {NULL, };
456 if (float_D[align] != NULL)
457 return float_D[align];
458 arr = float_D[align] = new_type_array(1, tp);
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 arr = float_E[align] = new_type_array(1, tp);
466 set_type_alignment_bytes(arr, align);
467 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
468 set_type_state(arr, layout_fixed);
472 /* Generates an entity for a known FP const (used for FP Neg + Abs).
 * Entities are created lazily and cached in ent_cache[]; the ULL_BIAS entry
 * is a two-element array {0, bias} so the bias can be indexed at runtime. */
473 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
475 static const struct {
476 const char *ent_name;
477 const char *cnst_str;
/* mode selector (0=Iu, 1=Lu, other=F) and alignment per constant */
480 } names [ia32_known_const_max] = {
481 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
482 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
483 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
484 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
485 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
487 static ir_entity *ent_cache[ia32_known_const_max];
489 const char *ent_name, *cnst_str;
495 ent_name = names[kct].ent_name;
496 if (! ent_cache[kct]) {
497 cnst_str = names[kct].cnst_str;
499 switch (names[kct].mode) {
500 case 0: mode = mode_Iu; break;
501 case 1: mode = mode_Lu; break;
502 default: mode = mode_F; break;
504 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
505 tp = ia32_create_float_type(mode, names[kct].align);
507 if (kct == ia32_ULLBIAS)
508 tp = ia32_create_float_array(tp);
509 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
511 set_entity_ld_ident(ent, get_entity_ident(ent));
512 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
513 set_entity_visibility(ent, ir_visibility_local);
515 if (kct == ia32_ULLBIAS) {
516 ir_initializer_t *initializer = create_initializer_compound(2);
518 set_initializer_compound_value(initializer, 0,
519 create_initializer_tarval(get_tarval_null(mode)));
520 set_initializer_compound_value(initializer, 1,
521 create_initializer_tarval(tv));
523 set_entity_initializer(ent, initializer);
525 set_entity_initializer(ent, create_initializer_tarval(tv));
528 /* cache the entry */
529 ent_cache[kct] = ent;
532 return ent_cache[kct];
536 * return true if the node is a Proj(Load) and could be used in source address
537 * mode for another node. Will return only true if the @p other node is not
538 * dependent on the memory of the Load (for binary operations use the other
539 * input here, for unary operations use NULL).
541 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
542 ir_node *other, ir_node *other2, match_flags_t flags)
547 /* float constants are always available */
548 if (is_Const(node)) {
549 ir_mode *mode = get_irn_mode(node);
550 if (mode_is_float(mode)) {
551 if (ia32_cg_config.use_sse2) {
552 if (is_simple_sse_Const(node))
555 if (is_simple_x87_Const(node))
/* a multi-user constant is cheaper kept in a register */
558 if (get_irn_n_edges(node) > 1)
/* only a Proj(Load) result in the same block qualifies */
566 load = get_Proj_pred(node);
567 pn = get_Proj_proj(node);
568 if (!is_Load(load) || pn != pn_Load_res)
570 if (get_nodes_block(load) != block)
572 /* we only use address mode if we're the only user of the load */
573 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
575 /* in some edge cases with address mode we might reach the load normally
576 * and through some AM sequence, if it is already materialized then we
577 * can't create an AM node from it */
578 if (be_is_transformed(node))
581 /* don't do AM if other node inputs depend on the load (via mem-proj) */
582 if (other != NULL && prevents_AM(block, load, other))
585 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result record of match_arguments(): the chosen addressing mode plus the
 * (possibly transformed/permuted) operands and operand properties.
 * NOTE(review): several members are missing from this extract. */
591 typedef struct ia32_address_mode_t ia32_address_mode_t;
592 struct ia32_address_mode_t {
597 ia32_op_type_t op_type;
/* operand order of a commutative op may be swapped (ins_permuted) */
601 unsigned commutative : 1;
602 unsigned ins_permuted : 1;
605 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
607 /* construct load address */
608 memset(addr, 0, sizeof(addr[0]));
609 ia32_create_address_mode(addr, ptr, 0);
611 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
612 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
613 addr->mem = be_transform_node(mem);
/* Build a source address mode for @p node: float Consts are turned into a
 * load from a constant entity, Proj(Load) nodes reuse the Load's address,
 * pinned state and load mode. */
616 static void build_address(ia32_address_mode_t *am, ir_node *node,
617 ia32_create_am_flags_t flags)
619 ia32_address_t *addr = &am->addr;
625 if (is_Const(node)) {
626 ir_entity *entity = create_float_const_entity(node);
627 addr->base = noreg_GP;
628 addr->index = noreg_GP;
630 addr->symconst_ent = entity;
632 am->ls_mode = get_type_mode(get_entity_type(entity));
633 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load) (checked by the caller) */
637 load = get_Proj_pred(node);
638 ptr = get_Load_ptr(load);
639 mem = get_Load_mem(load);
640 new_mem = be_transform_node(mem);
641 am->pinned = get_irn_pinned(load);
642 am->ls_mode = get_Load_mode(load);
643 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
646 /* construct load address */
647 ia32_create_address_mode(addr, ptr, flags);
649 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
650 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy an ia32_address_t's components (scale, symconst, offset, frame info)
 * into the addressing attributes of an ia32 node. */
654 static void set_address(ir_node *node, const ia32_address_t *addr)
656 set_ia32_am_scale(node, addr->scale);
657 set_ia32_am_sc(node, addr->symconst_ent);
658 set_ia32_am_offs_int(node, addr->offset);
659 if (addr->symconst_sign)
660 set_ia32_am_sc_sign(node);
/* NOTE(review): a guarding `if (addr->use_frame)` line appears to be missing
 * before the next statement in this extract — confirm against upstream;
 * unconditionally marking frame use would be wrong. */
662 set_ia32_use_frame(node);
663 set_ia32_frame_ent(node, addr->frame_entity);
667 * Apply attributes of a given address mode to a node.
669 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
671 set_address(node, &am->addr);
673 set_ia32_op_type(node, am->op_type);
674 set_ia32_ls_mode(node, am->ls_mode);
675 if (am->pinned == op_pin_state_pinned) {
676 /* beware: some nodes are already pinned and did not allow to change the state */
677 if (get_irn_pinned(node) != op_pin_state_pinned)
678 set_irn_pinned(node, op_pin_state_pinned);
/* presumably guarded by `if (am->commutative)` in the full source — the
 * guard line is missing from this extract; confirm upstream */
681 set_ia32_commutative(node);
685 * Check if a given node is a Down-Conv, i.e. an integer Conv
686 * from a mode with more bits to a mode with fewer (or equal) bits.
687 * Moreover, we return only true if the node has not more than 1 user.
689 * @param node the node
690 * @return non-zero if node is a Down-Conv
692 static int is_downconv(const ir_node *node)
700 /* we only want to skip the conv when we're the only user
701 * (not optimal but for now...)
703 if (get_irn_n_edges(node) > 1)
706 src_mode = get_irn_mode(get_Conv_op(node));
707 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must not be wider */
709 ia32_mode_needs_gp_reg(src_mode) &&
710 ia32_mode_needs_gp_reg(dest_mode) &&
711 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
714 /* Skip all Down-Conv's on a given node and return the resulting node. */
715 ir_node *ia32_skip_downconv(ir_node *node)
717 while (is_downconv(node))
718 node = get_Conv_op(node);
/* Widen @p node to a 32-bit mode with an I2I conversion; the target mode's
 * signedness follows the source mode (signed -> sign-extend, else zero-extend).
 * NOTE(review): the tgt_mode selection lines are missing from this extract. */
723 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
725 ir_mode *mode = get_irn_mode(node);
730 if (mode_is_signed(mode)) {
735 block = get_nodes_block(node);
736 dbgi = get_irn_dbg_info(node);
738 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
742 * matches operands of a node into ia32 addressing/operand modes. This covers
743 * usage of source address mode, immediates, operations with non 32-bit modes,
745 * The resulting data is filled into the @p am struct. block is the block
746 * of the node whose arguments are matched. op1, op2 are the first and second
747 * input that are matched (op1 may be NULL). other_op is another unrelated
748 * input that is not matched! but which is needed sometimes to check if AM
749 * for op1/op2 is legal.
750 * @p flags describes the supported modes of the operation in detail.
752 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
753 ir_node *op1, ir_node *op2, ir_node *other_op,
756 ia32_address_t *addr = &am->addr;
757 ir_mode *mode = get_irn_mode(op2);
758 int mode_bits = get_mode_size_bits(mode);
759 ir_node *new_op1, *new_op2;
761 unsigned commutative;
762 int use_am_and_immediates;
765 memset(am, 0, sizeof(am[0]));
/* decode the supported matching options from the flags */
767 commutative = (flags & match_commutative) != 0;
768 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
769 use_am = (flags & match_am) != 0;
770 use_immediate = (flags & match_immediate) != 0;
771 assert(!use_am_and_immediates || use_immediate);
774 assert(!commutative || op1 != NULL);
775 assert(use_am || !(flags & match_8bit_am));
776 assert(use_am || !(flags & match_16bit_am));
/* narrow modes disable AM unless explicitly allowed */
778 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
779 (mode_bits == 16 && !(flags & match_16bit_am))) {
783 /* we can simply skip downconvs for mode neutral nodes: the upper bits
784 * can be random for these operations */
785 if (flags & match_mode_neutral) {
786 op2 = ia32_skip_downconv(op2);
788 op1 = ia32_skip_downconv(op1);
792 /* match immediates. firm nodes are normalized: constants are always on the
795 if (!(flags & match_try_am) && use_immediate) {
796 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode on op2 first ... */
799 if (new_op2 == NULL &&
800 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
801 build_address(am, op2, 0);
802 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
803 if (mode_is_float(mode)) {
804 new_op2 = ia32_new_NoReg_vfp(env_cg);
808 am->op_type = ia32_AddrModeS;
/* ... then, for commutative ops, on op1 (swapping the operands) */
809 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
811 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
813 build_address(am, op1, 0);
815 if (mode_is_float(mode)) {
816 noreg = ia32_new_NoReg_vfp(env_cg);
821 if (new_op2 != NULL) {
824 new_op1 = be_transform_node(op2);
826 am->ins_permuted = 1;
828 am->op_type = ia32_AddrModeS;
/* no AM possible: normal register/immediate operands */
831 am->op_type = ia32_Normal;
833 if (flags & match_try_am) {
839 mode = get_irn_mode(op2);
840 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
841 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
843 new_op2 = create_upconv(op2, NULL);
844 am->ls_mode = mode_Iu;
846 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
848 new_op2 = be_transform_node(op2);
849 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill remaining address parts with defaults */
852 if (addr->base == NULL)
853 addr->base = noreg_GP;
854 if (addr->index == NULL)
855 addr->index = noreg_GP;
856 if (addr->mem == NULL)
859 am->new_op1 = new_op1;
860 am->new_op2 = new_op2;
861 am->commutative = commutative;
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
/* nothing to do when no load was folded in (no mem proj to rewire) */
879 if (am->mem_proj == NULL)
882 /* we have to create a mode_T so the old MemProj can attach to us */
883 mode = get_irn_mode(node);
884 load = get_Proj_pred(am->mem_proj);
/* redirect all users of the folded Load to the new node */
886 be_set_transformed_node(load, node);
888 if (mode != mode_T) {
889 set_irn_mode(node, mode_T);
890 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 construct_binop_func *func, match_flags_t flags)
909 ir_node *block, *new_block, *new_node;
910 ia32_address_mode_t am;
911 ia32_address_t *addr = &am.addr;
913 block = get_nodes_block(node);
914 match_arguments(&am, block, op1, op2, NULL, flags);
916 dbgi = get_irn_dbg_info(node);
917 new_block = be_transform_node(block);
918 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 am.new_op1, am.new_op2);
920 set_am_attributes(new_node, &am);
921 /* we can't use source address mode anymore when using immediates */
922 if (!(flags & match_am_and_immediates) &&
923 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 set_ia32_am_support(new_node, ia32_am_none);
925 SET_IA32_ORIG_NODE(new_node, node);
927 new_node = fix_mem_proj(new_node, &am);
933 * Generic names for the inputs of an ia32 binary op.
936 n_ia32_l_binop_left, /**< ia32 left input */
937 n_ia32_l_binop_right, /**< ia32 right input */
938 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* verify the generic indices line up with the generated Adc/Sbb node inputs */
940 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
941 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
942 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
943 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
944 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
945 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructor ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 ir_node *src_block = get_nodes_block(node);
959 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 ir_node *block, *new_node, *new_eflags;
964 ia32_address_mode_t am;
965 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op so AM won't break the flags dependency */
967 match_arguments(&am, src_block, op1, op2, eflags, flags);
969 dbgi = get_irn_dbg_info(node);
970 block = be_transform_node(src_block);
971 new_eflags = be_transform_node(eflags);
972 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 am.new_op1, am.new_op2, new_eflags);
974 set_am_attributes(new_node, &am);
975 /* we can't use source address mode anymore when using immediates */
976 if (!(flags & match_am_and_immediates) &&
977 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 set_ia32_am_support(new_node, ia32_am_none);
979 SET_IA32_ORIG_NODE(new_node, node);
981 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed and cached) initial x87 floating point
 * control word node, used as input by x87 arithmetic nodes. */
986 static ir_node *get_fpcw(void)
989 if (initial_fpcw != NULL)
992 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
993 &ia32_fp_cw_regs[REG_FPCW]);
994 initial_fpcw = be_transform_node(fpcw);
1000 * Construct a standard binary operation, set AM and immediate if required.
1002 * @param op1 The first operand
1003 * @param op2 The second operand
1004 * @param func The node constructor function
1005 * @return The constructed ia32 node.
1007 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1008 construct_binop_float_func *func)
1010 ir_mode *mode = get_irn_mode(node);
1012 ir_node *block, *new_block, *new_node;
1013 ia32_address_mode_t am;
1014 ia32_address_t *addr = &am.addr;
1015 ia32_x87_attr_t *attr;
1016 /* All operations are considered commutative, because there are reverse
1017 * variants (fsubr, fdivr) that undo an operand swap */
1018 match_flags_t flags = match_commutative;
1020 /* happens for div nodes... */
1022 mode = get_divop_resmod(node);
1024 /* cannot use address mode with long double on x87 */
1025 if (get_mode_size_bits(mode) <= 64)
1028 block = get_nodes_block(node);
1029 match_arguments(&am, block, op1, op2, NULL, flags);
1031 dbgi = get_irn_dbg_info(node);
1032 new_block = be_transform_node(block);
/* x87 binops additionally take the fp control word as input */
1033 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1034 am.new_op1, am.new_op2, get_fpcw());
1035 set_am_attributes(new_node, &am);
/* record a possible operand swap so emit can pick the reverse opcode */
1037 attr = get_ia32_x87_attr(new_node);
1038 attr->attr.data.ins_permuted = am.ins_permuted;
1040 SET_IA32_ORIG_NODE(new_node, node);
1042 new_node = fix_mem_proj(new_node, &am);
1048 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
1055 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1056 construct_shift_func *func,
1057 match_flags_t flags)
1060 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1062 assert(! mode_is_float(get_irn_mode(node)));
1063 assert(flags & match_immediate);
1064 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* value operand: either mode-neutral (skip downconvs) or widened to 32 bit */
1066 if (flags & match_mode_neutral) {
1067 op1 = ia32_skip_downconv(op1);
1068 new_op1 = be_transform_node(op1);
1069 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1070 new_op1 = create_upconv(op1, node);
1072 new_op1 = be_transform_node(op1);
1075 /* the shift amount can be any mode that is bigger than 5 bits, since all
1076 * other bits are ignored anyway */
1077 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1078 ir_node *const op = get_Conv_op(op2);
1079 if (mode_is_float(get_irn_mode(op)))
1082 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1084 new_op2 = create_immediate_or_transform(op2, 0);
1086 dbgi = get_irn_dbg_info(node);
1087 block = get_nodes_block(node);
1088 new_block = be_transform_node(block);
1089 new_node = func(dbgi, new_block, new_op1, new_op2);
1090 SET_IA32_ORIG_NODE(new_node, node);
1092 /* lowered shift instruction may have a dependency operand, handle it here */
1093 if (get_irn_arity(node) == 3) {
1094 /* we have a dependency */
1095 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1096 add_irn_dep(new_node, new_dep);
1104 * Construct a standard unary operation, set AM and immediate if required.
1106 * @param op The operand
1107 * @param func The node constructor function
1108 * @return The constructed ia32 node.
1110 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1111 match_flags_t flags)
1114 ir_node *block, *new_block, *new_op, *new_node;
1116 assert(flags == 0 || flags == match_mode_neutral);
1117 if (flags & match_mode_neutral) {
/* upper bits don't matter: drop narrowing conversions on the operand */
1118 op = ia32_skip_downconv(op);
1121 new_op = be_transform_node(op);
1122 dbgi = get_irn_dbg_info(node);
1123 block = get_nodes_block(node);
1124 new_block = be_transform_node(block);
1125 new_node = func(dbgi, new_block, new_op);
1127 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a decomposed address: transforms base/index
 * (substituting noreg when absent — those branches are truncated in this
 * extract) and copies the address attributes onto the Lea. */
1132 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1133 ia32_address_t *addr)
1135 ir_node *base, *index, *res;
1141 base = be_transform_node(base)
1144 index = addr->index;
1145 if (index == NULL) {
1148 index = be_transform_node(index);
1151 res = new_bd_ia32_Lea(dbgi, block, base, index);
1152 set_address(res, addr);
1158 * Returns non-zero if a given address mode has a symbolic or
1159 * numerical offset != 0.
1161 static int am_has_immediates(const ia32_address_t *addr)
1163 return addr->offset != 0 || addr->symconst_ent != NULL
1164 || addr->frame_entity || addr->use_frame;
1168 * Creates an ia32 Add.
1170 * @return the created ia32 Add node
1172 static ir_node *gen_Add(ir_node *node)
1174 ir_mode *mode = get_irn_mode(node);
1175 ir_node *op1 = get_Add_left(node);
1176 ir_node *op2 = get_Add_right(node);
1178 ir_node *block, *new_block, *new_node, *add_immediate_op;
1179 ia32_address_t addr;
1180 ia32_address_mode_t am;
/* float adds go straight to the generic binop helpers */
1182 if (mode_is_float(mode)) {
1183 if (ia32_cg_config.use_sse2)
1184 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1185 match_commutative | match_am);
1187 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1190 ia32_mark_non_am(node);
1192 op2 = ia32_skip_downconv(op2);
1193 op1 = ia32_skip_downconv(op1);
/* integer Add strategy:
1197 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1198 * 1. Add with immediate -> Lea
1199 * 2. Add with possible source address mode -> Add
1200 * 3. Otherwise -> Lea
 */
1202 memset(&addr, 0, sizeof(addr));
1203 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1204 add_immediate_op = NULL;
1206 dbgi = get_irn_dbg_info(node);
1207 block = get_nodes_block(node);
1208 new_block = be_transform_node(block);
/* case 0: the whole Add collapsed into an immediate */
1211 if (addr.base == NULL && addr.index == NULL) {
1212 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1213 addr.symconst_sign, 0, addr.offset);
1214 be_dep_on_frame(new_node);
1215 SET_IA32_ORIG_NODE(new_node, node);
1218 /* add with immediate? */
1219 if (addr.index == NULL) {
1220 add_immediate_op = addr.base;
1221 } else if (addr.base == NULL && addr.scale == 0) {
1222 add_immediate_op = addr.index;
1225 if (add_immediate_op != NULL) {
1226 if (!am_has_immediates(&addr)) {
1227 #ifdef DEBUG_libfirm
1228 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* x + 0: result is just the other operand */
1231 return be_transform_node(add_immediate_op);
1234 new_node = create_lea_from_address(dbgi, new_block, &addr);
1235 SET_IA32_ORIG_NODE(new_node, node);
1239 /* test if we can use source address mode */
1240 match_arguments(&am, block, op1, op2, NULL, match_commutative
1241 | match_mode_neutral | match_am | match_immediate | match_try_am);
1243 /* construct an Add with source address mode */
1244 if (am.op_type == ia32_AddrModeS) {
1245 ia32_address_t *am_addr = &am.addr;
1246 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1247 am_addr->index, am_addr->mem, am.new_op1,
1249 set_am_attributes(new_node, &am);
1250 SET_IA32_ORIG_NODE(new_node, node);
1252 new_node = fix_mem_proj(new_node, &am);
1257 /* otherwise construct a lea */
1258 new_node = create_lea_from_address(dbgi, new_block, &addr);
1259 SET_IA32_ORIG_NODE(new_node, node);
1264 * Creates an ia32 Mul.
1266 * @return the created ia32 Mul node
1268 static ir_node *gen_Mul(ir_node *node)
1270 ir_node *op1 = get_Mul_left(node);
1271 ir_node *op2 = get_Mul_right(node);
1272 ir_mode *mode = get_irn_mode(node);
1274 if (mode_is_float(mode)) {
1275 if (ia32_cg_config.use_sse2)
1276 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1277 match_commutative | match_am);
1279 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1281 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1282 match_commutative | match_am | match_mode_neutral |
1283 match_immediate | match_am_and_immediates);
1287 * Creates an ia32 Mulh.
1288 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1289 * this result while Mul returns the lower 32 bit.
1291 * @return the created ia32 Mulh node
/*
 * Transform a firm Mulh (upper half of a widening multiply) into an
 * ia32 IMul1OP (signed) or Mul (unsigned) and return the Proj for the
 * high 32 result bits.  Only 32-bit modes are supported; anything else
 * panics.
 *
 * NOTE(review): extraction is missing the `new_node` declaration and
 * the else-branch brace lines; visible code kept verbatim.
 */
1293 static ir_node *gen_Mulh(ir_node *node)
1295 ir_node *block = get_nodes_block(node);
1296 ir_node *new_block = be_transform_node(block);
1297 dbg_info *dbgi = get_irn_dbg_info(node);
1298 ir_node *op1 = get_Mulh_left(node);
1299 ir_node *op2 = get_Mulh_right(node);
1300 ir_mode *mode = get_irn_mode(node);
1302 ir_node *proj_res_high;
1304 if (get_mode_size_bits(mode) != 32) {
1305 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul; unsigned -> Mul.  Both produce the high
 * half on a dedicated result Proj. */
1308 if (mode_is_signed(mode)) {
1309 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1310 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1312 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1313 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1315 return proj_res_high;
1319 * Creates an ia32 And.
1321 * @return The created ia32 And node
/*
 * Transform a firm And into an ia32 And.  An And with constant 0xFF or
 * 0xFFFF is recognized as a zero extension and lowered to a
 * conversion instead of a real And.
 *
 * NOTE(review): extraction is missing the lines selecting `src_mode`
 * (presumably mode_Bu for 0xFF / mode_Hu for 0xFFFF — confirm) and the
 * `res` declaration/return; visible code kept verbatim.
 */
1323 static ir_node *gen_And(ir_node *node)
1325 ir_node *op1 = get_And_left(node);
1326 ir_node *op2 = get_And_right(node);
1327 assert(! mode_is_float(get_irn_mode(node)));
1329 /* is it a zero extension? */
1330 if (is_Const(op2)) {
1331 tarval *tv = get_Const_tarval(op2);
1332 long v = get_tarval_long(tv);
1334 if (v == 0xFF || v == 0xFFFF) {
1335 dbg_info *dbgi = get_irn_dbg_info(node);
1336 ir_node *block = get_nodes_block(node);
1343 assert(v == 0xFFFF);
1346 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: a plain ia32 And */
1351 return gen_binop(node, op1, op2, new_bd_ia32_And,
1352 match_commutative | match_mode_neutral | match_am | match_immediate);
1358 * Creates an ia32 Or.
1360 * @return The created ia32 Or node
1362 static ir_node *gen_Or(ir_node *node)
1364 ir_node *op1 = get_Or_left(node);
1365 ir_node *op2 = get_Or_right(node);
1367 assert (! mode_is_float(get_irn_mode(node)));
1368 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1369 | match_mode_neutral | match_am | match_immediate);
1375 * Creates an ia32 Eor.
1377 * @return The created ia32 Eor node
1379 static ir_node *gen_Eor(ir_node *node)
1381 ir_node *op1 = get_Eor_left(node);
1382 ir_node *op2 = get_Eor_right(node);
1384 assert(! mode_is_float(get_irn_mode(node)));
1385 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1386 | match_mode_neutral | match_am | match_immediate);
1391 * Creates an ia32 Sub.
1393 * @return The created ia32 Sub node
/*
 * Transform a firm Sub into an ia32 Sub (SSE xSub / x87 vfsub for
 * floats).  A Sub with a constant right operand should have been
 * normalized into an Add by the middleend; a warning is printed when
 * one slips through.
 *
 * NOTE(review): extraction cuts the ir_fprintf call mid-argument-list
 * and drops the closing brace lines; visible code kept verbatim.
 */
1395 static ir_node *gen_Sub(ir_node *node)
1397 ir_node *op1 = get_Sub_left(node);
1398 ir_node *op2 = get_Sub_right(node);
1399 ir_mode *mode = get_irn_mode(node);
1401 if (mode_is_float(mode)) {
1402 if (ia32_cg_config.use_sse2)
1403 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1405 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub is not commutative: no match_commutative here */
1408 if (is_Const(op2)) {
1409 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1413 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1414 | match_am | match_immediate);
/*
 * Compute the memory input for a node that consumed a load via address
 * mode.  Avoids creating a memory self-loop: if the consumed value and
 * the source memory stem from the same predecessor, or the source
 * memory is a Sync containing that predecessor, the offending pred is
 * dropped and the remaining memories are merged with a new Sync.
 *
 * NOTE(review): extraction drops several lines (the `ins`/`i`/`n`
 * declarations, the loop's skip/`continue`, the final two-entry Sync
 * setup); visible code kept verbatim.
 */
1417 static ir_node *transform_AM_mem(ir_node *const block,
1418 ir_node *const src_val,
1419 ir_node *const src_mem,
1420 ir_node *const am_mem)
1422 if (is_NoMem(am_mem)) {
1423 return be_transform_node(src_mem);
1424 } else if (is_Proj(src_val) &&
1426 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1427 /* avoid memory loop */
1429 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1430 ir_node *const ptr_pred = get_Proj_pred(src_val);
1431 int const arity = get_Sync_n_preds(src_mem);
1436 NEW_ARR_A(ir_node*, ins, arity + 1);
1438 /* NOTE: This sometimes produces dead-code because the old sync in
1439 * src_mem might not be used anymore, we should detect this case
1440 * and kill the sync... */
1441 for (i = arity - 1; i >= 0; --i) {
1442 ir_node *const pred = get_Sync_pred(src_mem, i);
1444 /* avoid memory loop */
1445 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1448 ins[n++] = be_transform_node(pred);
1453 return new_r_Sync(block, n, ins);
/* default: merge the transformed source memory with the AM memory */
1457 ins[0] = be_transform_node(src_mem);
1459 return new_r_Sync(block, 2, ins);
1464 * Create a 32bit to 64bit signed extension.
1466 * @param dbgi debug info
1467 * @param block the block where node nodes should be placed
1468 * @param val the value to extend
1469 * @param orig the original node
/*
 * Create a 32->64 bit sign extension of `val` in `block`: either a
 * short cltd (when use_short_sex_eax is configured; ProduceVal feeds
 * the implicit eax dependency) or an explicit arithmetic shift right
 * by 31.
 *
 * NOTE(review): extraction drops the `res` declaration, braces and the
 * trailing `return res;` — confirm against the full source.
 */
1471 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1472 ir_node *val, const ir_node *orig)
1477 if (ia32_cg_config.use_short_sex_eax) {
1478 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1479 be_dep_on_frame(pval);
1480 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* fallback: sar val, 31 replicates the sign bit */
1482 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1483 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1485 SET_IA32_ORIG_NODE(res, orig);
1490 * Generates an ia32 DivMod with additional infrastructure for the
1491 * register allocator if needed.
/*
 * Common transformation for firm Div / Mod / DivMod nodes: builds an
 * ia32 IDiv (signed, with a 32->64 sign extension of the dividend) or
 * Div (unsigned, with a zero constant as high word).
 *
 * NOTE(review): extraction drops the case labels of the opcode switch,
 * several declarations (op1/op2/mem/mode/new_mem/new_node) and the
 * final return; visible code kept verbatim.
 */
1493 static ir_node *create_Div(ir_node *node)
1495 dbg_info *dbgi = get_irn_dbg_info(node);
1496 ir_node *block = get_nodes_block(node);
1497 ir_node *new_block = be_transform_node(block);
1504 ir_node *sign_extension;
1505 ia32_address_mode_t am;
1506 ia32_address_t *addr = &am.addr;
1508 /* the upper bits have random contents for smaller modes */
1509 switch (get_irn_opcode(node)) {
1511 op1 = get_Div_left(node);
1512 op2 = get_Div_right(node);
1513 mem = get_Div_mem(node);
1514 mode = get_Div_resmode(node);
1517 op1 = get_Mod_left(node);
1518 op2 = get_Mod_right(node);
1519 mem = get_Mod_mem(node);
1520 mode = get_Mod_resmode(node);
1523 op1 = get_DivMod_left(node);
1524 op2 = get_DivMod_right(node);
1525 mem = get_DivMod_mem(node);
1526 mode = get_DivMod_resmode(node);
1529 panic("invalid divmod node %+F", node);
1532 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1534 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1535 is the memory of the consumed address. We can have only the second op as address
1536 in Div nodes, so check only op2. */
1537 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1539 if (mode_is_signed(mode)) {
1540 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1541 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1542 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: high word of the dividend is simply zero */
1544 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1545 be_dep_on_frame(sign_extension);
1547 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1548 addr->index, new_mem, am.new_op2,
1549 am.new_op1, sign_extension);
1552 set_irn_pinned(new_node, get_irn_pinned(node));
1554 set_am_attributes(new_node, &am);
1555 SET_IA32_ORIG_NODE(new_node, node);
1557 new_node = fix_mem_proj(new_node, &am);
1563 * Generates an ia32 Mod.
1565 static ir_node *gen_Mod(ir_node *node)
1567 return create_Div(node);
1571 * Generates an ia32 Div.
1573 static ir_node *gen_Div(ir_node *node)
1575 return create_Div(node);
1579 * Generates an ia32 DivMod.
1581 static ir_node *gen_DivMod(ir_node *node)
1583 return create_Div(node);
1589 * Creates an ia32 floating Div.
1591 * @return The created ia32 xDiv node
1593 static ir_node *gen_Quot(ir_node *node)
1595 ir_node *op1 = get_Quot_left(node);
1596 ir_node *op2 = get_Quot_right(node);
1598 if (ia32_cg_config.use_sse2) {
1599 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1601 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1607 * Creates an ia32 Shl.
1609 * @return The created ia32 Shl node
1611 static ir_node *gen_Shl(ir_node *node)
1613 ir_node *left = get_Shl_left(node);
1614 ir_node *right = get_Shl_right(node);
1616 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1617 match_mode_neutral | match_immediate);
1621 * Creates an ia32 Shr.
1623 * @return The created ia32 Shr node
1625 static ir_node *gen_Shr(ir_node *node)
1627 ir_node *left = get_Shr_left(node);
1628 ir_node *right = get_Shr_right(node);
1630 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1636 * Creates an ia32 Sar.
1638 * @return The created ia32 Shrs node
/*
 * Transform a firm Shrs (arithmetic right shift) into an ia32 Sar.
 * Two special patterns are recognized first: Shrs(x, 31) is a pure
 * sign extension (create_sex_32_64), and Shrs(Shl(x, c), c) with
 * c == 16 or 24 is an 8/16-bit sign extension (I2I conversion).
 *
 * NOTE(review): extraction drops the guard checking the constant for
 * the sign-extension case (presumably val == 31 — confirm), the
 * src_mode selection for the 8/16-bit case and the `res` return;
 * visible code kept verbatim.
 */
1640 static ir_node *gen_Shrs(ir_node *node)
1642 ir_node *left = get_Shrs_left(node);
1643 ir_node *right = get_Shrs_right(node);
1645 if (is_Const(right)) {
1646 tarval *tv = get_Const_tarval(right);
1647 long val = get_tarval_long(tv);
1649 /* this is a sign extension */
1650 dbg_info *dbgi = get_irn_dbg_info(node);
1651 ir_node *block = be_transform_node(get_nodes_block(node));
1652 ir_node *new_op = be_transform_node(left);
1654 return create_sex_32_64(dbgi, block, new_op, node);
1658 /* 8 or 16 bit sign extension? */
1659 if (is_Const(right) && is_Shl(left)) {
1660 ir_node *shl_left = get_Shl_left(left);
1661 ir_node *shl_right = get_Shl_right(left);
1662 if (is_Const(shl_right)) {
1663 tarval *tv1 = get_Const_tarval(right);
1664 tarval *tv2 = get_Const_tarval(shl_right);
1665 if (tv1 == tv2 && tarval_is_long(tv1)) {
1666 long val = get_tarval_long(tv1);
1667 if (val == 16 || val == 24) {
1668 dbg_info *dbgi = get_irn_dbg_info(node);
1669 ir_node *block = get_nodes_block(node);
1679 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic shift right */
1688 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1694 * Creates an ia32 Rol.
1696 * @param op1 The first operator
1697 * @param op2 The second operator
1698 * @return The created ia32 RotL node
1700 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1702 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1708 * Creates an ia32 Ror.
1709 * NOTE: There is no RotR with immediate because this would always be a RotL
1710 * "imm-mode_size_bits" which can be pre-calculated.
1712 * @param op1 The first operator
1713 * @param op2 The second operator
1714 * @return The created ia32 RotR node
1716 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1718 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1724 * Creates an ia32 RotR or RotL (depending on the found pattern).
1726 * @return The created ia32 RotL or RotR node
/*
 * Transform a firm Rotl into an ia32 Rol, or — when the rotate amount
 * has the shape "mode_size_bits - e" (encoded as Add(Minus(e), bits))
 * — into a Ror by e, saving the Minus/Add.
 *
 * NOTE(review): extraction drops the guard that establishes `add`
 * (presumably `if (is_Add(op2))` with `add = op2` — confirm), part of
 * the condition at line 1749+, and the final return; visible code kept
 * verbatim.
 */
1728 static ir_node *gen_Rotl(ir_node *node)
1730 ir_node *rotate = NULL;
1731 ir_node *op1 = get_Rotl_left(node);
1732 ir_node *op2 = get_Rotl_right(node);
1734 /* Firm has only RotL, so we are looking for a right (op2)
1735 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1736 that means we can create a RotR instead of an Add and a RotL */
1740 ir_node *left = get_Add_left(add);
1741 ir_node *right = get_Add_right(add);
1742 if (is_Const(right)) {
1743 tarval *tv = get_Const_tarval(right);
1744 ir_mode *mode = get_irn_mode(node);
1745 long bits = get_mode_size_bits(mode);
1747 if (is_Minus(left) &&
1748 tarval_is_long(tv) &&
1749 get_tarval_long(tv) == bits &&
1752 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1753 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: emit a plain rotate-left */
1758 if (rotate == NULL) {
1759 rotate = gen_Rol(node, op1, op2);
1768 * Transforms a Minus node.
1770 * @return The created ia32 Minus node
/*
 * Transform a firm Minus (negation).  SSE floats: xor with the sign
 * bit constant (SSIGN/DSIGN) loaded via address mode; x87 floats:
 * vfchs; integers: ia32 Neg.
 *
 * NOTE(review): extraction drops the declarations of new_node/size/ent
 * and the final return; visible code kept verbatim.
 */
1772 static ir_node *gen_Minus(ir_node *node)
1774 ir_node *op = get_Minus_op(node);
1775 ir_node *block = be_transform_node(get_nodes_block(node));
1776 dbg_info *dbgi = get_irn_dbg_info(node);
1777 ir_mode *mode = get_irn_mode(node);
1782 if (mode_is_float(mode)) {
1783 ir_node *new_op = be_transform_node(op);
1784 if (ia32_cg_config.use_sse2) {
1785 /* TODO: non-optimal... if we have many xXors, then we should
1786 * rather create a load for the const and use that instead of
1787 * several AM nodes... */
1788 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1790 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1791 nomem, new_op, noreg_xmm);
/* attach the sign-bit constant entity as the xor's AM source */
1793 size = get_mode_size_bits(mode);
1794 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1796 set_ia32_am_sc(new_node, ent);
1797 set_ia32_op_type(new_node, ia32_AddrModeS);
1798 set_ia32_ls_mode(new_node, mode);
1800 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation */
1803 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1806 SET_IA32_ORIG_NODE(new_node, node);
1812 * Transforms a Not node.
1814 * @return The created ia32 Not node
1816 static ir_node *gen_Not(ir_node *node)
1818 ir_node *op = get_Not_op(node);
1820 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1821 assert (! mode_is_float(get_irn_mode(node)));
1823 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1829 * Transforms an Abs node.
1831 * @return The created ia32 Abs node
/*
 * Transform a firm Abs.  SSE floats: and with the abs mask constant
 * (SABS/DABS) via address mode; x87 floats: vfabs; integers: the
 * branch-free idiom (x ^ sex) - sex, where sex = x >> 31.
 *
 * NOTE(review): extraction drops the new_op/new_node/size/ent
 * declarations and the final return; visible code kept verbatim.
 */
1833 static ir_node *gen_Abs(ir_node *node)
1835 ir_node *block = get_nodes_block(node);
1836 ir_node *new_block = be_transform_node(block);
1837 ir_node *op = get_Abs_op(node);
1838 dbg_info *dbgi = get_irn_dbg_info(node);
1839 ir_mode *mode = get_irn_mode(node);
1845 if (mode_is_float(mode)) {
1846 new_op = be_transform_node(op);
1848 if (ia32_cg_config.use_sse2) {
1849 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1850 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1851 nomem, new_op, noreg_fp);
/* mask away the sign bit via the SABS/DABS constant */
1853 size = get_mode_size_bits(mode);
1854 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1856 set_ia32_am_sc(new_node, ent);
1858 SET_IA32_ORIG_NODE(new_node, node);
1860 set_ia32_op_type(new_node, ia32_AddrModeS);
1861 set_ia32_ls_mode(new_node, mode);
1863 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1864 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs: (x ^ (x >> 31)) - (x >> 31) */
1867 ir_node *xor, *sign_extension;
1869 if (get_mode_size_bits(mode) == 32) {
1870 new_op = be_transform_node(op);
1872 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1875 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1877 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1878 nomem, new_op, sign_extension);
1879 SET_IA32_ORIG_NODE(xor, node);
1881 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1882 nomem, xor, sign_extension);
1883 SET_IA32_ORIG_NODE(new_node, node);
1890 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1892 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1894 dbg_info *dbgi = get_irn_dbg_info(cmp);
1895 ir_node *block = get_nodes_block(cmp);
1896 ir_node *new_block = be_transform_node(block);
1897 ir_node *op1 = be_transform_node(x);
1898 ir_node *op2 = be_transform_node(n);
1900 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1904 * Transform a node returning a "flag" result.
1906 * @param node the node to transform
1907 * @param pnc_out the compare mode to use
/*
 * Produce a flags-producing node for `node` and report the compare
 * relation in *pnc_out.  Cmp-Projs are transformed directly (with a
 * bit-test (Bt) special case for Eq/Lg against "x & (1 << n)" when
 * use_bt is enabled, which yields a Jc/Jnc-style relation); any other
 * mode_b value is tested against zero with an ia32 Test.
 *
 * NOTE(review): extraction drops the guards establishing the And/Shl
 * pattern (presumably is_And(l) / is_Shl(la) / is_Shl(ra) — confirm),
 * the `flags` declaration and the returns; visible code kept verbatim.
 */
1909 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1916 /* we have a Cmp as input */
1917 if (is_Proj(node)) {
1918 ir_node *pred = get_Proj_pred(node);
1920 pn_Cmp pnc = get_Proj_proj(node);
1921 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1922 ir_node *l = get_Cmp_left(pred);
1923 ir_node *r = get_Cmp_right(pred);
1925 ir_node *la = get_And_left(l);
1926 ir_node *ra = get_And_right(l);
1928 ir_node *c = get_Shl_left(la);
1929 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1930 /* (1 << n) & ra) */
1931 ir_node *n = get_Shl_right(la);
1932 flags = gen_bt(pred, ra, n);
1933 /* we must generate a Jc/Jnc jump */
1934 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1937 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored case: shift on the right operand of the And */
1942 ir_node *c = get_Shl_left(ra);
1943 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1944 /* la & (1 << n)) */
1945 ir_node *n = get_Shl_right(ra);
1946 flags = gen_bt(pred, la, n);
1947 /* we must generate a Jc/Jnc jump */
1948 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1951 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1957 /* add ia32 compare flags */
1959 ir_node *l = get_Cmp_left(pred);
1960 ir_mode *mode = get_irn_mode(l);
1961 if (mode_is_float(mode))
1962 pnc |= ia32_pn_Cmp_float;
1963 else if (! mode_is_signed(mode))
1964 pnc |= ia32_pn_Cmp_unsigned;
1967 flags = be_transform_node(pred);
1972 /* a mode_b value, we have to compare it against 0 */
1973 dbgi = get_irn_dbg_info(node);
1974 new_block = be_transform_node(get_nodes_block(node));
1975 new_op = be_transform_node(node);
1976 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1977 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1978 *pnc_out = pn_Cmp_Lg;
1983 * Transforms a Load.
1985 * @return the created ia32 Load node
/*
 * Transform a firm Load into ia32: xLoad (SSE float), vfld (x87
 * float), a Conv-with-AM for sub-32-bit integers, or a plain Load.
 * The computed address mode is attached afterwards; floating
 * (unpinned) loads are marked rematerializable.
 *
 * NOTE(review): extraction drops the base/index extraction from
 * `addr`, some constructor mode arguments, the new_node/res_mode
 * declarations and the final return; visible code kept verbatim.
 */
1987 static ir_node *gen_Load(ir_node *node)
1989 ir_node *old_block = get_nodes_block(node);
1990 ir_node *block = be_transform_node(old_block);
1991 ir_node *ptr = get_Load_ptr(node);
1992 ir_node *mem = get_Load_mem(node);
1993 ir_node *new_mem = be_transform_node(mem);
1996 dbg_info *dbgi = get_irn_dbg_info(node);
1997 ir_mode *mode = get_Load_mode(node);
2000 ia32_address_t addr;
2002 /* construct load address */
2003 memset(&addr, 0, sizeof(addr));
2004 ia32_create_address_mode(&addr, ptr, 0);
2011 base = be_transform_node(base);
2014 if (index == NULL) {
2017 index = be_transform_node(index);
2020 if (mode_is_float(mode)) {
2021 if (ia32_cg_config.use_sse2) {
2022 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2024 res_mode = mode_xmm;
2026 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2028 res_mode = mode_vfp;
2031 assert(mode != mode_b);
2033 /* create a conv node with address mode for smaller modes */
2034 if (get_mode_size_bits(mode) < 32) {
2035 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2036 new_mem, noreg_GP, mode);
2038 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2043 set_irn_pinned(new_node, get_irn_pinned(node));
2044 set_ia32_op_type(new_node, ia32_AddrModeS);
2045 set_ia32_ls_mode(new_node, mode);
2046 set_address(new_node, &addr);
2048 if (get_irn_pinned(node) == op_pin_state_floats) {
2049 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2050 && pn_ia32_vfld_res == pn_ia32_Load_res
2051 && pn_ia32_Load_res == pn_ia32_res);
2052 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2055 SET_IA32_ORIG_NODE(new_node, node);
2057 be_dep_on_frame(new_node);
/*
 * Decide whether a value (a Load result Proj) can be consumed in
 * destination address mode by a store in `block` to `ptr`: the load
 * must have a single user, live in the same block, use the same
 * pointer, and neither `other` nor the store's memory chain may
 * create a dependency cycle through it.
 *
 * NOTE(review): extraction drops the early `return 0;`/success lines
 * and the Proj/Load type guards; visible code kept verbatim.
 */
2061 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2062 ir_node *ptr, ir_node *other)
2069 /* we only use address mode if we're the only user of the load */
2070 if (get_irn_n_edges(node) > 1)
2073 load = get_Proj_pred(node);
2076 if (get_nodes_block(load) != block)
2079 /* store should have the same pointer as the load */
2080 if (get_Load_ptr(load) != ptr)
2083 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2084 if (other != NULL &&
2085 get_nodes_block(other) == block &&
2086 heights_reachable_in_block(heights, other, load)) {
2090 if (prevents_AM(block, load, mem))
2092 /* Store should be attached to the load via mem */
2093 assert(heights_reachable_in_block(heights, mem, load));
/*
 * Build a destination-address-mode binary operation (e.g. AddMem):
 * one operand is the memory cell addressed by `ptr`, the other is an
 * immediate or register.  Tries op1 as the memory operand first, then
 * op2 if the operation is commutative; returns NULL (via the dropped
 * else branch) when neither qualifies.  The 8-bit constructor is used
 * for 8-bit modes.
 *
 * NOTE(review): extraction drops several declarations
 * (dbgi/block/new_mem/new_node/new_op/mem_proj/commutative), the
 * failing-match `return NULL;`, and the `addr->mem = nomem;` branch;
 * visible code kept verbatim.
 */
2098 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2099 ir_node *mem, ir_node *ptr, ir_mode *mode,
2100 construct_binop_dest_func *func,
2101 construct_binop_dest_func *func8bit,
2102 match_flags_t flags)
2104 ir_node *src_block = get_nodes_block(node);
2112 ia32_address_mode_t am;
2113 ia32_address_t *addr = &am.addr;
2114 memset(&am, 0, sizeof(am));
2116 assert(flags & match_immediate); /* there is no destam node without... */
2117 commutative = (flags & match_commutative) != 0;
2119 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2120 build_address(&am, op1, ia32_create_am_double_use);
2121 new_op = create_immediate_or_transform(op2, 0);
2122 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2123 build_address(&am, op2, ia32_create_am_double_use);
2124 new_op = create_immediate_or_transform(op1, 0);
/* fill in defaults for unused address parts */
2129 if (addr->base == NULL)
2130 addr->base = noreg_GP;
2131 if (addr->index == NULL)
2132 addr->index = noreg_GP;
2133 if (addr->mem == NULL)
2136 dbgi = get_irn_dbg_info(node);
2137 block = be_transform_node(src_block);
2138 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2140 if (get_mode_size_bits(mode) == 8) {
2141 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2143 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2145 set_address(new_node, addr);
2146 set_ia32_op_type(new_node, ia32_AddrModeD);
2147 set_ia32_ls_mode(new_node, mode);
2148 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory Proj to the new node */
2150 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2151 mem_proj = be_transform_node(am.mem_proj);
2152 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/*
 * Build a destination-address-mode unary operation (e.g. NotMem,
 * NegMem): the single operand is the memory cell addressed by `ptr`.
 * Bails out (returns NULL on the dropped line) when the operand does
 * not qualify for destination AM.
 *
 * NOTE(review): extraction drops the declarations
 * (dbgi/block/new_mem/new_node/mem_proj), the early `return NULL;`
 * and the final return; visible code kept verbatim.
 */
2157 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2158 ir_node *ptr, ir_mode *mode,
2159 construct_unop_dest_func *func)
2161 ir_node *src_block = get_nodes_block(node);
2167 ia32_address_mode_t am;
2168 ia32_address_t *addr = &am.addr;
2170 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2173 memset(&am, 0, sizeof(am));
2174 build_address(&am, op, ia32_create_am_double_use);
2176 dbgi = get_irn_dbg_info(node);
2177 block = be_transform_node(src_block);
2178 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2179 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2180 set_address(new_node, addr);
2181 set_ia32_op_type(new_node, ia32_AddrModeD);
2182 set_ia32_ls_mode(new_node, mode);
2183 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed load's memory Proj to the new node */
2185 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2186 mem_proj = be_transform_node(am.mem_proj);
2187 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2192 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2194 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2195 return get_negated_pnc(pnc, mode);
/*
 * Try to turn an 8-bit Mux(sel, 1, 0) (or the inverted 0/1 form,
 * handled by negating the relation) being stored into a single
 * SetccMem that writes the condition byte directly to memory.
 * Returns NULL (on dropped lines) when the pattern does not match or
 * the relation needs float special-case handling.
 *
 * NOTE(review): extraction drops the declarations
 * (cond/flags/pnc/dbgi/block/new_block/new_mem/new_node), the early
 * `return NULL;` lines and the final return; also the branch bodies of
 * the 1/0 vs 0/1 check. Visible code kept verbatim.
 */
2198 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2200 ir_mode *mode = get_irn_mode(node);
2201 ir_node *mux_true = get_Mux_true(node);
2202 ir_node *mux_false = get_Mux_false(node);
2212 ia32_address_t addr;
2214 if (get_mode_size_bits(mode) != 8)
2217 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2219 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2225 cond = get_Mux_sel(node);
2226 flags = get_flags_node(cond, &pnc);
2227 /* we can't handle the float special cases with SetM */
2228 if (pnc & ia32_pn_Cmp_float)
2231 pnc = ia32_get_negated_pnc(pnc);
2233 build_address_ptr(&addr, ptr, mem);
2235 dbgi = get_irn_dbg_info(node);
2236 block = get_nodes_block(node);
2237 new_block = be_transform_node(block);
2238 new_mem = be_transform_node(mem);
2239 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2240 addr.index, addr.mem, flags, pnc);
2241 set_address(new_node, &addr);
2242 set_ia32_op_type(new_node, ia32_AddrModeD);
2243 set_ia32_ls_mode(new_node, mode);
2244 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Try to transform Store(op(Load(ptr), x)) into a single
 * destination-address-mode instruction (AddMem, SubMem, AndMem, ...,
 * SetccMem, NegMem, NotMem).  The value must be GP-mode, have the
 * store as its only user and live in the store's block.  Returns NULL
 * (on dropped lines) when no pattern applies.
 *
 * NOTE(review): extraction drops the switch case labels (iro_Add,
 * iro_Sub, ...), the `break;` lines, the default case, several
 * declarations (op1/op2/new_node) and the final return; visible code
 * kept verbatim.
 */
2249 static ir_node *try_create_dest_am(ir_node *node)
2251 ir_node *val = get_Store_value(node);
2252 ir_node *mem = get_Store_mem(node);
2253 ir_node *ptr = get_Store_ptr(node);
2254 ir_mode *mode = get_irn_mode(val);
2255 unsigned bits = get_mode_size_bits(mode);
2260 /* handle only GP modes for now... */
2261 if (!ia32_mode_needs_gp_reg(mode))
2265 /* store must be the only user of the val node */
2266 if (get_irn_n_edges(val) > 1)
2268 /* skip pointless convs */
2270 ir_node *conv_op = get_Conv_op(val);
2271 ir_mode *pred_mode = get_irn_mode(conv_op);
2272 if (!ia32_mode_needs_gp_reg(pred_mode))
2274 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2282 /* value must be in the same block */
2283 if (get_nodes_block(node) != get_nodes_block(val))
2286 switch (get_irn_opcode(val)) {
2288 op1 = get_Add_left(val);
2289 op2 = get_Add_right(val);
2290 if (ia32_cg_config.use_incdec) {
2291 if (is_Const_1(op2)) {
2292 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2294 } else if (is_Const_Minus_1(op2)) {
2295 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2299 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2300 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2301 match_commutative | match_immediate);
2304 op1 = get_Sub_left(val);
2305 op2 = get_Sub_right(val);
2306 if (is_Const(op2)) {
2307 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2309 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2310 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2314 op1 = get_And_left(val);
2315 op2 = get_And_right(val);
2316 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2317 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2318 match_commutative | match_immediate);
2321 op1 = get_Or_left(val);
2322 op2 = get_Or_right(val);
2323 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2324 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2325 match_commutative | match_immediate);
2328 op1 = get_Eor_left(val);
2329 op2 = get_Eor_right(val);
2330 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2331 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2332 match_commutative | match_immediate);
2335 op1 = get_Shl_left(val);
2336 op2 = get_Shl_right(val);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2342 op1 = get_Shr_left(val);
2343 op2 = get_Shr_right(val);
2344 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2345 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2349 op1 = get_Shrs_left(val);
2350 op2 = get_Shrs_right(val);
2351 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2352 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2356 op1 = get_Rotl_left(val);
2357 op2 = get_Rotl_right(val);
2358 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2359 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2362 /* TODO: match ROR patterns... */
2364 new_node = try_create_SetMem(val, ptr, mem);
2368 op1 = get_Minus_op(val);
2369 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2372 /* should be lowered already */
2373 assert(mode != mode_b);
2374 op1 = get_Not_op(val);
2375 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned store must keep its replacement pinned as well */
2381 if (new_node != NULL) {
2382 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2383 get_irn_pinned(node) == op_pin_state_pinned) {
2384 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * True iff an integer mode can be produced by the x87 fist store
 * path: it must be signed and 16 or 32 bits wide.
 *
 * NOTE(review): extraction drops the `size` declaration and the
 * `return false;` / `return true;` lines after each check; visible
 * code kept verbatim.
 */
2391 static bool possible_int_mode_for_fp(ir_mode *mode)
2395 if (!mode_is_signed(mode))
2397 size = get_mode_size_bits(mode);
2398 if (size != 16 && size != 32)
/*
 * True iff the node is a Conv from a float mode to an integer mode
 * that the fist store path can handle (see
 * possible_int_mode_for_fp()).
 *
 * NOTE(review): extraction drops the conv_op/conv_mode declarations,
 * the `is_Conv(node)` guard and the return statements; visible code
 * kept verbatim.
 */
2403 static int is_float_to_int_conv(const ir_node *node)
2405 ir_mode *mode = get_irn_mode(node);
2409 if (!possible_int_mode_for_fp(mode))
2414 conv_op = get_Conv_op(node);
2415 conv_mode = get_irn_mode(conv_op);
2417 if (!mode_is_float(conv_mode))
2424 * Transform a Store(floatConst) into a sequence of
2427 * @return the created ia32 Store node
/*
 * Expand Store(floatConst) into one 32-bit integer Store per 4 bytes
 * of the constant's bit pattern (little-endian assembly of tarval sub
 * bytes), synced together at the end.  Requires the float size to be
 * a multiple of 4 bytes.
 *
 * NOTE(review): extraction drops the loop header (do { ... with
 * ofs/val setup), the `ins`/`i` declarations and the offset/size
 * bookkeeping between iterations; visible code kept verbatim.
 */
2429 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2431 ir_mode *mode = get_irn_mode(cns);
2432 unsigned size = get_mode_size_bytes(mode);
2433 tarval *tv = get_Const_tarval(cns);
2434 ir_node *block = get_nodes_block(node);
2435 ir_node *new_block = be_transform_node(block);
2436 ir_node *ptr = get_Store_ptr(node);
2437 ir_node *mem = get_Store_mem(node);
2438 dbg_info *dbgi = get_irn_dbg_info(node);
2442 ia32_address_t addr;
2444 assert(size % 4 == 0);
2447 build_address_ptr(&addr, ptr, mem);
/* assemble 4 little-endian bytes of the constant into one word */
2451 get_tarval_sub_bits(tv, ofs) |
2452 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2453 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2454 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2455 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2457 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2458 addr.index, addr.mem, imm);
2460 set_irn_pinned(new_node, get_irn_pinned(node));
2461 set_ia32_op_type(new_node, ia32_AddrModeD);
2462 set_ia32_ls_mode(new_node, mode_Iu);
2463 set_address(new_node, &addr);
2464 SET_IA32_ORIG_NODE(new_node, node);
2467 ins[i++] = new_node;
2472 } while (size != 0);
/* merge the partial stores' memories */
2475 return new_rd_Sync(dbgi, new_block, i, ins);
2482 * Generate a vfist or vfisttp instruction.
/*
 * Generate a float-to-int store: vfisttp when available (always pops
 * the x87 tos, so the value is kept alive via a Keep for other users
 * and the memory Proj is returned), otherwise a classic vfist with an
 * explicit truncating FPU control word.
 *
 * NOTE(review): extraction drops the `new_node` declaration, the
 * `*fist` out-parameter assignments and the return; visible code kept
 * verbatim.
 */
2484 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2485 ir_node *mem, ir_node *val, ir_node **fist)
2489 if (ia32_cg_config.use_fisttp) {
2490 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2491 if other users exists */
2492 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2493 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2494 be_new_Keep(block, 1, &value);
2496 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* fallback: vfist needs the FPU rounding mode forced to truncate */
2499 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2502 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2508 * Transforms a general (no special case) Store.
2510 * @return the created ia32 Store node
/*
 * Transform a Store without special-case handling: first try
 * destination address mode; otherwise build the store address and
 * emit xStore (SSE float), vfst (x87 float), a fist via gen_vfist()
 * for float->int converted values, or Store/Store8Bit for integers.
 * Redundant Convs before the store are skipped.
 *
 * NOTE(review): extraction drops the conv-skip loop bodies
 * (`val = op;` lines), the `store` extraction for multi-result float
 * stores and the final return; visible code kept verbatim.
 */
2512 static ir_node *gen_general_Store(ir_node *node)
2514 ir_node *val = get_Store_value(node);
2515 ir_mode *mode = get_irn_mode(val);
2516 ir_node *block = get_nodes_block(node);
2517 ir_node *new_block = be_transform_node(block);
2518 ir_node *ptr = get_Store_ptr(node);
2519 ir_node *mem = get_Store_mem(node);
2520 dbg_info *dbgi = get_irn_dbg_info(node);
2521 ir_node *new_val, *new_node, *store;
2522 ia32_address_t addr;
2524 /* check for destination address mode */
2525 new_node = try_create_dest_am(node);
2526 if (new_node != NULL)
2529 /* construct store address */
2530 memset(&addr, 0, sizeof(addr));
2531 ia32_create_address_mode(&addr, ptr, 0);
2533 if (addr.base == NULL) {
2534 addr.base = noreg_GP;
2536 addr.base = be_transform_node(addr.base);
2539 if (addr.index == NULL) {
2540 addr.index = noreg_GP;
2542 addr.index = be_transform_node(addr.index);
2544 addr.mem = be_transform_node(mem);
2546 if (mode_is_float(mode)) {
2547 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2549 while (is_Conv(val) && mode == get_irn_mode(val)) {
2550 ir_node *op = get_Conv_op(val);
2551 if (!mode_is_float(get_irn_mode(op)))
2555 new_val = be_transform_node(val);
2556 if (ia32_cg_config.use_sse2) {
2557 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2558 addr.index, addr.mem, new_val);
2560 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2561 addr.index, addr.mem, new_val, mode);
2564 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2565 val = get_Conv_op(val);
2567 /* TODO: is this optimisation still necessary at all (middleend)? */
2568 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2569 while (is_Conv(val)) {
2570 ir_node *op = get_Conv_op(val);
2571 if (!mode_is_float(get_irn_mode(op)))
2573 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2577 new_val = be_transform_node(val);
2578 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: operand may become an immediate */
2580 new_val = create_immediate_or_transform(val, 0);
2581 assert(mode != mode_b);
2583 if (get_mode_size_bits(mode) == 8) {
2584 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2585 addr.index, addr.mem, new_val);
2587 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2588 addr.index, addr.mem, new_val);
2593 set_irn_pinned(store, get_irn_pinned(node));
2594 set_ia32_op_type(store, ia32_AddrModeD);
2595 set_ia32_ls_mode(store, mode);
2597 set_address(store, &addr);
2598 SET_IA32_ORIG_NODE(store, node);
2604 * Transforms a Store.
2606 * @return the created ia32 Store node
/* Dispatcher: float constants get the special multi-integer-store lowering,
 * everything else goes through gen_general_Store(). */
2608 static ir_node *gen_Store(ir_node *node)
2610 ir_node *val = get_Store_value(node);
2611 ir_mode *mode = get_irn_mode(val);
2613 if (mode_is_float(mode) && is_Const(val)) {
2614 /* We can transform every floating const store
2615 into a sequence of integer stores.
2616 If the constant is already in a register,
2617 it would be better to use it, but we don't
2618 have this information here. */
2619 return gen_float_const_Store(node, val);
2621 return gen_general_Store(node);
2625 * Transforms a Switch.
2627 * @return the created ia32 SwitchJmp node
2629 static ir_node *create_Switch(ir_node *node)
2631 dbg_info *dbgi = get_irn_dbg_info(node);
2632 ir_node *block = be_transform_node(get_nodes_block(node));
2633 ir_node *sel = get_Cond_selector(node);
2634 ir_node *new_sel = be_transform_node(sel);
2635 long switch_min = LONG_MAX;
2636 long switch_max = LONG_MIN;
2637 long default_pn = get_Cond_default_proj(node);
2639 const ir_edge_t *edge;
2641 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* Scan all case Projs (ignoring the default) to find the min/max case
 * value; these bound the size of the jump table. */
2643 /* determine the smallest switch case value */
2644 foreach_out_edge(node, edge) {
2645 ir_node *proj = get_edge_src_irn(edge);
2646 long pn = get_Proj_proj(proj);
2647 if (pn == default_pn)
2650 if (pn < switch_min)
2652 if (pn > switch_max)
/* Refuse to build an absurdly large jump table. */
2656 if ((unsigned long) (switch_max - switch_min) > 128000) {
2657 panic("Size of switch %+F bigger than 128000", node);
/* Bias the selector down to 0 with a Lea so the table starts at index 0. */
2660 if (switch_min != 0) {
2661 /* if smallest switch case is not 0 we need an additional sub */
2662 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2663 add_ia32_am_offs_int(new_sel, -switch_min);
2664 set_ia32_op_type(new_sel, ia32_AddrModeS);
2666 SET_IA32_ORIG_NODE(new_sel, node);
2669 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2670 SET_IA32_ORIG_NODE(new_node, node);
2676 * Transform a Cond node.
/* mode_b selectors become conditional jumps fed by the flags of a Cmp;
 * any other selector mode is a Switch. */
2678 static ir_node *gen_Cond(ir_node *node)
2680 ir_node *block = get_nodes_block(node);
2681 ir_node *new_block = be_transform_node(block);
2682 dbg_info *dbgi = get_irn_dbg_info(node);
2683 ir_node *sel = get_Cond_selector(node);
2684 ir_mode *sel_mode = get_irn_mode(sel);
2685 ir_node *flags = NULL;
2689 if (sel_mode != mode_b) {
2690 return create_Switch(node);
2693 /* we get flags from a Cmp */
2694 flags = get_flags_node(sel, &pnc);
2696 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2697 SET_IA32_ORIG_NODE(new_node, node);
2703 * Transform a be_Copy.
/* Duplicates the node; GP-register copies are normalized to mode_Iu. */
2705 static ir_node *gen_be_Copy(ir_node *node)
2707 ir_node *new_node = be_duplicate_node(node);
2708 ir_mode *mode = get_irn_mode(new_node);
2710 if (ia32_mode_needs_gp_reg(mode)) {
2711 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare (fucomi if available, otherwise
 * ftst/fucom + fnstsw + sahf to move the FPU flags into EFLAGS). */
2717 static ir_node *create_Fucom(ir_node *node)
2719 dbg_info *dbgi = get_irn_dbg_info(node);
2720 ir_node *block = get_nodes_block(node);
2721 ir_node *new_block = be_transform_node(block);
2722 ir_node *left = get_Cmp_left(node);
2723 ir_node *new_left = be_transform_node(left);
2724 ir_node *right = get_Cmp_right(node);
2728 if (ia32_cg_config.use_fucomi) {
2729 new_right = be_transform_node(right);
2730 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2732 set_ia32_commutative(new_node);
2733 SET_IA32_ORIG_NODE(new_node, node);
/* No fucomi: comparing against 0 can use ftst, otherwise fucom; the
 * resulting status word is transferred to EFLAGS via sahf. */
2735 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2736 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2738 new_right = be_transform_node(right);
2739 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2742 set_ia32_commutative(new_node);
2744 SET_IA32_ORIG_NODE(new_node, node);
2746 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2747 SET_IA32_ORIG_NODE(new_node, node);
/* Creates an SSE Ucomi compare; operands may be matched as an
 * address-mode load (match_am) and are commutative. */
2753 static ir_node *create_Ucomi(ir_node *node)
2755 dbg_info *dbgi = get_irn_dbg_info(node);
2756 ir_node *src_block = get_nodes_block(node);
2757 ir_node *new_block = be_transform_node(src_block);
2758 ir_node *left = get_Cmp_left(node);
2759 ir_node *right = get_Cmp_right(node);
2761 ia32_address_mode_t am;
2762 ia32_address_t *addr = &am.addr;
2764 match_arguments(&am, src_block, left, right, NULL,
2765 match_commutative | match_am);
2767 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2768 addr->mem, am.new_op1, am.new_op2,
2770 set_am_attributes(new_node, &am);
2772 SET_IA32_ORIG_NODE(new_node, node);
/* Re-route a possible memory Proj when a load was folded in. */
2774 new_node = fix_mem_proj(new_node, &am);
2780 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2781 * to fold an and into a test node
/* Returns false as soon as one out-Proj uses a relation other than
 * Eq/Lg, since TEST only preserves equality information. */
2783 static bool can_fold_test_and(ir_node *node)
2785 const ir_edge_t *edge;
2787 /** we can only have eq and lg projs */
2788 foreach_out_edge(node, edge) {
2789 ir_node *proj = get_edge_src_irn(edge);
2790 pn_Cmp pnc = get_Proj_proj(proj);
2791 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2799 * returns true if it is assured, that the upper bits of a node are "clean"
2800 * which means for a 16 or 8 bit value, that the upper bits in the register
2801 * are 0 for unsigned and a copy of the last significant bit for signed
2804 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2806 assert(ia32_mode_needs_gp_reg(mode));
/* >= 32 bit values occupy the whole register, nothing to clean. */
2807 if (get_mode_size_bits(mode) >= 32)
2810 if (is_Proj(transformed_node))
2811 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2813 switch (get_ia32_irn_opcode(transformed_node)) {
/* A conversion produces clean upper bits if its signedness matches and
 * its source mode is not wider than the queried mode. */
2814 case iro_ia32_Conv_I2I:
2815 case iro_ia32_Conv_I2I8Bit: {
2816 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2817 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2819 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr by a large-enough constant zeroes the upper bits (unsigned only). */
2826 if (mode_is_signed(mode)) {
2827 return false; /* TODO handle signed modes */
2829 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2830 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2831 const ia32_immediate_attr_t *attr
2832 = get_ia32_immediate_attr_const(right);
2833 if (attr->symconst == 0 &&
2834 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2838 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2842 /* TODO too conservative if shift amount is constant */
2843 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: one clean unsigned operand suffices (zeros propagate through &). */
2846 if (!mode_is_signed(mode)) {
2848 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2849 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2851 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary case: both operands must be clean. */
2856 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2857 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants/immediates: check the value's sign/zero extension directly. */
2859 case iro_ia32_Const:
2860 case iro_ia32_Immediate: {
2861 const ia32_immediate_attr_t *attr =
2862 get_ia32_immediate_attr_const(transformed_node);
2863 if (mode_is_signed(mode)) {
2864 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2865 return shifted == 0 || shifted == -1;
2867 unsigned long shifted = (unsigned long)attr->offset;
2868 shifted >>= get_mode_size_bits(mode);
2869 return shifted == 0;
2879 * Generate code for a Cmp.
2881 static ir_node *gen_Cmp(ir_node *node)
2883 dbg_info *dbgi = get_irn_dbg_info(node);
2884 ir_node *block = get_nodes_block(node);
2885 ir_node *new_block = be_transform_node(block);
2886 ir_node *left = get_Cmp_left(node);
2887 ir_node *right = get_Cmp_right(node);
2888 ir_mode *cmp_mode = get_irn_mode(left);
2890 ia32_address_mode_t am;
2891 ia32_address_t *addr = &am.addr;
/* Float compares are delegated to SSE (ucomi) or x87 (fucom) helpers. */
2894 if (mode_is_float(cmp_mode)) {
2895 if (ia32_cg_config.use_sse2) {
2896 return create_Ucomi(node);
2898 return create_Fucom(node);
2902 assert(ia32_mode_needs_gp_reg(cmp_mode));
2904 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2905 cmp_unsigned = !mode_is_signed(cmp_mode);
2906 if (is_Const_0(right) &&
2908 get_irn_n_edges(left) == 1 &&
2909 can_fold_test_and(node)) {
2910 /* Test(and_left, and_right) */
2911 ir_node *and_left = get_And_left(left);
2912 ir_node *and_right = get_And_right(left);
2914 /* matze: code here used mode instead of cmd_mode, I think it is always
2915 * the same as cmp_mode, but I leave this here to see if this is really
2918 assert(get_irn_mode(and_left) == cmp_mode);
2920 match_arguments(&am, block, and_left, and_right, NULL,
2922 match_am | match_8bit_am | match_16bit_am |
2923 match_am_and_immediates | match_immediate);
2925 /* use 32bit compare mode if possible since the opcode is smaller */
2926 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2927 upper_bits_clean(am.new_op2, cmp_mode)) {
2928 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2931 if (get_mode_size_bits(cmp_mode) == 8) {
2932 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2933 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2936 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2937 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2940 /* Cmp(left, right) */
2941 match_arguments(&am, block, left, right, NULL,
2942 match_commutative | match_am | match_8bit_am |
2943 match_16bit_am | match_am_and_immediates |
2945 /* use 32bit compare mode if possible since the opcode is smaller */
2946 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2947 upper_bits_clean(am.new_op2, cmp_mode)) {
2948 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2951 if (get_mode_size_bits(cmp_mode) == 8) {
2952 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2953 addr->index, addr->mem, am.new_op1,
2954 am.new_op2, am.ins_permuted,
2957 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2958 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Attach address-mode attributes and the (possibly widened) ls mode. */
2961 set_am_attributes(new_node, &am);
2962 set_ia32_ls_mode(new_node, cmp_mode);
2964 SET_IA32_ORIG_NODE(new_node, node);
2966 new_node = fix_mem_proj(new_node, &am);
/* Creates a CMovcc from a Mux; requires cmov support and a GP mode.
 * If matching permuted the operands, the condition code is negated. */
2971 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2974 dbg_info *dbgi = get_irn_dbg_info(node);
2975 ir_node *block = get_nodes_block(node);
2976 ir_node *new_block = be_transform_node(block);
2977 ir_node *val_true = get_Mux_true(node);
2978 ir_node *val_false = get_Mux_false(node);
2980 ia32_address_mode_t am;
2981 ia32_address_t *addr;
2983 assert(ia32_cg_config.use_cmov);
2984 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2988 match_arguments(&am, block, val_false, val_true, flags,
2989 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2991 if (am.ins_permuted)
2992 pnc = ia32_get_negated_pnc(pnc);
2994 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2995 addr->mem, am.new_op1, am.new_op2, new_flags,
2997 set_am_attributes(new_node, &am);
2999 SET_IA32_ORIG_NODE(new_node, node);
3001 new_node = fix_mem_proj(new_node, &am);
3007 * Creates a ia32 Setcc instruction.
/* Setcc only produces an 8-bit result; widen with a Conv when the
 * original node's mode is larger than 8 bits. */
3009 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3010 ir_node *flags, pn_Cmp pnc,
3013 ir_mode *mode = get_irn_mode(orig_node);
3016 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3017 SET_IA32_ORIG_NODE(new_node, orig_node);
3019 /* we might need to conv the result up */
3020 if (get_mode_size_bits(mode) > 8) {
3021 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3022 nomem, new_node, mode_Bu);
3023 SET_IA32_ORIG_NODE(new_node, orig_node);
3030 * Create instruction for an unsigned Difference or Zero.
/* Lowers Mux(a >=u b, a - b, 0) to sub + sbb0 + not + and: the borrow
 * from the sub selects between the difference and zero. */
3032 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3034 ir_mode *mode = get_irn_mode(psi);
3044 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3045 match_mode_neutral | match_am | match_immediate | match_two_users);
3047 block = get_nodes_block(new_node);
3049 if (is_Proj(new_node)) {
3050 sub = get_Proj_pred(new_node);
3051 assert(is_ia32_Sub(sub));
/* Turn the Sub into mode_T so both the result and flags Proj exist. */
3054 set_irn_mode(sub, mode_T);
3055 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3057 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3059 dbgi = get_irn_dbg_info(psi);
3060 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3061 not = new_bd_ia32_Not(dbgi, block, sbb);
3063 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3064 set_ia32_commutative(new_node);
3069 * Create an const array of two float consts.
3071 * @param c0 the first constant
3072 * @param c1 the second constant
3073 * @param new_mode IN/OUT for the mode of the constants, if NULL
3074 * smallest possible mode will be used
3076 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3078 ir_mode *mode = *new_mode;
3080 ir_initializer_t *initializer;
3081 tarval *tv0 = get_Const_tarval(c0);
3082 tarval *tv1 = get_Const_tarval(c1);
/* Shrink the constants to mode_F or mode_D when that loses no precision,
 * to keep the emitted constant table small. */
3085 /* detect the best mode for the constants */
3086 mode = get_tarval_mode(tv0);
3088 if (mode != mode_F) {
3089 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3090 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3092 tv0 = tarval_convert_to(tv0, mode);
3093 tv1 = tarval_convert_to(tv1, mode);
3094 } else if (mode != mode_D) {
3095 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3096 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3098 tv0 = tarval_convert_to(tv0, mode);
3099 tv1 = tarval_convert_to(tv1, mode);
/* Build a local, constant, uniquely-named global entity holding both
 * values as a two-element initializer. */
3106 tp = ia32_create_float_type(mode, 4);
3107 tp = ia32_create_float_array(tp);
3109 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3111 set_entity_ld_ident(ent, get_entity_ident(ent));
3112 set_entity_visibility(ent, ir_visibility_local);
3113 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3115 initializer = create_initializer_compound(2);
3117 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3118 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3120 set_entity_initializer(ent, initializer);
3127 * Possible transformations for creating a Setcc.
3129 enum setcc_transform_insn {
/* Recipe computed by find_const_transform(): a short sequence of steps
 * plus whether the Cmp inputs must be swapped. */
3142 typedef struct setcc_transform {
3144 unsigned permutate_cmp_ins;
3147 enum setcc_transform_insn transform;
3151 } setcc_transform_t;
3154 * Setcc can only handle 0 and 1 result.
3155 * Find a transformation that creates 0 and 1 from
/* Plans a step sequence (set/sbb/neg/not/add/lea/shl/and) that turns a
 * 0/1 Setcc result into the requested true/false constants t and f. */
3158 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f, setcc_transform_t *res, int can_permutate)
3163 res->permutate_cmp_ins = 0;
/* Normalize so t is the "bigger"/non-zero constant, negating the
 * condition when t and f are swapped. */
3165 if (tarval_is_null(t)) {
3169 pnc = ia32_get_negated_pnc(pnc);
3170 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3171 // now, t is the bigger one
3175 pnc = ia32_get_negated_pnc(pnc);
/* t == 1, f == 0: plain Setcc suffices. */
3179 if (tarval_is_one(t)) {
3180 res->steps[step].transform = SETCC_TR_SET;
3181 res->num_steps = ++step;
/* Non-zero f: emit a final ADD f and continue planning for t - f, 0. */
3185 if (! tarval_is_null(f)) {
3186 tarval *t_sub = tarval_sub(t, f, NULL);
3189 res->steps[step].transform = SETCC_TR_ADD;
3191 if (t == tarval_bad)
3192 panic("constant subtract failed");
3193 if (! tarval_is_long(f))
3194 panic("tarval is not long");
3196 res->steps[step].val = get_tarval_long(f);
3198 f = tarval_sub(f, f, NULL);
3199 assert(tarval_is_null(f));
/* t == -1: unsigned-below maps directly to SBB, otherwise NEG(SET). */
3202 if (tarval_is_minus_one(t)) {
3203 if (pnc == (pn_Cmp_Lt | ia32_pn_Cmp_unsigned)) {
3204 res->steps[step].transform = SETCC_TR_SBB;
3205 res->num_steps = ++step;
3207 res->steps[step].transform = SETCC_TR_NEG;
3209 res->steps[step].transform = SETCC_TR_SET;
3210 res->num_steps = ++step;
3214 if (tarval_is_long(t)) {
3215 ir_mode *mode = get_tarval_mode(t);
3216 long v = get_tarval_long(t);
/* Unsigned conditions: use SBB (all-ones/zero mask) + AND/NOT tricks,
 * permuting the compare inputs where that enables the pattern. */
3218 if (pnc & ia32_pn_Cmp_unsigned) {
3219 if (pnc == (pn_Cmp_Lt | ia32_pn_Cmp_unsigned)) {
3220 res->steps[step].transform = SETCC_TR_AND;
3221 res->steps[step].val = v;
3224 res->steps[step].transform = SETCC_TR_SBB;
3225 res->num_steps = ++step;
3227 } else if (pnc == (pn_Cmp_Ge | ia32_pn_Cmp_unsigned)) {
3228 res->steps[step].transform = SETCC_TR_AND;
3229 res->steps[step].val = v;
3232 res->steps[step].transform = SETCC_TR_NOT;
3235 res->steps[step].transform = SETCC_TR_SBB;
3236 res->num_steps = ++step;
3238 } else if (can_permutate && pnc == (pn_Cmp_Gt | ia32_pn_Cmp_unsigned)) {
3239 res->permutate_cmp_ins ^= 1;
3241 res->steps[step].transform = SETCC_TR_NOT;
3244 res->steps[step].transform = SETCC_TR_AND;
3245 res->steps[step].val = v;
3248 res->steps[step].transform = SETCC_TR_SBB;
3249 res->num_steps = ++step;
3251 } else if (can_permutate && pnc == (pn_Cmp_Le | ia32_pn_Cmp_unsigned)) {
3252 res->permutate_cmp_ins ^= 1;
3254 res->steps[step].transform = SETCC_TR_AND;
3255 res->steps[step].val = v;
3258 res->steps[step].transform = SETCC_TR_SBB;
3259 res->num_steps = ++step;
3264 res->steps[step].val = 0;
/* Small multiplier constants: build t from the 0/1 via LEA/SHL chains
 * (scale 1..3 covers factors 2,3,4,5,8,9); an ADD planned earlier means
 * the LEAxx variant (scale*a + a) must be used. */
3267 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3269 res->steps[step].transform = SETCC_TR_LEAxx;
3270 res->steps[step].scale = 3; /* (a << 3) + a */
3273 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3275 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3276 res->steps[step].scale = 3; /* (a << 3) */
3279 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3281 res->steps[step].transform = SETCC_TR_LEAxx;
3282 res->steps[step].scale = 2; /* (a << 2) + a */
3285 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3287 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3288 res->steps[step].scale = 2; /* (a << 2) */
3291 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3293 res->steps[step].transform = SETCC_TR_LEAxx;
3294 res->steps[step].scale = 1; /* (a << 1) + a */
3297 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3299 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3300 res->steps[step].scale = 1; /* (a << 1) */
3303 res->num_steps = step;
/* General constant: mask (AND) or negate, or shift a single-bit value
 * into place with SHL by the bit position. */
3306 if (! tarval_is_single_bit(t)) {
3307 res->steps[step].transform = SETCC_TR_AND;
3308 res->steps[step].val = v;
3310 res->steps[step].transform = SETCC_TR_NEG;
3312 int v = get_tarval_lowest_bit(t);
3315 res->steps[step].transform = SETCC_TR_SHL;
3316 res->steps[step].scale = v;
3320 res->steps[step].transform = SETCC_TR_SET;
3321 res->num_steps = ++step;
3324 panic("tarval is not long");
3328 * Transforms a Mux node into some code sequence.
3330 * @return The transformed node.
3332 static ir_node *gen_Mux(ir_node *node)
3334 dbg_info *dbgi = get_irn_dbg_info(node);
3335 ir_node *block = get_nodes_block(node);
3336 ir_node *new_block = be_transform_node(block);
3337 ir_node *mux_true = get_Mux_true(node);
3338 ir_node *mux_false = get_Mux_false(node);
3339 ir_node *cond = get_Mux_sel(node);
3340 ir_mode *mode = get_irn_mode(node);
3345 assert(get_irn_mode(cond) == mode_b);
3347 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
/* Float Mux: SSE min/max patterns first, then the "Setcc indexes into a
 * two-element constant table" trick for constant operands. */
3348 if (mode_is_float(mode)) {
3349 ir_node *cmp = get_Proj_pred(cond);
3350 ir_node *cmp_left = get_Cmp_left(cmp);
3351 ir_node *cmp_right = get_Cmp_right(cmp);
3352 pn_Cmp pnc = get_Proj_proj(cond);
3354 if (ia32_cg_config.use_sse2) {
3355 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3356 if (cmp_left == mux_true && cmp_right == mux_false) {
3357 /* Mux(a <= b, a, b) => MIN */
3358 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3359 match_commutative | match_am | match_two_users);
3360 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3361 /* Mux(a <= b, b, a) => MAX */
3362 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3363 match_commutative | match_am | match_two_users);
3365 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3366 if (cmp_left == mux_true && cmp_right == mux_false) {
3367 /* Mux(a >= b, a, b) => MAX */
3368 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3369 match_commutative | match_am | match_two_users);
3370 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3371 /* Mux(a >= b, b, a) => MIN */
3372 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3373 match_commutative | match_am | match_two_users);
3378 if (is_Const(mux_true) && is_Const(mux_false)) {
3379 ia32_address_mode_t am;
3384 flags = get_flags_node(cond, &pnc);
3385 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3387 if (ia32_cg_config.use_sse2) {
3388 /* cannot load from different mode on SSE */
3391 /* x87 can load any mode */
/* Both constants go into one global array; the Setcc result (0/1),
 * scaled by the element size, indexes the array via address mode. */
3395 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3397 switch (get_mode_size_bytes(new_mode)) {
3407 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3408 set_ia32_am_scale(new_node, 2);
3413 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3414 set_ia32_am_scale(new_node, 1);
3417 /* arg, shift 16 NOT supported */
3419 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3422 panic("Unsupported constant size");
3425 am.ls_mode = new_mode;
3426 am.addr.base = noreg_GP;
3427 am.addr.index = new_node;
3428 am.addr.mem = nomem;
3430 am.addr.scale = scale;
3431 am.addr.use_frame = 0;
3432 am.addr.frame_entity = NULL;
3433 am.addr.symconst_sign = 0;
3434 am.mem_proj = am.addr.mem;
3435 am.op_type = ia32_AddrModeS;
3438 am.pinned = op_pin_state_floats;
3440 am.ins_permuted = 0;
3442 if (ia32_cg_config.use_sse2)
3443 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3445 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3446 set_am_attributes(load, &am);
3448 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3450 panic("cannot transform floating point Mux");
/* Integer Mux: detect unsigned difference-or-zero first, then either a
 * constant-result Setcc transform sequence or a generic CMov. */
3453 assert(ia32_mode_needs_gp_reg(mode));
3455 if (is_Proj(cond)) {
3456 ir_node *cmp = get_Proj_pred(cond);
3458 ir_node *cmp_left = get_Cmp_left(cmp);
3459 ir_node *cmp_right = get_Cmp_right(cmp);
3460 pn_Cmp pnc = get_Proj_proj(cond);
3462 /* check for unsigned Doz first */
3463 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3464 is_Const_0(mux_false) && is_Sub(mux_true) &&
3465 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3466 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3467 return create_doz(node, cmp_left, cmp_right);
3468 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3469 is_Const_0(mux_true) && is_Sub(mux_false) &&
3470 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3471 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3472 return create_doz(node, cmp_left, cmp_right);
3477 flags = get_flags_node(cond, &pnc);
3479 if (is_Const(mux_true) && is_Const(mux_false)) {
3480 /* both are const, good */
3481 tarval *tv_true = get_Const_tarval(mux_true);
3482 tarval *tv_false = get_Const_tarval(mux_false);
3483 setcc_transform_t res;
3486 /* check if flags is a cmp node and we are the only user,
3487 i.e no other user yet */
3488 int permutate_allowed = 0;
3489 if (is_ia32_Cmp(flags) && get_irn_n_edges(flags) == 0) {
3490 /* yes, we can permutate its inputs */
3491 permutate_allowed = 1;
/* NOTE(review): find_const_transform is called with can_permutate = 0
 * here even though permutate_allowed was just computed — looks
 * suspicious, but the missing surrounding lines may explain it. */
3493 find_const_transform(pnc, tv_true, tv_false, &res, 0);
3495 if (res.permutate_cmp_ins) {
3496 ia32_attr_t *attr = get_ia32_attr(flags);
3497 attr->data.ins_permuted ^= 1;
/* Emit the planned steps in reverse order (last step first). */
3499 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3502 switch (res.steps[step].transform) {
3504 imm = ia32_immediate_from_long(res.steps[step].val);
3505 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3507 case SETCC_TR_ADDxx:
3508 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3511 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3512 set_ia32_am_scale(new_node, res.steps[step].scale);
3513 set_ia32_am_offs_int(new_node, res.steps[step].val);
3515 case SETCC_TR_LEAxx:
3516 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3517 set_ia32_am_scale(new_node, res.steps[step].scale);
3518 set_ia32_am_offs_int(new_node, res.steps[step].val);
3521 imm = ia32_immediate_from_long(res.steps[step].scale);
3522 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3525 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3528 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3531 imm = ia32_immediate_from_long(res.steps[step].val);
3532 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3535 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3538 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3541 panic("unknown setcc transform");
3545 new_node = create_CMov(node, cond, flags, pnc);
3553 * Create a conversion from x87 state register to general purpose.
/* Lowers float->int via a fist store to the frame followed by an
 * integer load of the result. */
3555 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3557 ir_node *block = be_transform_node(get_nodes_block(node));
3558 ir_node *op = get_Conv_op(node);
3559 ir_node *new_op = be_transform_node(op);
3560 ir_graph *irg = current_ir_graph;
3561 dbg_info *dbgi = get_irn_dbg_info(node);
3562 ir_mode *mode = get_irn_mode(node);
3563 ir_node *fist, *load, *mem;
3565 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3566 set_irn_pinned(fist, op_pin_state_floats);
3567 set_ia32_use_frame(fist);
3568 set_ia32_op_type(fist, ia32_AddrModeD);
3570 assert(get_mode_size_bits(mode) <= 32);
3571 /* exception we can only store signed 32 bit integers, so for unsigned
3572 we store a 64bit (signed) integer and load the lower bits */
3573 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3574 set_ia32_ls_mode(fist, mode_Ls);
3576 set_ia32_ls_mode(fist, mode_Is);
3578 SET_IA32_ORIG_NODE(fist, node);
/* Reload the (lower) 32 bits from the spill slot as mode_Is; the
 * stack entity size flag matches what the fist stored. */
3581 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3583 set_irn_pinned(load, op_pin_state_floats);
3584 set_ia32_use_frame(load);
3585 set_ia32_op_type(load, ia32_AddrModeS);
3586 set_ia32_ls_mode(load, mode_Is);
3587 if (get_ia32_ls_mode(fist) == mode_Ls) {
3588 ia32_attr_t *attr = get_ia32_attr(load);
3589 attr->data.need_64bit_stackent = 1;
3591 ia32_attr_t *attr = get_ia32_attr(load);
3592 attr->data.need_32bit_stackent = 1;
3594 SET_IA32_ORIG_NODE(load, node);
3596 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3600 * Creates a x87 strict Conv by placing a Store and a Load
/* Rounding to tgt_mode is forced by spilling to the frame and loading
 * back, since x87 registers always hold extended precision. */
3602 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3604 ir_node *block = get_nodes_block(node);
3605 ir_graph *irg = get_Block_irg(block);
3606 dbg_info *dbgi = get_irn_dbg_info(node);
3607 ir_node *frame = get_irg_frame(irg);
3608 ir_node *store, *load;
3611 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3612 set_ia32_use_frame(store);
3613 set_ia32_op_type(store, ia32_AddrModeD);
3614 SET_IA32_ORIG_NODE(store, node);
3616 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3617 set_ia32_use_frame(load);
3618 set_ia32_op_type(load, ia32_AddrModeS);
3619 SET_IA32_ORIG_NODE(load, node);
3621 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Builds an integer-to-integer Conv node, choosing the 8-bit variant
 * constructor for 8-bit modes. */
3625 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3626 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3628 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3630 func = get_mode_size_bits(mode) == 8 ?
3631 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3632 return func(dbgi, block, base, index, mem, val, mode);
3636 * Create a conversion from general purpose to x87 register
/* Lowers int->float: either folds the operand as a fild source address
 * mode, or spills it to the frame and filds from there. Unsigned 32-bit
 * values get a 64-bit spill with a zero upper half. */
3638 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3640 ir_node *src_block = get_nodes_block(node);
3641 ir_node *block = be_transform_node(src_block);
3642 ir_graph *irg = get_Block_irg(block);
3643 dbg_info *dbgi = get_irn_dbg_info(node);
3644 ir_node *op = get_Conv_op(node);
3645 ir_node *new_op = NULL;
3647 ir_mode *store_mode;
3652 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3653 if (possible_int_mode_for_fp(src_mode)) {
3654 ia32_address_mode_t am;
3656 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3657 if (am.op_type == ia32_AddrModeS) {
3658 ia32_address_t *addr = &am.addr;
3660 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3661 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3663 set_am_attributes(fild, &am);
3664 SET_IA32_ORIG_NODE(fild, node);
3666 fix_mem_proj(fild, &am);
3671 if (new_op == NULL) {
3672 new_op = be_transform_node(op);
3675 mode = get_irn_mode(op);
/* fild only reads signed data; sub-32-bit values with dirty upper bits
 * are sign/zero-extended to 32 bit first. */
3677 /* first convert to 32 bit signed if necessary */
3678 if (get_mode_size_bits(src_mode) < 32) {
3679 if (!upper_bits_clean(new_op, src_mode)) {
3680 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3681 SET_IA32_ORIG_NODE(new_op, node);
3686 assert(get_mode_size_bits(mode) == 32);
/* Spill the value to the frame so fild can read it from memory. */
3689 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3691 set_ia32_use_frame(store);
3692 set_ia32_op_type(store, ia32_AddrModeD);
3693 set_ia32_ls_mode(store, mode_Iu);
3695 /* exception for 32bit unsigned, do a 64bit spill+load */
3696 if (!mode_is_signed(mode)) {
/* Store a zero in the upper 4 bytes so the 64-bit signed fild sees the
 * correct non-negative value; both stores are joined with a Sync. */
3699 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3701 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3702 noreg_GP, nomem, zero_const);
3704 set_ia32_use_frame(zero_store);
3705 set_ia32_op_type(zero_store, ia32_AddrModeD);
3706 add_ia32_am_offs_int(zero_store, 4);
3707 set_ia32_ls_mode(zero_store, mode_Iu);
3712 store = new_rd_Sync(dbgi, block, 2, in);
3713 store_mode = mode_Ls;
3715 store_mode = mode_Is;
3719 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3721 set_ia32_use_frame(fild);
3722 set_ia32_op_type(fild, ia32_AddrModeS);
3723 set_ia32_ls_mode(fild, store_mode);
3725 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3731 * Create a conversion from one integer mode into another one
3733 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3734 dbg_info *dbgi, ir_node *block, ir_node *op,
3737 ir_node *new_block = be_transform_node(block);
3739 ir_mode *smaller_mode;
3740 ia32_address_mode_t am;
3741 ia32_address_t *addr = &am.addr;
/* The conversion's effective width is the narrower of source/target. */
3744 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3745 smaller_mode = src_mode;
3747 smaller_mode = tgt_mode;
3750 #ifdef DEBUG_libfirm
3752 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3757 match_arguments(&am, block, NULL, op, NULL,
3758 match_am | match_8bit_am | match_16bit_am);
/* If the operand's upper bits are already clean the Conv is a no-op;
 * assert matching did not fold a load in that case. */
3760 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3761 /* unnecessary conv. in theory it shouldn't have been AM */
3762 assert(is_ia32_NoReg_GP(addr->base));
3763 assert(is_ia32_NoReg_GP(addr->index));
3764 assert(is_NoMem(addr->mem));
3765 assert(am.addr.offset == 0);
3766 assert(am.addr.symconst_ent == NULL);
3770 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3771 addr->mem, am.new_op2, smaller_mode);
3772 set_am_attributes(new_node, &am);
3773 /* match_arguments assume that out-mode = in-mode, this isn't true here
3775 set_ia32_ls_mode(new_node, smaller_mode);
3776 SET_IA32_ORIG_NODE(new_node, node);
3777 new_node = fix_mem_proj(new_node, &am);
3782 * Transforms a Conv node.
3784 * @return The created ia32 Conv node
/* Dispatches on (src float?, tgt float?) and on the SSE2/x87 configuration:
 * float->float, float->int, int->float and int->int each take a separate
 * path; no-op and mode_b conversions are eliminated. */
3786 static ir_node *gen_Conv(ir_node *node)
3788 ir_node *block = get_nodes_block(node);
3789 ir_node *new_block = be_transform_node(block);
3790 ir_node *op = get_Conv_op(node);
3791 ir_node *new_op = NULL;
3792 dbg_info *dbgi = get_irn_dbg_info(node);
3793 ir_mode *src_mode = get_irn_mode(op);
3794 ir_mode *tgt_mode = get_irn_mode(node);
3795 int src_bits = get_mode_size_bits(src_mode);
3796 int tgt_bits = get_mode_size_bits(tgt_mode);
3797 ir_node *res = NULL;
/* 64bit integer ops must have been lowered before this backend runs */
3799 assert(!mode_is_int(src_mode) || src_bits <= 32);
3800 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3802 /* modeB -> X should already be lowered by the lower_mode_b pass */
3803 if (src_mode == mode_b) {
3804 panic("ConvB not lowered %+F", node);
3807 if (src_mode == tgt_mode) {
3808 if (get_Conv_strict(node)) {
3809 if (ia32_cg_config.use_sse2) {
3810 /* when we are in SSE mode, we can kill all strict no-op conversion */
3811 return be_transform_node(op);
3814 /* this should be optimized already, but who knows... */
3815 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3816 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3817 return be_transform_node(op);
3821 if (mode_is_float(src_mode)) {
3822 new_op = be_transform_node(op);
3823 /* we convert from float ... */
3824 if (mode_is_float(tgt_mode)) {
3826 if (ia32_cg_config.use_sse2) {
3827 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3828 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3830 set_ia32_ls_mode(res, tgt_mode);
3832 if (get_Conv_strict(node)) {
3833 /* if fp_no_float_fold is not set then we assume that we
3834 * don't have any float operations in a non
3835 * mode_float_arithmetic mode and can skip strict upconvs */
3836 if (src_bits < tgt_bits
3837 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3838 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* x87: force rounding through a store/load round-trip */
3841 res = gen_x87_strict_conv(tgt_mode, new_op);
3842 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3846 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3851 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3852 if (ia32_cg_config.use_sse2) {
3853 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3855 set_ia32_ls_mode(res, src_mode);
3857 return gen_x87_fp_to_gp(node);
3861 /* we convert from int ... */
3862 if (mode_is_float(tgt_mode)) {
3864 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3865 if (ia32_cg_config.use_sse2) {
3866 new_op = be_transform_node(op);
3867 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3869 set_ia32_ls_mode(res, tgt_mode);
/* signed ints lose one value bit to the sign; compare against the
 * mantissa of the target float to decide if rounding may occur */
3871 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3872 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3873 res = gen_x87_gp_to_fp(node, src_mode);
3875 /* we need a strict-Conv, if the int mode has more bits than the
3877 if (float_mantissa < int_mantissa) {
3878 res = gen_x87_strict_conv(tgt_mode, res);
3879 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3883 } else if (tgt_mode == mode_b) {
3884 /* mode_b lowering already took care that we only have 0/1 values */
3885 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3886 src_mode, tgt_mode));
3887 return be_transform_node(op);
3890 if (src_bits == tgt_bits) {
3891 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3892 src_mode, tgt_mode));
3893 return be_transform_node(op);
/* remaining case: int -> int with differing sizes */
3896 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode @p node as an ia32 Immediate (subject to the given
 * constraint letter); fall back to the normal transformation otherwise. */
3904 static ir_node *create_immediate_or_transform(ir_node *node,
3905 char immediate_constraint_type)
3907 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3908 if (new_node == NULL) {
3909 new_node = be_transform_node(node);
3915 * Transforms a FrameAddr into an ia32 Add.
/* Actually emits a Lea over the frame pointer; the frame entity is recorded
 * so the offset gets fixed up once the stack layout is known. */
3917 static ir_node *gen_be_FrameAddr(ir_node *node)
3919 ir_node *block = be_transform_node(get_nodes_block(node));
3920 ir_node *op = be_get_FrameAddr_frame(node);
3921 ir_node *new_op = be_transform_node(op);
3922 dbg_info *dbgi = get_irn_dbg_info(node);
3925 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3926 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3927 set_ia32_use_frame(new_node);
3929 SET_IA32_ORIG_NODE(new_node, node);
3935 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Only rewrites Returns of a single primitive float value under SSE2; the
 * xmm result is spilled to the frame and re-loaded into the x87 stack (the
 * ABI returns floats in st(0)), and the feeding Barrier is rebuilt so its
 * value/memory inputs use the new store/load chain. */
3937 static ir_node *gen_be_Return(ir_node *node)
3939 ir_graph *irg = current_ir_graph;
3940 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3941 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3942 ir_entity *ent = get_irg_entity(irg);
3943 ir_type *tp = get_entity_type(ent);
3948 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3949 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3951 int pn_ret_val, pn_ret_mem, arity, i;
3953 assert(ret_val != NULL);
/* nothing special needed without SSE2 or without a return value */
3954 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3955 return be_duplicate_node(node);
3958 res_type = get_method_res_type(tp, 0);
3960 if (! is_Primitive_type(res_type)) {
3961 return be_duplicate_node(node);
3964 mode = get_type_mode(res_type);
3965 if (! mode_is_float(mode)) {
3966 return be_duplicate_node(node);
3969 assert(get_method_n_ress(tp) == 1);
3971 pn_ret_val = get_Proj_proj(ret_val);
3972 pn_ret_mem = get_Proj_proj(ret_mem);
3974 /* get the Barrier */
3975 barrier = get_Proj_pred(ret_val);
3977 /* get result input of the Barrier */
3978 ret_val = get_irn_n(barrier, pn_ret_val);
3979 new_ret_val = be_transform_node(ret_val);
3981 /* get memory input of the Barrier */
3982 ret_mem = get_irn_n(barrier, pn_ret_mem);
3983 new_ret_mem = be_transform_node(ret_mem);
3985 frame = get_irg_frame(irg);
3987 dbgi = get_irn_dbg_info(barrier);
3988 block = be_transform_node(get_nodes_block(barrier));
3990 /* store xmm0 onto stack */
3991 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3992 new_ret_mem, new_ret_val);
3993 set_ia32_ls_mode(sse_store, mode);
3994 set_ia32_op_type(sse_store, ia32_AddrModeD);
3995 set_ia32_use_frame(sse_store);
3997 /* load into x87 register */
3998 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3999 set_ia32_op_type(fld, ia32_AddrModeS);
4000 set_ia32_use_frame(fld);
4002 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
4003 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
4005 /* create a new barrier */
4006 arity = get_irn_arity(barrier);
4007 in = ALLOCAN(ir_node*, arity);
4008 for (i = 0; i < arity; ++i) {
4011 if (i == pn_ret_val) {
4013 } else if (i == pn_ret_mem) {
4016 ir_node *in = get_irn_n(barrier, i);
4017 new_in = be_transform_node(in);
/* register the replacement so the Return below picks up the new barrier */
4022 new_barrier = new_ir_node(dbgi, irg, block,
4023 get_irn_op(barrier), get_irn_mode(barrier),
4025 copy_node_attr(barrier, new_barrier);
4026 be_duplicate_deps(barrier, new_barrier);
4027 be_set_transformed_node(barrier, new_barrier);
4029 /* transform normally */
4030 return be_duplicate_node(node);
4034 * Transform a be_AddSP into an ia32_SubSP.
/* note the inversion: growing the stack area means *subtracting* from esp */
4036 static ir_node *gen_be_AddSP(ir_node *node)
4038 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4039 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4041 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4042 match_am | match_immediate);
4046 * Transform a be_SubSP into an ia32_AddSP
/* inverse of gen_be_AddSP: shrinking the stack area adds to esp */
4048 static ir_node *gen_be_SubSP(ir_node *node)
4050 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4051 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4053 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4054 match_am | match_immediate);
4058 * Change some phi modes
/* Copies the Phi, selecting the register class requirement (gp / xmm / vfp)
 * from its mode. Inputs are NOT transformed here: Phis may sit on loops, so
 * the old inputs are kept and patched by the framework later. */
4060 static ir_node *gen_Phi(ir_node *node)
4062 const arch_register_req_t *req;
4063 ir_node *block = be_transform_node(get_nodes_block(node));
4064 ir_graph *irg = current_ir_graph;
4065 dbg_info *dbgi = get_irn_dbg_info(node);
4066 ir_mode *mode = get_irn_mode(node);
4069 if (ia32_mode_needs_gp_reg(mode)) {
4070 /* we shouldn't have any 64bit stuff around anymore */
4071 assert(get_mode_size_bits(mode) <= 32);
4072 /* all integer operations are on 32bit registers now */
4074 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4075 } else if (mode_is_float(mode)) {
4076 if (ia32_cg_config.use_sse2) {
4078 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4081 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4084 req = arch_no_register_req;
4087 /* phi nodes allow loops, so we use the old arguments for now
4088 * and fix this later */
4089 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4090 get_irn_in(node) + 1);
4091 copy_node_attr(node, phi);
4092 be_duplicate_deps(node, phi);
4094 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed eventually */
4096 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp. */
4101 static ir_node *gen_Jmp(ir_node *node)
4103 ir_node *block = get_nodes_block(node);
4104 ir_node *new_block = be_transform_node(block);
4105 dbg_info *dbgi = get_irn_dbg_info(node);
4108 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4109 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump; the target may be folded as an address-mode
 * operand or an immediate. */
4117 static ir_node *gen_IJmp(ir_node *node)
4119 ir_node *block = get_nodes_block(node);
4120 ir_node *new_block = be_transform_node(block);
4121 dbg_info *dbgi = get_irn_dbg_info(node);
4122 ir_node *op = get_IJmp_target(node);
4124 ia32_address_mode_t am;
4125 ia32_address_t *addr = &am.addr;
4127 assert(get_irn_mode(op) == mode_P);
4129 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4131 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4132 addr->mem, am.new_op2);
4133 set_am_attributes(new_node, &am);
4134 SET_IA32_ORIG_NODE(new_node, node);
4136 new_node = fix_mem_proj(new_node, &am);
4142 * Transform a Bound node.
/* Only the lower == 0 form (typical array bound check) is supported: it is
 * lowered to an unsigned compare "index < upper" via Sub flags + Jcc.
 * The generic two-sided form panics. */
4144 static ir_node *gen_Bound(ir_node *node)
4147 ir_node *lower = get_Bound_lower(node);
4148 dbg_info *dbgi = get_irn_dbg_info(node);
4150 if (is_Const_0(lower)) {
4151 /* typical case for Java */
4152 ir_node *sub, *res, *flags, *block;
4154 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4155 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4157 block = get_nodes_block(res);
4158 if (! is_Proj(res)) {
4160 set_irn_mode(sub, mode_T);
4161 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
4163 sub = get_Proj_pred(res);
/* unsigned "<" also catches negative indices (they wrap to huge values) */
4165 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
4166 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4167 SET_IA32_ORIG_NODE(new_node, node);
4169 panic("generic Bound not supported in ia32 Backend");
/* Lower the l_ShlDep pseudo-node (Shl with an explicit scheduling
 * dependency) to a real ia32 Shl. */
4175 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4177 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4178 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4180 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4181 match_immediate | match_mode_neutral);
/* Lower l_ShrDep (logical shift right with dependency) to ia32 Shr. */
4184 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4186 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4187 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4188 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lower l_SarDep (arithmetic shift right with dependency) to ia32 Sar. */
4192 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4194 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4195 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4196 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lower l_Add (low word of a 64bit add) to ia32 Add; the result is forced
 * to mode_T so the carry flag Proj stays available for the matching Adc. */
4200 static ir_node *gen_ia32_l_Add(ir_node *node)
4202 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4203 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4204 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4205 match_commutative | match_am | match_immediate |
4206 match_mode_neutral);
4208 if (is_Proj(lowered)) {
4209 lowered = get_Proj_pred(lowered);
4211 assert(is_ia32_Add(lowered));
4212 set_irn_mode(lowered, mode_T);
/* Lower l_Adc (high word add-with-carry) to ia32 Adc, consuming the flags
 * produced by the corresponding Add. */
4218 static ir_node *gen_ia32_l_Adc(ir_node *node)
4220 return gen_binop_flags(node, new_bd_ia32_Adc,
4221 match_commutative | match_am | match_immediate |
4222 match_mode_neutral);
4226 * Transforms a l_MulS into a "real" MulS node.
4228 * @return the created ia32 Mul node
/* unsigned widening multiply (edx:eax result) */
4230 static ir_node *gen_ia32_l_Mul(ir_node *node)
4232 ir_node *left = get_binop_left(node);
4233 ir_node *right = get_binop_right(node);
4235 return gen_binop(node, left, right, new_bd_ia32_Mul,
4236 match_commutative | match_am | match_mode_neutral);
4240 * Transforms a l_IMulS into a "real" IMul1OPS node.
4242 * @return the created ia32 IMul1OP node
/* signed widening multiply, one-operand imul form */
4244 static ir_node *gen_ia32_l_IMul(ir_node *node)
4246 ir_node *left = get_binop_left(node);
4247 ir_node *right = get_binop_right(node);
4249 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4250 match_commutative | match_am | match_mode_neutral);
/* Lower l_Sub (low word of a 64bit sub) to ia32 Sub; mode_T keeps the
 * borrow flag Proj available for the matching Sbb. Sub is not commutative,
 * so match_commutative is deliberately absent. */
4253 static ir_node *gen_ia32_l_Sub(ir_node *node)
4255 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4256 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4257 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4258 match_am | match_immediate | match_mode_neutral);
4260 if (is_Proj(lowered)) {
4261 lowered = get_Proj_pred(lowered);
4263 assert(is_ia32_Sub(lowered));
4264 set_irn_mode(lowered, mode_T);
/* Lower l_Sbb (high word subtract-with-borrow) to ia32 Sbb. */
4270 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4272 return gen_binop_flags(node, new_bd_ia32_Sbb,
4273 match_am | match_immediate | match_mode_neutral);
4277 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4278 * op1 - target to be shifted
4279 * op2 - contains bits to be shifted into target
4281 * Only op3 can be an immediate.
4283 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4284 ir_node *low, ir_node *count)
4286 ir_node *block = get_nodes_block(node);
4287 ir_node *new_block = be_transform_node(block);
4288 dbg_info *dbgi = get_irn_dbg_info(node);
4289 ir_node *new_high = be_transform_node(high);
4290 ir_node *new_low = be_transform_node(low);
4294 /* the shift amount can be any mode that is bigger than 5 bits, since all
4295 * other bits are ignored anyway */
/* skip single-user int Convs on the count: hardware masks it to 5 bits */
4296 while (is_Conv(count) &&
4297 get_irn_n_edges(count) == 1 &&
4298 mode_is_int(get_irn_mode(count))) {
4299 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4300 count = get_Conv_op(count);
4302 new_count = create_immediate_or_transform(count, 0);
4304 if (is_ia32_l_ShlD(node)) {
4305 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4308 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4311 SET_IA32_ORIG_NODE(new_node, node);
/* Lower l_ShlD (64bit shift-left helper) via gen_lowered_64bit_shifts. */
4316 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4318 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4319 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4320 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4321 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower l_ShrD (64bit shift-right helper) via gen_lowered_64bit_shifts. */
4324 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4326 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4327 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4328 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4329 return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a 64bit integer (given as low/high 32bit words) to float on x87:
 * store both words to a frame slot, fild the 64bit slot, and — since fild
 * treats the value as signed — compensate unsigned inputs with the most
 * significant bit set by adding the 2^64 bias constant. x87 only. */
4332 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4334 ir_node *src_block = get_nodes_block(node);
4335 ir_node *block = be_transform_node(src_block);
4336 ir_graph *irg = current_ir_graph;
4337 dbg_info *dbgi = get_irn_dbg_info(node);
4338 ir_node *frame = get_irg_frame(irg);
4339 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4340 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4341 ir_node *new_val_low = be_transform_node(val_low);
4342 ir_node *new_val_high = be_transform_node(val_high);
4344 ir_node *sync, *fild, *res;
4345 ir_node *store_low, *store_high;
4347 if (ia32_cg_config.use_sse2) {
4348 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* write both 32bit halves into one 64bit frame slot (high word at +4) */
4352 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4354 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4356 SET_IA32_ORIG_NODE(store_low, node);
4357 SET_IA32_ORIG_NODE(store_high, node);
4359 set_ia32_use_frame(store_low);
4360 set_ia32_use_frame(store_high);
4361 set_ia32_op_type(store_low, ia32_AddrModeD);
4362 set_ia32_op_type(store_high, ia32_AddrModeD);
4363 set_ia32_ls_mode(store_low, mode_Iu);
4364 set_ia32_ls_mode(store_high, mode_Is);
4365 add_ia32_am_offs_int(store_high, 4);
/* both stores must complete before the fild reads the slot */
4369 sync = new_rd_Sync(dbgi, block, 2, in);
4372 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4374 set_ia32_use_frame(fild);
4375 set_ia32_op_type(fild, ia32_AddrModeS);
4376 set_ia32_ls_mode(fild, mode_Ls);
4378 SET_IA32_ORIG_NODE(fild, node);
4380 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
4382 if (! mode_is_signed(get_irn_mode(val_high))) {
4383 ia32_address_mode_t am;
/* select bias entry by the sign bit: (high >> 31) scaled index into a
 * two-entry table { 0.0, 2^64 } referenced via the ULLBIAS symconst */
4385 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4388 am.addr.base = noreg_GP;
4389 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4390 am.addr.mem = nomem;
4393 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4394 am.addr.use_frame = 0;
4395 am.addr.frame_entity = NULL;
4396 am.addr.symconst_sign = 0;
4397 am.ls_mode = mode_F;
4398 am.mem_proj = nomem;
4399 am.op_type = ia32_AddrModeS;
4401 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4402 am.pinned = op_pin_state_floats;
4404 am.ins_permuted = 0;
4406 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4407 am.new_op1, am.new_op2, get_fpcw());
4408 set_am_attributes(fadd, &am);
4410 set_irn_mode(fadd, mode_T);
4411 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/* Convert an x87 float to a 64bit integer: fist the value into a 64bit
 * frame slot; the two 32bit halves are read back by gen_Proj_l_FloattoLL. */
4416 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4418 ir_node *src_block = get_nodes_block(node);
4419 ir_node *block = be_transform_node(src_block);
4420 ir_graph *irg = get_Block_irg(block);
4421 dbg_info *dbgi = get_irn_dbg_info(node);
4422 ir_node *frame = get_irg_frame(irg);
4423 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4424 ir_node *new_val = be_transform_node(val);
4425 ir_node *fist, *mem;
4427 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4428 SET_IA32_ORIG_NODE(fist, node);
4429 set_ia32_use_frame(fist);
4430 set_ia32_op_type(fist, ia32_AddrModeD);
4431 set_ia32_ls_mode(fist, mode_Ls);
4437 * the BAD transformer.
/* Installed for opcodes that must never reach the transformer; aborts. */
4439 static ir_node *bad_transform(ir_node *node)
4441 panic("No transform function for %+F available.", node);
/* Read one 32bit half of the 64bit result that gen_ia32_l_FloattoLL stored
 * into the frame: the high-word Proj loads at offset +4, the low word at 0. */
4445 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4447 ir_node *block = be_transform_node(get_nodes_block(node));
4448 ir_graph *irg = get_Block_irg(block);
4449 ir_node *pred = get_Proj_pred(node);
4450 ir_node *new_pred = be_transform_node(pred);
4451 ir_node *frame = get_irg_frame(irg);
4452 dbg_info *dbgi = get_irn_dbg_info(node);
4453 long pn = get_Proj_proj(node);
4458 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4459 SET_IA32_ORIG_NODE(load, node);
4460 set_ia32_use_frame(load);
4461 set_ia32_op_type(load, ia32_AddrModeS);
4462 set_ia32_ls_mode(load, mode_Iu);
4463 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4464 * 32 bit from it with this particular load */
4465 attr = get_ia32_attr(load);
4466 attr->data.need_64bit_stackent = 1;
4468 if (pn == pn_ia32_l_FloattoLL_res_high) {
4469 add_ia32_am_offs_int(load, 4);
4471 assert(pn == pn_ia32_l_FloattoLL_res_low);
4474 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4480 * Transform the Projs of an AddSP.
/* be_AddSP was lowered to ia32 SubSP (see gen_be_AddSP), so each Proj is
 * renumbered to the matching SubSP output; the sp result is pinned to esp. */
4482 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4484 ir_node *block = be_transform_node(get_nodes_block(node));
4485 ir_node *pred = get_Proj_pred(node);
4486 ir_node *new_pred = be_transform_node(pred);
4487 dbg_info *dbgi = get_irn_dbg_info(node);
4488 long proj = get_Proj_proj(node);
4490 if (proj == pn_be_AddSP_sp) {
4491 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4492 pn_ia32_SubSP_stack);
4493 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4495 } else if (proj == pn_be_AddSP_res) {
4496 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4497 pn_ia32_SubSP_addr);
4498 } else if (proj == pn_be_AddSP_M) {
4499 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4502 panic("No idea how to transform proj->AddSP");
4506 * Transform the Projs of a SubSP.
/* counterpart of gen_Proj_be_AddSP: be_SubSP became ia32 AddSP */
4508 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4510 ir_node *block = be_transform_node(get_nodes_block(node));
4511 ir_node *pred = get_Proj_pred(node);
4512 ir_node *new_pred = be_transform_node(pred);
4513 dbg_info *dbgi = get_irn_dbg_info(node);
4514 long proj = get_Proj_proj(node);
4516 if (proj == pn_be_SubSP_sp) {
4517 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4518 pn_ia32_AddSP_stack);
4519 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4521 } else if (proj == pn_be_SubSP_M) {
4522 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4525 panic("No idea how to transform proj->SubSP");
4529 * Transform and renumber the Projs from a Load.
/* The pred Load may have been turned into ia32 Load / Conv_I2I / xLoad /
 * vfld — or consumed entirely by source address mode, in which case only a
 * memory Proj remains legal. Each case renumbers the Proj accordingly. */
4531 static ir_node *gen_Proj_Load(ir_node *node)
4534 ir_node *block = be_transform_node(get_nodes_block(node));
4535 ir_node *pred = get_Proj_pred(node);
4536 dbg_info *dbgi = get_irn_dbg_info(node);
4537 long proj = get_Proj_proj(node);
4539 /* loads might be part of source address mode matches, so we don't
4540 * transform the ProjMs yet (with the exception of loads whose result is
4543 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4545 ir_node *old_block = get_nodes_block(node);
4547 /* this is needed, because sometimes we have loops that are only
4548 reachable through the ProjM */
4549 be_enqueue_preds(node);
4550 /* do it in 2 steps, to silence firm verifier */
4551 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4552 set_Proj_proj(res, pn_ia32_mem);
4556 /* renumber the proj */
4557 new_pred = be_transform_node(pred);
4558 if (is_ia32_Load(new_pred)) {
4561 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4563 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4564 case pn_Load_X_regular:
4565 return new_rd_Jmp(dbgi, block);
4566 case pn_Load_X_except:
4567 /* This Load might raise an exception. Mark it. */
4568 set_ia32_exc_label(new_pred, 1);
4569 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* load was fused into a conv (load+sign/zero-extend) */
4573 } else if (is_ia32_Conv_I2I(new_pred) ||
4574 is_ia32_Conv_I2I8Bit(new_pred)) {
4575 set_irn_mode(new_pred, mode_T);
4576 if (proj == pn_Load_res) {
4577 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4578 } else if (proj == pn_Load_M) {
4579 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4581 } else if (is_ia32_xLoad(new_pred)) {
4584 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4586 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4587 case pn_Load_X_regular:
4588 return new_rd_Jmp(dbgi, block);
4589 case pn_Load_X_except:
4590 /* This Load might raise an exception. Mark it. */
4591 set_ia32_exc_label(new_pred, 1);
4592 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4596 } else if (is_ia32_vfld(new_pred)) {
4599 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4601 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4602 case pn_Load_X_regular:
4603 return new_rd_Jmp(dbgi, block);
4604 case pn_Load_X_except:
4605 /* This Load might raise an exception. Mark it. */
4606 set_ia32_exc_label(new_pred, 1);
4607 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4612 /* can happen for ProJMs when source address mode happened for the
4615 /* however it should not be the result proj, as that would mean the
4616 load had multiple users and should not have been used for
4618 if (proj != pn_Load_M) {
4619 panic("internal error: transformed node not a Load");
4621 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4624 panic("No idea how to transform proj");
4628 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map to one ia32 Div/IDiv node; the original
 * opcode of the pred selects which Proj-number table applies. */
4630 static ir_node *gen_Proj_DivMod(ir_node *node)
4632 ir_node *block = be_transform_node(get_nodes_block(node));
4633 ir_node *pred = get_Proj_pred(node);
4634 ir_node *new_pred = be_transform_node(pred);
4635 dbg_info *dbgi = get_irn_dbg_info(node);
4636 long proj = get_Proj_proj(node);
4638 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4640 switch (get_irn_opcode(pred)) {
4644 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4646 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4647 case pn_Div_X_regular:
4648 return new_rd_Jmp(dbgi, block);
4649 case pn_Div_X_except:
4650 set_ia32_exc_label(new_pred, 1);
4651 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* Mod: same node, remainder output */
4659 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4661 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4662 case pn_Mod_X_except:
4663 set_ia32_exc_label(new_pred, 1);
4664 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* DivMod: both quotient and remainder outputs are in use */
4672 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4673 case pn_DivMod_res_div:
4674 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4675 case pn_DivMod_res_mod:
4676 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4677 case pn_DivMod_X_regular:
4678 return new_rd_Jmp(dbgi, block);
4679 case pn_DivMod_X_except:
4680 set_ia32_exc_label(new_pred, 1);
4681 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4690 panic("No idea how to transform proj->DivMod");
4694 * Transform and renumber the Projs from a CopyB.
/* CopyB was lowered to either CopyB_i (known size, rep movs with immediate
 * count) or the generic CopyB; pick the matching memory Proj number. */
4696 static ir_node *gen_Proj_CopyB(ir_node *node)
4698 ir_node *block = be_transform_node(get_nodes_block(node));
4699 ir_node *pred = get_Proj_pred(node);
4700 ir_node *new_pred = be_transform_node(pred);
4701 dbg_info *dbgi = get_irn_dbg_info(node);
4702 long proj = get_Proj_proj(node);
4705 case pn_CopyB_M_regular:
4706 if (is_ia32_CopyB_i(new_pred)) {
4707 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4708 } else if (is_ia32_CopyB(new_pred)) {
4709 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4716 panic("No idea how to transform proj->CopyB");
4720 * Transform and renumber the Projs from a Quot.
/* Quot became either an SSE xDiv or an x87 vfdiv; renumber memory and
 * result Projs per the actual node kind. */
4722 static ir_node *gen_Proj_Quot(ir_node *node)
4724 ir_node *block = be_transform_node(get_nodes_block(node));
4725 ir_node *pred = get_Proj_pred(node);
4726 ir_node *new_pred = be_transform_node(pred);
4727 dbg_info *dbgi = get_irn_dbg_info(node);
4728 long proj = get_Proj_proj(node);
4732 if (is_ia32_xDiv(new_pred)) {
4733 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4734 } else if (is_ia32_vfdiv(new_pred)) {
4735 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4739 if (is_ia32_xDiv(new_pred)) {
4740 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4741 } else if (is_ia32_vfdiv(new_pred)) {
4742 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* float division does not raise exceptions here; X projs unsupported */
4745 case pn_Quot_X_regular:
4746 case pn_Quot_X_except:
4751 panic("No idea how to transform proj->Quot");
/* Transform an indirect be_Call into an ia32 Call: fold the callee address
 * as AM/immediate, route the up-to-three GP register parameters (eax/ecx/
 * edx) to fixed inputs, and register SSE calls for x87 post-processing. */
4754 static ir_node *gen_be_Call(ir_node *node)
4756 dbg_info *const dbgi = get_irn_dbg_info(node);
4757 ir_node *const src_block = get_nodes_block(node);
4758 ir_node *const block = be_transform_node(src_block);
4759 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4760 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4761 ir_node *const sp = be_transform_node(src_sp);
4762 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4763 ia32_address_mode_t am;
4764 ia32_address_t *const addr = &am.addr;
4769 ir_node * eax = noreg_GP;
4770 ir_node * ecx = noreg_GP;
4771 ir_node * edx = noreg_GP;
4772 unsigned const pop = be_Call_get_pop(node);
4773 ir_type *const call_tp = be_Call_get_type(node);
4774 int old_no_pic_adjust;
4776 /* Run the x87 simulator if the call returns a float value */
4777 if (get_method_n_ress(call_tp) > 0) {
4778 ir_type *const res_type = get_method_res_type(call_tp, 0);
4779 ir_mode *const res_mode = get_type_mode(res_type);
4781 if (res_mode != NULL && mode_is_float(res_mode)) {
4782 env_cg->do_x87_sim = 1;
4786 /* We do not want be_Call direct calls */
4787 assert(be_Call_get_entity(node) == NULL);
4789 /* special case for PIC trampoline calls */
4790 old_no_pic_adjust = no_pic_adjust;
4791 no_pic_adjust = env_cg->birg->main_env->options->pic;
4793 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4794 match_am | match_immediate);
4796 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk remaining register parameters backwards */
4798 i = get_irn_arity(node) - 1;
4799 fpcw = be_transform_node(get_irn_n(node, i--));
4800 for (; i >= be_pos_Call_first_arg; --i) {
4801 arch_register_req_t const *const req = arch_get_register_req(node, i);
4802 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4804 assert(req->type == arch_register_req_type_limited);
4805 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4807 switch (*req->limited) {
4808 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4809 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4810 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4811 default: panic("Invalid GP register for register parameter");
4815 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4816 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4817 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4818 set_am_attributes(call, &am);
4819 call = fix_mem_proj(call, &am);
4821 if (get_irn_pinned(node) == op_pin_state_pinned)
4822 set_irn_pinned(call, op_pin_state_pinned);
4824 SET_IA32_ORIG_NODE(call, node);
4826 if (ia32_cg_config.use_sse2) {
4827 /* remember this call for post-processing */
4828 ARR_APP1(ir_node *, call_list, call);
4829 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4836 * Transform Builtin trap
/* emits ud2, which raises an invalid-opcode exception */
4838 static ir_node *gen_trap(ir_node *node) {
4839 dbg_info *dbgi = get_irn_dbg_info(node);
4840 ir_node *block = be_transform_node(get_nodes_block(node));
4841 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4843 return new_bd_ia32_UD2(dbgi, block, mem);
4847 * Transform Builtin debugbreak
/* emits the debugger breakpoint instruction */
4849 static ir_node *gen_debugbreak(ir_node *node) {
4850 dbg_info *dbgi = get_irn_dbg_info(node);
4851 ir_node *block = be_transform_node(get_nodes_block(node));
4852 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4854 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4858 * Transform Builtin return_address
/* param 0 is the constant frame level to climb; for level > 0 a ClimbFrame
 * walks up saved frame pointers first, then the return address slot of the
 * resulting frame is loaded. */
4860 static ir_node *gen_return_address(ir_node *node) {
4861 ir_node *param = get_Builtin_param(node, 0);
4862 ir_node *frame = get_Builtin_param(node, 1);
4863 dbg_info *dbgi = get_irn_dbg_info(node);
4864 tarval *tv = get_Const_tarval(param);
4865 unsigned long value = get_tarval_long(tv);
4867 ir_node *block = be_transform_node(get_nodes_block(node));
4868 ir_node *ptr = be_transform_node(frame);
4872 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4873 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4874 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4877 /* load the return address from this frame */
4878 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4880 set_irn_pinned(load, get_irn_pinned(node));
4881 set_ia32_op_type(load, ia32_AddrModeS);
4882 set_ia32_ls_mode(load, mode_Iu);
4884 set_ia32_am_offs_int(load, 0);
4885 set_ia32_use_frame(load);
4886 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4888 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned loads may be rematerialized; the Proj numbers of all load
 * kinds must agree for that to be safe */
4889 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4890 && pn_ia32_vfld_res == pn_ia32_Load_res
4891 && pn_ia32_Load_res == pn_ia32_res);
4892 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4895 SET_IA32_ORIG_NODE(load, node);
4896 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4900 * Transform Builtin frame_address
/* analogous to gen_return_address, but loads the saved frame pointer slot
 * of the selected frame instead of the return address */
4902 static ir_node *gen_frame_address(ir_node *node) {
4903 ir_node *param = get_Builtin_param(node, 0);
4904 ir_node *frame = get_Builtin_param(node, 1);
4905 dbg_info *dbgi = get_irn_dbg_info(node);
4906 tarval *tv = get_Const_tarval(param);
4907 unsigned long value = get_tarval_long(tv);
4909 ir_node *block = be_transform_node(get_nodes_block(node));
4910 ir_node *ptr = be_transform_node(frame);
4915 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4916 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4917 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4920 /* load the frame address from this frame */
4921 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4923 set_irn_pinned(load, get_irn_pinned(node));
4924 set_ia32_op_type(load, ia32_AddrModeS);
4925 set_ia32_ls_mode(load, mode_Iu);
4927 ent = ia32_get_frame_address_entity();
4929 set_ia32_am_offs_int(load, 0);
4930 set_ia32_use_frame(load);
4931 set_ia32_frame_ent(load, ent);
4933 /* will fail anyway, but gcc does this: */
4934 set_ia32_am_offs_int(load, 0);
4937 if (get_irn_pinned(node) == op_pin_state_floats) {
/* see gen_return_address: rematerialization relies on equal Proj numbers */
4938 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4939 && pn_ia32_vfld_res == pn_ia32_Load_res
4940 && pn_ia32_Load_res == pn_ia32_res);
4941 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4944 SET_IA32_ORIG_NODE(load, node);
4945 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
 * Transform Builtin prefetch.
static ir_node *gen_prefetch(ir_node *node) {
	/* Lower __builtin_prefetch(ptr, rw, locality) to an ia32 prefetch
	 * instruction (SSE or 3DNow! flavour), or drop it entirely when the
	 * target supports neither. */
	ir_node *ptr, *block, *mem, *base, *index;
	ir_node *param, *new_node;
	ia32_address_t addr;
	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
		/* no prefetch at all, route memory */
		return be_transform_node(get_Builtin_mem(node));
	/* param 1: rw flag (0 = read, 1 = write) */
	param = get_Builtin_param(node, 1);
	tv = get_Const_tarval(param);
	rw = get_tarval_long(tv);
	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ptr = get_Builtin_param(node, 0);
	ia32_create_address_mode(&addr, ptr, 0);
	base = be_transform_node(base);
	if (index == NULL) {
	index = be_transform_node(index);
	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(get_nodes_block(node));
	mem = be_transform_node(get_Builtin_mem(node));
	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
		/* we have 3DNow!, this was already checked above */
		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
	} else if (ia32_cg_config.use_sse_prefetch) {
		/* note: rw == 1 is IGNORED in that case */
		param = get_Builtin_param(node, 2);
		tv = get_Const_tarval(param);
		locality = get_tarval_long(tv);
		/* SSE style prefetch; locality selects NTA/T2/T1/T0 */
		new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
	assert(ia32_cg_config.use_3dnow_prefetch);
	/* 3DNow! style prefetch */
	new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode_Bu);
	set_address(new_node, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
	be_dep_on_frame(new_node);
	/* a prefetch only produces memory */
	return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
5033 * Transform bsf like node
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	/* Generic helper for bsf/bsr-like builtins: transform the single
	 * operand with address-mode matching and construct the node via
	 * the given factory function. */
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* allow the operand to be folded as a memory operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));
	SET_IA32_ORIG_NODE(cnt, node);
	/* if a memory operand was folded, reroute the mem proj */
	return fix_mem_proj(cnt, &am);
5058 * Transform builtin ffs.
static ir_node *gen_ffs(ir_node *node)
	/* Lower __builtin_ffs(x): bsf gives the lowest set bit index, but is
	 * undefined for x == 0 where ffs must return 0. The code below ORs
	 * the bsf result with -1 when ZF was set (x == 0), then adds 1:
	 *   x != 0 -> bsf(x) + 1,  x == 0 -> -1 + 1 == 0. */
	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node *real = skip_Proj(bsf);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *flag, *set, *conv, *neg, *or;
	/* the bsf may not yet be in mode_T; turn it into one so we can also
	 * project the flags output */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
	flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
	/* set = 1 iff the input was zero (ZF set by bsf) */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);
	/* zero-extend the 8-bit setcc result to 32 bit */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);
	/* neg = 0 for x != 0, all-ones for x == 0 */
	neg = new_bd_ia32_Neg(dbgi, block, conv);
	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);
	/* final +1 turns the all-ones case into 0 */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5096 * Transform builtin clz.
static ir_node *gen_clz(ir_node *node)
	/* Lower __builtin_clz(x): bsr yields the index of the highest set bit,
	 * so clz(x) == 31 - bsr(x) == bsr(x) ^ 31 for 32-bit operands.
	 * (bsr is undefined for x == 0, as is __builtin_clz.) */
	ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
	ir_node *real = skip_Proj(bsr);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5110 * Transform builtin ctz.
static ir_node *gen_ctz(ir_node *node)
	/* Lower __builtin_ctz(x): bsf directly yields the index of the lowest
	 * set bit (undefined for x == 0, matching the builtin's contract). */
	return gen_unop_AM(node, new_bd_ia32_Bsf);
5118 * Transform builtin parity.
static ir_node *gen_parity(ir_node *node)
	/* Lower __builtin_parity(x) using the x86 parity flag set by a compare.
	 * NOTE(review): on x86 the parity flag only reflects the least
	 * significant BYTE of the result — verify that a 32-bit parity is
	 * really computed correctly here (an elided reduction step may exist). */
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *imm, *cmp, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	/* compare the operand against 0 just to set the flags */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
						  addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);
	SET_IA32_ORIG_NODE(cmp, node);
	cmp = fix_mem_proj(cmp, &am);
	/* materialize the parity flag as a 0/1 byte */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);
	/* zero-extend the byte to 32 bit */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
										nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
5158 * Transform builtin popcount
5160 static ir_node *gen_popcount(ir_node *node) {
5161 ir_node *param = get_Builtin_param(node, 0);
5162 dbg_info *dbgi = get_irn_dbg_info(node);
5164 ir_node *block = get_nodes_block(node);
5165 ir_node *new_block = be_transform_node(block);
5168 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5170 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5171 if (ia32_cg_config.use_popcnt) {
5172 ia32_address_mode_t am;
5173 ia32_address_t *addr = &am.addr;
5176 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5178 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5179 set_am_attributes(cnt, &am);
5180 set_ia32_ls_mode(cnt, get_irn_mode(param));
5182 SET_IA32_ORIG_NODE(cnt, node);
5183 return fix_mem_proj(cnt, &am);
5186 new_param = be_transform_node(param);
5188 /* do the standard popcount algo */
5190 /* m1 = x & 0x55555555 */
5191 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5192 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5195 simm = ia32_create_Immediate(NULL, 0, 1);
5196 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5198 /* m2 = s1 & 0x55555555 */
5199 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5202 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5204 /* m4 = m3 & 0x33333333 */
5205 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5206 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5209 simm = ia32_create_Immediate(NULL, 0, 2);
5210 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5212 /* m5 = s2 & 0x33333333 */
5213 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5216 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5218 /* m7 = m6 & 0x0F0F0F0F */
5219 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5220 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5223 simm = ia32_create_Immediate(NULL, 0, 4);
5224 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5226 /* m8 = s3 & 0x0F0F0F0F */
5227 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5230 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5232 /* m10 = m9 & 0x00FF00FF */
5233 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5234 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5237 simm = ia32_create_Immediate(NULL, 0, 8);
5238 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5240 /* m11 = s4 & 0x00FF00FF */
5241 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5243 /* m12 = m10 + m11 */
5244 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5246 /* m13 = m12 & 0x0000FFFF */
5247 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5248 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5250 /* s5 = m12 >> 16 */
5251 simm = ia32_create_Immediate(NULL, 0, 16);
5252 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5254 /* res = m13 + s5 */
5255 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5259 * Transform builtin byte swap.
5261 static ir_node *gen_bswap(ir_node *node) {
5262 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5263 dbg_info *dbgi = get_irn_dbg_info(node);
5265 ir_node *block = get_nodes_block(node);
5266 ir_node *new_block = be_transform_node(block);
5267 ir_mode *mode = get_irn_mode(param);
5268 unsigned size = get_mode_size_bits(mode);
5269 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5273 if (ia32_cg_config.use_i486) {
5274 /* swap available */
5275 return new_bd_ia32_Bswap(dbgi, new_block, param);
5277 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5278 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5280 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5281 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5283 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5285 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5286 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5288 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5289 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5292 /* swap16 always available */
5293 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5296 panic("Invalid bswap size (%d)", size);
5301 * Transform builtin outport.
static ir_node *gen_outport(ir_node *node) {
	/* Lower __builtin_outport(port, value): emit an ia32 Outport (out
	 * instruction). The port may become an immediate operand. */
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *oldv = get_Builtin_param(node, 1);
	/* the value's mode determines the operand size of the out */
	ir_mode *mode = get_irn_mode(oldv);
	ir_node *value = be_transform_node(oldv);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	set_ia32_ls_mode(res, mode);
5318 * Transform builtin inport.
static ir_node *gen_inport(ir_node *node) {
	/* Lower __builtin_inport(port): emit an ia32 Inport (in instruction).
	 * The result mode is taken from the builtin's method type. */
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	set_ia32_ls_mode(res, mode);
	/* check for missing Result Proj */
5337 * Transform a builtin inner trampoline
static ir_node *gen_inner_trampoline(ir_node *node) {
	/* Materialize an inner trampoline: write the 10-byte sequence
	 *   B9 <env>        mov ecx, <env>
	 *   E9 <rel32>      jmp <callee - trampoline - 10>
	 * into the memory at 'ptr' via a chain of stores. */
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = get_Builtin_param(node, 1);
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *trampoline;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;
	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);
	if (addr.base == NULL) {
		addr.base = noreg_GP;
	addr.base = be_transform_node(addr.base);
	if (addr.index == NULL) {
		addr.index = noreg_GP;
	addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);
	/* mov ecx, <env> */
	/* opcode byte 0xB9 = mov ecx, imm32 */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
								  addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	/* store the 32-bit environment pointer (the mov's immediate) */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
							  addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);
	/* jmp rel <callee> */
	/* opcode byte 0xE9 = jmp rel32 */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
								  addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);
	trampoline = be_transform_node(ptr);
	/* the callee is typically an immediate */
	if (is_SymConst(callee)) {
		/* -10 accounts for the trampoline size (rel32 is relative to the
		 * end of the 10-byte sequence) */
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
	rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	/* rel = callee - 10 - trampoline */
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
							  addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);
	/* Tuple of (memory, trampoline address) for the Proj transformer */
	return new_r_Tuple(new_block, 2, in);
5426 * Transform Builtin node.
static ir_node *gen_Builtin(ir_node *node) {
	/* Dispatch a Builtin node to its kind-specific transformer;
	 * unhandled kinds are a hard error on ia32. */
	ir_builtin_kind kind = get_Builtin_kind(node);
		return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
		return gen_ffs(node);
		return gen_clz(node);
		return gen_ctz(node);
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
		return gen_bswap(node);
		return gen_outport(node);
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5465 * Transform Proj(Builtin) node.
static ir_node *gen_Proj_Builtin(ir_node *proj) {
	/* Transform a Proj of a Builtin: for single-result builtins the
	 * transformed node is the result itself; Inport and inner_trampoline
	 * need per-proj mapping. */
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		/* single value result: the transformed node IS the result */
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		/* memory-only builtins */
		assert(get_Proj_proj(proj) == pn_Builtin_M);
	if (get_Proj_proj(proj) == pn_Builtin_1_result) {
		return new_r_Proj(get_nodes_block(new_node),
						  new_node, get_irn_mode(proj), pn_ia32_Inport_res);
	assert(get_Proj_proj(proj) == pn_Builtin_M);
	return new_r_Proj(get_nodes_block(new_node),
					  new_node, mode_M, pn_ia32_Inport_M);
	case ir_bk_inner_trampoline:
		/* gen_inner_trampoline returned a Tuple (mem, trampoline addr) */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
static ir_node *gen_be_IncSP(ir_node *node)
	/* Duplicate the stack-pointer adjustment; on ia32 an IncSP is an
	 * add/sub and therefore clobbers the flags. */
	ir_node *res = be_duplicate_node(node);
	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5518 * Transform the Projs from a be_Call.
static ir_node *gen_Proj_be_Call(ir_node *node)
	/* Transform a Proj of a be_Call: map be_Call proj numbers to
	 * ia32_Call proj numbers and pin fixed registers (esp, fpcw). */
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);
	if (proj == pn_be_Call_M_regular) {
		/* NOTE(review): n_ia32_Call_mem is an *input* position constant;
		 * verify this is really the intended proj number here
		 * (pn_ia32_Call_M would be the output counterpart). */
		return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
	arch_register_req_t const *const req = arch_get_register_req_out(node);
	int const n_outs = arch_irn_get_n_outs(new_call);
	assert(proj >= pn_be_Call_first_res);
	assert(req->type & arch_register_req_type_limited);
	/* find the output of the new call whose register constraint matches
	 * the constraint of the original result proj */
	for (i = 0; i < n_outs; ++i) {
		arch_register_req_t const *const new_req
			= arch_get_out_register_req(new_call, i);
		if (!(new_req->type & arch_register_req_type_limited) ||
			new_req->cls != req->cls ||
			*new_req->limited != *req->limited)
	res = new_rd_Proj(dbgi, block, new_call, mode, proj);
	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5584 * Transform the Projs from a Cmp.
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* A Proj of a Cmp must never reach the backend transformer:
	 * all mode_b values should have been lowered beforehand. */
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5594 * Transform the Projs from a Bound.
static ir_node *gen_Proj_Bound(ir_node *node)
	/* Transform a Proj of a Bound node: the Bound becomes a Jcc, so the
	 * control-flow projs map to the Jcc's true/false outputs, memory and
	 * result pass through to the Bound's operands. */
	ir_node *new_node, *block;
	ir_node *pred = get_Proj_pred(node);
	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
		/* the result of a Bound is simply the checked index */
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
static ir_node *gen_Proj_ASM(ir_node *node)
	/* Transform a Proj of an ASM node; the memory output is always the
	 * last output of the transformed ASM. */
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *block = get_nodes_block(new_pred);
	long pos = get_Proj_proj(node);
	if (mode == mode_M) {
		/* memory proj is the last output of the new ASM */
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");
	return new_r_Proj(block, new_pred, mode, pos);
5641 * Transform and potentially renumber Proj nodes.
static ir_node *gen_Proj(ir_node *node)
	/* Central Proj transformer: dispatch on the predecessor's opcode to
	 * the specialized Proj transformers, renumbering projs as needed. */
	ir_node *pred = get_Proj_pred(node);
	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			/* a Store only has a memory result; the transformed Store
			 * itself represents it */
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_Builtin(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
		case pn_Start_X_initial_exec: {
			ir_node *block = get_nodes_block(pred);
			ir_node *new_block = be_transform_node(block);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, new_block);
		case pn_Start_P_tls:
			return gen_Proj_tls(node);
		if (is_ia32_l_FloattoLL(pred)) {
			return gen_Proj_l_FloattoLL(node);
		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		ir_mode *mode = get_irn_mode(node);
		if (ia32_mode_needs_gp_reg(mode)) {
			ir_node *new_pred = be_transform_node(pred);
			ir_node *block = be_transform_node(get_nodes_block(node));
			/* renumber to mode_Iu and keep the node number for debugging */
			ir_node *new_proj = new_r_Proj(block, new_pred,
										   mode_Iu, get_Proj_proj(node));
			new_proj->node_nr = node->node_nr;
		return be_duplicate_node(node);
5721 * Enters all transform functions into the generic pointer
static void register_transformers(void)
	/* Install all gen_* transform callbacks into the generic function
	 * pointer of each opcode; unsupported opcodes get bad_transform. */
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);
	/* we should never see these nodes */
	/* handle builtins */
	/* handle generic backend nodes */
5820 * Pre-transform all unknown and noreg nodes.
static void ia32_pretransform_node(void)
	/* Pre-transform the per-graph singleton nodes (Unknown/NoReg per
	 * register class) and cache the shared nomem/noreg_GP nodes used by
	 * all transformers above. */
	ia32_code_gen_t *cg = env_cg;
	cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
	nomem = get_irg_no_mem(current_ir_graph);
	noreg_GP = ia32_new_NoReg_gp(cg);
5840 * Walker, checks if all ia32 nodes producing more than one result have their
5841 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
static void add_missing_keep_walker(ir_node *node, void *data)
	/* For every multi-output ia32 node, collect which result projs exist
	 * (as a bitset) and create Proj + be_Keep for missing ones so the
	 * register allocator sees all defined values. */
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);
	if (!is_ia32_irn(node))
	n_outs = arch_irn_get_n_outs(node);
	if (is_ia32_SwitchJmp(node))
	/* found_projs is a bitset, so the out count must fit in it */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		/* The node could be kept */
		/* memory outputs need no keep */
		if (get_irn_mode(proj) == mode_M)
		pn = get_Proj_proj(proj);
		assert(pn < n_outs);
		found_projs |= 1 << pn;
	/* are keeps missing? */
	for (i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;
		if (found_projs & (1 << i)) {
		req = arch_get_out_register_req(node, i);
		/* flag outputs need no keep */
		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
		block = get_nodes_block(node);
		in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
		/* reuse one Keep per node if possible */
		if (last_keep != NULL) {
			be_Keep_add_node(last_keep, cls, in[0]);
		last_keep = be_new_Keep(block, 1, in);
		if (sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
5915 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
void ia32_add_missing_keeps(ia32_code_gen_t *cg)
	/* Run add_missing_keep_walker over the whole graph (see above). */
	ir_graph *irg = be_get_birg_irg(cg->birg);
	irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5925 * Post-process all calls if we are in SSE mode.
5926 * The ABI requires that the results are in st0, copy them
5927 * to a xmm register.
static void postprocess_fp_call_results(void) {
	/* In SSE mode the x87 ABI still returns float results in st(0);
	 * patch each float call result: either turn an xStore user directly
	 * into a vfst, or spill st(0) to the frame and reload it into an
	 * xmm register via xLoad. */
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];
		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;
			if (! is_atomic_type(res_tp)) {
				/* no floating point return */
			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */
			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);
				/* Keeps need no patching */
				if (be_is_Keep(succ))
				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);
					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);
				/* lazily create the store/reload sequence once per result */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;
					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);
					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);
					new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
					/* all old memory users now depend on the reload */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);
				set_irn_n(succ, get_edge_src_pos(edge), new_res);
6017 /* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg)
	/* Entry point of the ia32 transform phase: register the callbacks,
	 * compute heights / non-AM info, run the generic transform driver and
	 * fix up SSE float call results afterwards. */
	register_transformers();
	initial_fpcw = NULL;
	be_timer_push(T_HEIGHTS);
	heights = heights_new(cg->irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(cg->birg);
	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();
	/* call_list/call_types collect float-returning calls for the SSE fixup */
	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->birg, ia32_pretransform_node);
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);
	set_opt_cse(cse_last);
	ia32_free_non_address_mode_nodes();
	heights_free(heights);
6053 void ia32_init_transform(void)
6055 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");