2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
/* Predicate: can this float Const be materialized by a single x87
 * instruction (fldz/fld1) instead of a memory load?
 * NOTE(review): this listing is elided — the braces and return
 * statements of this function are not visible here. */
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 	tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 have dedicated x87 load instructions. */
159 	if (tarval_is_null(tv) || tarval_is_one(tv))
162 	/* TODO: match all the other float constants */
/* Predicate: can this float Const be synthesized with simple SSE
 * instructions (xorps for 0.0, shift tricks when CONSTRUCT_SSE_CONST)?
 * NOTE(review): listing is elided — several lines (braces, returns,
 * additional conditions) are missing here. */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 	tarval *tv = get_Const_tarval(node);
172 	ir_mode *mode = get_tarval_mode(tv);
177 	if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 	if (mode == mode_D) {
/* Assemble the low 32 bits of the double from its byte representation. */
185 		unsigned val = get_tarval_sub_bits(tv, 0) |
186 			(get_tarval_sub_bits(tv, 1) << 8) |
187 			(get_tarval_sub_bits(tv, 2) << 16) |
188 			(get_tarval_sub_bits(tv, 3) << 24);
190 		/* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 	/* TODO: match all the other float constants */
/* Transform a firm Const node into the ia32 equivalent.
 * Float constants become SSE xZero/xMovd/xLoad or x87 vfldz/vfld1/vfld
 * (possibly via a float-constant entity in memory); integer constants
 * become an ia32 Const immediate.
 * NOTE(review): this listing is elided — declarations, braces, returns
 * and some statements are missing throughout this function. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 	ir_node *old_block = get_nodes_block(node);
204 	ir_node *block = be_transform_node(old_block);
205 	dbg_info *dbgi = get_irn_dbg_info(node);
206 	ir_mode *mode = get_irn_mode(node);
208 	assert(is_Const(node));
210 	if (mode_is_float(mode)) {
215 		if (ia32_cg_config.use_sse2) {
216 			tarval *tv = get_Const_tarval(node);
/* 0.0: xorps the register with itself — no memory access needed. */
217 			if (tarval_is_null(tv)) {
218 				load = new_bd_ia32_xZero(dbgi, block);
219 				set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build from all-ones via shift-left then shift-right. */
222 			} else if (tarval_is_one(tv)) {
223 				int cnst = mode == mode_F ? 26 : 55;
224 				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 				ir_node *pslld, *psrld;
228 				load = new_bd_ia32_xAllOnes(dbgi, block);
229 				set_ia32_ls_mode(load, mode);
230 				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 				set_ia32_ls_mode(pslld, mode);
232 				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 				set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 			} else if (mode == mode_F) {
237 				/* we can place any 32bit constant by using a movd gp, sse */
238 				unsigned val = get_tarval_sub_bits(tv, 0) |
239 					(get_tarval_sub_bits(tv, 1) << 8) |
240 					(get_tarval_sub_bits(tv, 2) << 16) |
241 					(get_tarval_sub_bits(tv, 3) << 24);
242 				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 				load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 				set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 				if (mode == mode_D) {
249 					unsigned val = get_tarval_sub_bits(tv, 0) |
250 						(get_tarval_sub_bits(tv, 1) << 8) |
251 						(get_tarval_sub_bits(tv, 2) << 16) |
252 						(get_tarval_sub_bits(tv, 3) << 24);
254 						ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 						ir_node *cnst, *psllq;
257 						/* fine, lower 32bit are zero, produce 32bit value */
258 						val = get_tarval_sub_bits(tv, 4) |
259 							(get_tarval_sub_bits(tv, 5) << 8) |
260 							(get_tarval_sub_bits(tv, 6) << 16) |
261 							(get_tarval_sub_bits(tv, 7) << 24);
262 						cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 						load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 						set_ia32_ls_mode(load, mode);
265 						psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 						set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* General SSE case: load the constant from a global entity. */
272 				floatent = create_float_const_entity(node);
274 				load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 				set_ia32_op_type(load, ia32_AddrModeS);
276 				set_ia32_am_sc(load, floatent);
/* A load from a constant entity can always be redone, so it is
 * rematerializable for the register allocator. */
277 				arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 				res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for the common constants, ... */
281 			if (is_Const_null(node)) {
282 				load = new_bd_ia32_vfldz(dbgi, block);
284 				set_ia32_ls_mode(load, mode);
285 			} else if (is_Const_one(node)) {
286 				load = new_bd_ia32_vfld1(dbgi, block);
288 				set_ia32_ls_mode(load, mode);
/* ... otherwise load from a constant entity (PIC-aware base). */
293 				floatent = create_float_const_entity(node);
294 				/* create_float_const_ent is smart and sometimes creates
296 				ls_mode = get_type_mode(get_entity_type(floatent));
298 				if (env_cg->birg->main_env->options->pic) {
299 					base = arch_code_generator_get_pic_base(env_cg);
304 				load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
306 				set_ia32_op_type(load, ia32_AddrModeS);
307 				set_ia32_am_sc(load, floatent);
308 				arch_irn_add_flags(load, arch_irn_flags_rematerializable);
309 				res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
312 #ifdef CONSTRUCT_SSE_CONST
314 #endif /* CONSTRUCT_SSE_CONST */
315 		SET_IA32_ORIG_NODE(load, node);
317 		be_dep_on_frame(load);
319 	} else { /* non-float mode */
321 		tarval *tv = get_Const_tarval(node);
/* Normalize to 32-bit unsigned before extracting the immediate. */
324 			tv = tarval_convert_to(tv, mode_Iu);
326 		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
328 			panic("couldn't convert constant tarval (%+F)", node);
330 		val = get_tarval_long(tv);
332 		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
333 		SET_IA32_ORIG_NODE(cnst, node);
335 		be_dep_on_frame(cnst);
/* Transform a firm SymConst (address of an entity) into an ia32 node:
 * a float load for float modes, otherwise an ia32 Const carrying the
 * entity as symbolic immediate. Only symconst_addr_ent is supported.
 * NOTE(review): listing is elided — braces, else-branches and the
 * return are missing here. */
341 * Transforms a SymConst.
343 static ir_node *gen_SymConst(ir_node *node)
345 	ir_node *old_block = get_nodes_block(node);
346 	ir_node *block = be_transform_node(old_block);
347 	dbg_info *dbgi = get_irn_dbg_info(node);
348 	ir_mode *mode = get_irn_mode(node);
351 	if (mode_is_float(mode)) {
352 		if (ia32_cg_config.use_sse2)
353 			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
355 			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
356 		set_ia32_am_sc(cnst, get_SymConst_entity(node));
357 		set_ia32_use_frame(cnst);
361 		if (get_SymConst_kind(node) != symconst_addr_ent) {
362 			panic("backend only support symconst_addr_ent (at %+F)", node);
364 		entity = get_SymConst_entity(node);
365 		cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
368 	SET_IA32_ORIG_NODE(cnst, node);
370 	be_dep_on_frame(cnst);
/* Create (and cache per alignment, indexed 0..15) a primitive type for
 * the given mode with the requested byte alignment. Used for the
 * entities that back float constants.
 * NOTE(review): listing is elided — braces and some declarations are
 * missing; each branch caches in its own static array keyed by align. */
375 * Create a float type for the given mode and cache it.
377 * @param mode the mode for the float type (might be integer mode for SSE2 types)
378 * @param align alignment
380 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
386 	if (mode == mode_Iu) {
387 		static ir_type *int_Iu[16] = {NULL, };
389 		if (int_Iu[align] == NULL) {
390 			int_Iu[align] = tp = new_type_primitive(mode);
391 			/* set the specified alignment */
392 			set_type_alignment_bytes(tp, align);
394 		return int_Iu[align];
395 	} else if (mode == mode_Lu) {
396 		static ir_type *int_Lu[16] = {NULL, };
398 		if (int_Lu[align] == NULL) {
399 			int_Lu[align] = tp = new_type_primitive(mode);
400 			/* set the specified alignment */
401 			set_type_alignment_bytes(tp, align);
403 		return int_Lu[align];
404 	} else if (mode == mode_F) {
405 		static ir_type *float_F[16] = {NULL, };
407 		if (float_F[align] == NULL) {
408 			float_F[align] = tp = new_type_primitive(mode);
409 			/* set the specified alignment */
410 			set_type_alignment_bytes(tp, align);
412 		return float_F[align];
413 	} else if (mode == mode_D) {
414 		static ir_type *float_D[16] = {NULL, };
416 		if (float_D[align] == NULL) {
417 			float_D[align] = tp = new_type_primitive(mode);
418 			/* set the specified alignment */
419 			set_type_alignment_bytes(tp, align);
421 		return float_D[align];
/* Fallback branch: extended precision (mode_E). */
423 		static ir_type *float_E[16] = {NULL, };
425 		if (float_E[align] == NULL) {
426 			float_E[align] = tp = new_type_primitive(mode);
427 			/* set the specified alignment */
428 			set_type_alignment_bytes(tp, align);
430 		return float_E[align];
/* Create (and cache per alignment) a two-element array type of the
 * given atomic float type; used e.g. for the ULL bias constant which
 * needs a zero word before the value.
 * NOTE(review): listing is elided — braces and the final return of the
 * array are missing here. */
435 * Create a float[2] array type for the given atomic type.
437 * @param tp the atomic type
439 static ir_type *ia32_create_float_array(ir_type *tp)
441 	ir_mode *mode = get_type_mode(tp);
442 	unsigned align = get_type_alignment_bytes(tp);
447 	if (mode == mode_F) {
448 		static ir_type *float_F[16] = {NULL, };
450 		if (float_F[align] != NULL)
451 			return float_F[align];
452 		arr = float_F[align] = new_type_array(1, tp);
453 	} else if (mode == mode_D) {
454 		static ir_type *float_D[16] = {NULL, };
456 		if (float_D[align] != NULL)
457 			return float_D[align];
458 		arr = float_D[align] = new_type_array(1, tp);
460 		static ir_type *float_E[16] = {NULL, };
462 		if (float_E[align] != NULL)
463 			return float_E[align];
464 		arr = float_E[align] = new_type_array(1, tp);
/* Size is 2 elements; fix the layout so the backend can emit it. */
466 	set_type_alignment_bytes(arr, align);
467 	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
468 	set_type_state(arr, layout_fixed);
/* Create (once, then cache) the global entity for a well-known FP
 * constant: sign masks, abs masks and the unsigned-long-long bias.
 * The ULL bias is emitted as a two-element compound (0, bias).
 * NOTE(review): listing is elided — struct fields, declarations and
 * braces are missing here. */
472 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
473 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
475 	static const struct {
476 		const char *ent_name;
477 		const char *cnst_str;
480 	} names [ia32_known_const_max] = {
481 		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
482 		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
483 		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
484 		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
485 		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
487 	static ir_entity *ent_cache[ia32_known_const_max];
489 	const char *ent_name, *cnst_str;
495 	ent_name = names[kct].ent_name;
496 	if (! ent_cache[kct]) {
497 		cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32-bit int, 1 = 64-bit int, otherwise float. */
499 		switch (names[kct].mode) {
500 		case 0: mode = mode_Iu; break;
501 		case 1: mode = mode_Lu; break;
502 		default: mode = mode_F; break;
504 		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
505 		tp = ia32_create_float_type(mode, names[kct].align);
507 		if (kct == ia32_ULLBIAS)
508 			tp = ia32_create_float_array(tp);
509 		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
511 		set_entity_ld_ident(ent, get_entity_ident(ent));
512 		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
513 		set_entity_visibility(ent, ir_visibility_local);
515 		if (kct == ia32_ULLBIAS) {
516 			ir_initializer_t *initializer = create_initializer_compound(2);
518 			set_initializer_compound_value(initializer, 0,
519 				create_initializer_tarval(get_tarval_null(mode)));
520 			set_initializer_compound_value(initializer, 1,
521 				create_initializer_tarval(tv));
523 			set_entity_initializer(ent, initializer);
525 			set_entity_initializer(ent, create_initializer_tarval(tv));
528 		/* cache the entry */
529 		ent_cache[kct] = ent;
532 	return ent_cache[kct];
/* Decide whether @p node (a Proj(Load) or float Const) may be folded
 * into another instruction as a source address-mode operand.
 * NOTE(review): listing is elided — several early-exit checks, returns
 * and braces are missing here. */
536 * return true if the node is a Proj(Load) and could be used in source address
537 * mode for another node. Will return only true if the @p other node is not
538 * dependent on the memory of the Load (for binary operations use the other
539 * input here, for unary operations use NULL).
541 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
542 	ir_node *other, ir_node *other2, match_flags_t flags)
547 	/* float constants are always available */
548 	if (is_Const(node)) {
549 		ir_mode *mode = get_irn_mode(node);
550 		if (mode_is_float(mode)) {
551 			if (ia32_cg_config.use_sse2) {
552 				if (is_simple_sse_Const(node))
555 				if (is_simple_x87_Const(node))
/* Multi-user constants are better materialized once in a register. */
558 			if (get_irn_n_edges(node) > 1)
566 	load = get_Proj_pred(node);
567 	pn = get_Proj_proj(node);
568 	if (!is_Load(load) || pn != pn_Load_res)
570 	if (get_nodes_block(load) != block)
572 	/* we only use address mode if we're the only user of the load */
573 	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
575 	/* in some edge cases with address mode we might reach the load normally
576 	 * and through some AM sequence, if it is already materialized then we
577 	 * can't create an AM node from it */
578 	if (be_is_transformed(node))
581 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
582 	if (other != NULL && prevents_AM(block, load, other))
585 	if (other2 != NULL && prevents_AM(block, load, other2))
/* Aggregated result of operand matching: address parts, matched
 * operands, load/store mode and operand-order bookkeeping.
 * NOTE(review): listing is elided — several fields of this struct are
 * not visible here. */
591 typedef struct ia32_address_mode_t ia32_address_mode_t;
592 struct ia32_address_mode_t {
597 	ia32_op_type_t op_type;
/* set if the operation may swap its two operands */
601 	unsigned commutative : 1;
/* set if the matched operands were swapped relative to the source */
602 	unsigned ins_permuted : 1;
605 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
607 /* construct load address */
608 memset(addr, 0, sizeof(addr[0]));
609 ia32_create_address_mode(addr, ptr, 0);
611 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
612 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
613 addr->mem = be_transform_node(mem);
/* Fill the address part of an address-mode match from either a float
 * Const (load from its constant entity) or a Proj(Load) (fold the load
 * itself into the instruction).
 * NOTE(review): listing is elided — braces, early returns and some
 * assignments are missing here. */
616 static void build_address(ia32_address_mode_t *am, ir_node *node,
617 	ia32_create_am_flags_t flags)
619 	ia32_address_t *addr = &am->addr;
625 	if (is_Const(node)) {
/* Float constant: address the global entity that holds its value. */
626 		ir_entity *entity = create_float_const_entity(node);
627 		addr->base = noreg_GP;
628 		addr->index = noreg_GP;
630 		addr->symconst_ent = entity;
632 		am->ls_mode = get_type_mode(get_entity_type(entity));
633 		am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address, mode and pinned state from the Load. */
637 	load = get_Proj_pred(node);
638 	ptr = get_Load_ptr(load);
639 	mem = get_Load_mem(load);
640 	new_mem = be_transform_node(mem);
641 	am->pinned = get_irn_pinned(load);
642 	am->ls_mode = get_Load_mode(load);
643 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
646 	/* construct load address */
647 	ia32_create_address_mode(addr, ptr, flags);
649 	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
650 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the components of an ia32_address_t onto an ia32 node's
 * address-mode attributes (scale, symconst, offset, frame entity).
 * NOTE(review): listing is elided — braces and the use_frame guard
 * around the frame-entity lines are missing here. */
654 static void set_address(ir_node *node, const ia32_address_t *addr)
656 	set_ia32_am_scale(node, addr->scale);
657 	set_ia32_am_sc(node, addr->symconst_ent);
658 	set_ia32_am_offs_int(node, addr->offset);
659 	if (addr->symconst_sign)
660 		set_ia32_am_sc_sign(node);
662 		set_ia32_use_frame(node);
663 		set_ia32_frame_ent(node, addr->frame_entity);
/* Transfer a complete address-mode match (address, op type, ls mode,
 * pinned state, commutativity) onto a freshly created ia32 node.
 * NOTE(review): listing is elided — braces and the commutative guard
 * are missing here. */
667 * Apply attributes of a given address mode to a node.
669 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
671 	set_address(node, &am->addr);
673 	set_ia32_op_type(node, am->op_type);
674 	set_ia32_ls_mode(node, am->ls_mode);
675 	if (am->pinned == op_pin_state_pinned) {
676 		/* beware: some nodes are already pinned and did not allow to change the state */
677 		if (get_irn_pinned(node) != op_pin_state_pinned)
678 			set_irn_pinned(node, op_pin_state_pinned);
681 		set_ia32_commutative(node);
/* NOTE(review): listing is elided — the is_Conv check, declarations and
 * braces of this predicate are missing here. */
685 * Check, if a given node is a Down-Conv, ie. a integer Conv
686 * from a mode with a mode with more bits to a mode with lesser bits.
687 * Moreover, we return only true if the node has not more than 1 user.
689 * @param node the node
690 * @return non-zero if node is a Down-Conv
692 static int is_downconv(const ir_node *node)
700 	/* we only want to skip the conv when we're the only user
701 	 * (not optimal but for now...)
703 	if (get_irn_n_edges(node) > 1)
706 	src_mode = get_irn_mode(get_Conv_op(node));
707 	dest_mode = get_irn_mode(node);
/* Both modes must be GP-register modes and the target no wider. */
709 		ia32_mode_needs_gp_reg(src_mode) &&
710 		ia32_mode_needs_gp_reg(dest_mode) &&
711 		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
/* NOTE(review): listing is elided — the final return of this function
 * is missing here. */
714 /* Skip all Down-Conv's on a given node and return the resulting node. */
715 ir_node *ia32_skip_downconv(ir_node *node)
717 	while (is_downconv(node))
718 		node = get_Conv_op(node);
/* Widen @p node to a 32-bit GP mode via an I2I Conv; the target mode is
 * chosen by the signedness of the source mode.
 * NOTE(review): listing is elided — declarations, the target-mode
 * assignments and braces are missing here. */
723 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
725 	ir_mode *mode = get_irn_mode(node);
730 	if (mode_is_signed(mode)) {
735 	block = get_nodes_block(node);
736 	dbgi = get_irn_dbg_info(node);
738 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* Core operand matcher: decides between immediate operands, source
 * address mode and plain register operands according to @p flags and
 * fills the ia32_address_mode_t accordingly.
 * NOTE(review): listing is elided — declarations, braces, several
 * branches and fall-through paths are missing here. */
742 * matches operands of a node into ia32 addressing/operand modes. This covers
743 * usage of source address mode, immediates, operations with non 32-bit modes,
745 * The resulting data is filled into the @p am struct. block is the block
746 * of the node whose arguments are matched. op1, op2 are the first and second
747 * input that are matched (op1 may be NULL). other_op is another unrelated
748 * input that is not matched! but which is needed sometimes to check if AM
749 * for op1/op2 is legal.
750 * @p flags describes the supported modes of the operation in detail.
752 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
753 	ir_node *op1, ir_node *op2, ir_node *other_op,
756 	ia32_address_t *addr = &am->addr;
757 	ir_mode *mode = get_irn_mode(op2);
758 	int mode_bits = get_mode_size_bits(mode);
759 	ir_node *new_op1, *new_op2;
761 	unsigned commutative;
762 	int use_am_and_immediates;
765 	memset(am, 0, sizeof(am[0]));
767 	commutative = (flags & match_commutative) != 0;
768 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
769 	use_am = (flags & match_am) != 0;
770 	use_immediate = (flags & match_immediate) != 0;
771 	assert(!use_am_and_immediates || use_immediate);
774 	assert(!commutative || op1 != NULL);
775 	assert(use_am || !(flags & match_8bit_am));
776 	assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit AM is only legal when explicitly allowed by the flags. */
778 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
779 		(mode_bits == 16 && !(flags & match_16bit_am))) {
783 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
784 	 * can be random for these operations */
785 	if (flags & match_mode_neutral) {
786 		op2 = ia32_skip_downconv(op2);
788 			op1 = ia32_skip_downconv(op1);
792 	/* match immediates. firm nodes are normalized: constants are always on the
795 	if (!(flags & match_try_am) && use_immediate) {
796 		new_op2 = try_create_Immediate(op2, 0);
/* Try source AM on op2 first ... */
799 	if (new_op2 == NULL &&
800 		use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
801 		build_address(am, op2, 0);
802 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
803 		if (mode_is_float(mode)) {
804 			new_op2 = ia32_new_NoReg_vfp(env_cg);
808 		am->op_type = ia32_AddrModeS;
/* ... then, for commutative ops, on op1 with operands swapped. */
809 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
811 		ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
813 		build_address(am, op1, 0);
815 		if (mode_is_float(mode)) {
816 			noreg = ia32_new_NoReg_vfp(env_cg);
821 		if (new_op2 != NULL) {
824 			new_op1 = be_transform_node(op2);
826 			am->ins_permuted = 1;
828 		am->op_type = ia32_AddrModeS;
/* No AM matched: plain register operands. */
831 		am->op_type = ia32_Normal;
833 		if (flags & match_try_am) {
839 		mode = get_irn_mode(op2);
840 		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
841 			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
843 			new_op2 = create_upconv(op2, NULL);
844 			am->ls_mode = mode_Iu;
846 			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
848 			new_op2 = be_transform_node(op2);
849 			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* Fill unused address parts with noreg/nomem defaults. */
852 	if (addr->base == NULL)
853 		addr->base = noreg_GP;
854 	if (addr->index == NULL)
855 		addr->index = noreg_GP;
856 	if (addr->mem == NULL)
859 	am->new_op1 = new_op1;
860 	am->new_op2 = new_op2;
861 	am->commutative = commutative;
/* NOTE(review): listing is elided — declarations, early return value
 * and the non-mode_T return path are missing here. */
865 * "Fixes" a node that uses address mode by turning it into mode_T
866 * and returning a pn_ia32_res Proj.
868 * @param node the node
869 * @param am its address mode
871 * @return a Proj(pn_ia32_res) if a memory address mode is used,
874 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
879 	if (am->mem_proj == NULL)
882 	/* we have to create a mode_T so the old MemProj can attach to us */
883 	mode = get_irn_mode(node);
884 	load = get_Proj_pred(am->mem_proj);
/* Redirect future lookups of the folded Load to the new AM node. */
886 	be_set_transformed_node(load, node);
888 	if (mode != mode_T) {
889 		set_irn_mode(node, mode_T);
890 		return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
/* NOTE(review): listing is elided — declarations, braces and the final
 * return are missing here. */
897 * Construct a standard binary operation, set AM and immediate if required.
899 * @param node The original node for which the binop is created
900 * @param op1 The first operand
901 * @param op2 The second operand
902 * @param func The node constructor function
903 * @return The constructed ia32 node.
905 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
906 	construct_binop_func *func, match_flags_t flags)
909 	ir_node *block, *new_block, *new_node;
910 	ia32_address_mode_t am;
911 	ia32_address_t *addr = &am.addr;
913 	block = get_nodes_block(node);
914 	match_arguments(&am, block, op1, op2, NULL, flags);
916 	dbgi = get_irn_dbg_info(node);
917 	new_block = be_transform_node(block);
918 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
919 		am.new_op1, am.new_op2);
920 	set_am_attributes(new_node, &am);
921 	/* we can't use source address mode anymore when using immediates */
922 	if (!(flags & match_am_and_immediates) &&
923 		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
924 		set_ia32_am_support(new_node, ia32_am_none);
925 	SET_IA32_ORIG_NODE(new_node, node);
927 	new_node = fix_mem_proj(new_node, &am);
/* Shared input indices for lowered ia32 binops with eflags, plus
 * compile-time checks that Adc/Sbb agree with this layout.
 * NOTE(review): listing is elided — the enum keyword/braces are
 * missing here. */
933 * Generic names for the inputs of an ia32 binary op.
936 	n_ia32_l_binop_left, /**< ia32 left input */
937 	n_ia32_l_binop_right, /**< ia32 right input */
938 	n_ia32_l_binop_eflags /**< ia32 eflags input */
940 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
941 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
942 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
943 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
944 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
945 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): listing is elided — declarations, braces and the final
 * return are missing here. */
948 * Construct a binary operation which also consumes the eflags.
950 * @param node The node to transform
951 * @param func The node constructor function
952 * @param flags The match flags
953 * @return The constructor ia32 node
955 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
958 	ir_node *src_block = get_nodes_block(node);
959 	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
960 	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
961 	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
963 	ir_node *block, *new_node, *new_eflags;
964 	ia32_address_mode_t am;
965 	ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: it must not depend on a folded load. */
967 	match_arguments(&am, src_block, op1, op2, eflags, flags);
969 	dbgi = get_irn_dbg_info(node);
970 	block = be_transform_node(src_block);
971 	new_eflags = be_transform_node(eflags);
972 	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
973 		am.new_op1, am.new_op2, new_eflags);
974 	set_am_attributes(new_node, &am);
975 	/* we can't use source address mode anymore when using immediates */
976 	if (!(flags & match_am_and_immediates) &&
977 		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
978 		set_ia32_am_support(new_node, ia32_am_none);
979 	SET_IA32_ORIG_NODE(new_node, node);
981 	new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed, cached) initial x87 FPU control word
 * node used as extra input of x87 float operations.
 * NOTE(review): listing is elided — declarations, braces and the
 * return statements are missing here. */
986 static ir_node *get_fpcw(void)
989 	if (initial_fpcw != NULL)
992 	fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
993 		&ia32_fp_cw_regs[REG_FPCW]);
994 	initial_fpcw = be_transform_node(fpcw);
/* NOTE(review): listing is elided — declarations, braces, the div-node
 * check and the final return are missing here. */
1000 * Construct a standard binary operation, set AM and immediate if required.
1002 * @param op1 The first operand
1003 * @param op2 The second operand
1004 * @param func The node constructor function
1005 * @return The constructed ia32 node.
1007 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1008 	construct_binop_float_func *func)
1010 	ir_mode *mode = get_irn_mode(node);
1012 	ir_node *block, *new_block, *new_node;
1013 	ia32_address_mode_t am;
1014 	ia32_address_t *addr = &am.addr;
1015 	ia32_x87_attr_t *attr;
1016 	/* All operations are considered commutative, because there are reverse
1018 	match_flags_t flags = match_commutative;
1020 	/* happens for div nodes... */
1022 		mode = get_divop_resmod(node);
1024 	/* cannot use address mode with long double on x87 */
1025 	if (get_mode_size_bits(mode) <= 64)
1028 	block = get_nodes_block(node);
1029 	match_arguments(&am, block, op1, op2, NULL, flags);
1031 	dbgi = get_irn_dbg_info(node);
1032 	new_block = be_transform_node(block);
1033 	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1034 		am.new_op1, am.new_op2, get_fpcw());
1035 	set_am_attributes(new_node, &am);
/* Record operand swap so the emitter picks the reverse instruction. */
1037 	attr = get_ia32_x87_attr(new_node);
1038 	attr->attr.data.ins_permuted = am.ins_permuted;
1040 	SET_IA32_ORIG_NODE(new_node, node);
1042 	new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): listing is elided — declarations, braces and the final
 * return are missing here. */
1048 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1050 * @param op1 The first operand
1051 * @param op2 The second operand
1052 * @param func The node constructor function
1053 * @return The constructed ia32 node.
1055 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1056 	construct_shift_func *func,
1057 	match_flags_t flags)
1060 	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1062 	assert(! mode_is_float(get_irn_mode(node)));
1063 	assert(flags & match_immediate);
1064 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1066 	if (flags & match_mode_neutral) {
1067 		op1 = ia32_skip_downconv(op1);
1068 		new_op1 = be_transform_node(op1);
1069 	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1070 		new_op1 = create_upconv(op1, node);
1072 		new_op1 = be_transform_node(op1);
1075 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1076 	 * other bits are ignored anyway */
1077 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1078 		ir_node *const op = get_Conv_op(op2);
1079 		if (mode_is_float(get_irn_mode(op)))
1082 	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1084 	new_op2 = create_immediate_or_transform(op2, 0);
1086 	dbgi = get_irn_dbg_info(node);
1087 	block = get_nodes_block(node);
1088 	new_block = be_transform_node(block);
1089 	new_node = func(dbgi, new_block, new_op1, new_op2);
1090 	SET_IA32_ORIG_NODE(new_node, node);
1092 	/* lowered shift instruction may have a dependency operand, handle it here */
1093 	if (get_irn_arity(node) == 3) {
1094 		/* we have a dependency */
1095 		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1096 		add_irn_dep(new_node, new_dep);
/* NOTE(review): listing is elided — declarations, braces and the final
 * return are missing here. */
1104 * Construct a standard unary operation, set AM and immediate if required.
1106 * @param op The operand
1107 * @param func The node constructor function
1108 * @return The constructed ia32 node.
1110 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1111 	match_flags_t flags)
1114 	ir_node *block, *new_block, *new_op, *new_node;
1116 	assert(flags == 0 || flags == match_mode_neutral);
1117 	if (flags & match_mode_neutral) {
1118 		op = ia32_skip_downconv(op);
1121 	new_op = be_transform_node(op);
1122 	dbgi = get_irn_dbg_info(node);
1123 	block = get_nodes_block(node);
1124 	new_block = be_transform_node(block);
1125 	new_node = func(dbgi, new_block, new_op);
1127 	SET_IA32_ORIG_NODE(new_node, node);
/* Create an ia32 Lea node from a decomposed address, substituting
 * noreg for absent base/index and copying the address attributes.
 * NOTE(review): listing is elided — braces, the base==NULL branch and
 * the final return are missing here. */
1132 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1133 	ia32_address_t *addr)
1135 	ir_node *base, *index, *res;
1141 		base = be_transform_node(base);
1144 	index = addr->index;
1145 	if (index == NULL) {
1148 		index = be_transform_node(index);
1151 	res = new_bd_ia32_Lea(dbgi, block, base, index);
1152 	set_address(res, addr);
1158 * Returns non-zero if a given address mode has a symbolic or
1159 * numerical offset != 0.
1161 static int am_has_immediates(const ia32_address_t *addr)
1163 return addr->offset != 0 || addr->symconst_ent != NULL
1164 || addr->frame_entity || addr->use_frame;
/* Transform a firm Add: float adds go to xAdd/vfadd; integer adds are
 * lowered to a Const, an existing operand, an Add with source address
 * mode, or a Lea, as the strategy comment below describes.
 * NOTE(review): listing is elided — declarations, braces, returns and
 * some guard conditions are missing throughout this function. */
1168 * Creates an ia32 Add.
1170 * @return the created ia32 Add node
1172 static ir_node *gen_Add(ir_node *node)
1174 	ir_mode *mode = get_irn_mode(node);
1175 	ir_node *op1 = get_Add_left(node);
1176 	ir_node *op2 = get_Add_right(node);
1178 	ir_node *block, *new_block, *new_node, *add_immediate_op;
1179 	ia32_address_t addr;
1180 	ia32_address_mode_t am;
1182 	if (mode_is_float(mode)) {
1183 		if (ia32_cg_config.use_sse2)
1184 			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1185 				match_commutative | match_am);
1187 		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1190 	ia32_mark_non_am(node);
1192 	op2 = ia32_skip_downconv(op2);
1193 	op1 = ia32_skip_downconv(op1);
1197 	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1198 	 * 1. Add with immediate -> Lea
1199 	 * 2. Add with possible source address mode -> Add
1200 	 * 3. Otherwise -> Lea
1202 	memset(&addr, 0, sizeof(addr));
1203 	ia32_create_address_mode(&addr, node, ia32_create_am_force);
1204 	add_immediate_op = NULL;
1206 	dbgi = get_irn_dbg_info(node);
1207 	block = get_nodes_block(node);
1208 	new_block = be_transform_node(block);
/* Case 0: the whole Add folded into offset/symconst — emit a Const. */
1211 	if (addr.base == NULL && addr.index == NULL) {
1212 		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1213 			addr.symconst_sign, 0, addr.offset);
1214 		be_dep_on_frame(new_node);
1215 		SET_IA32_ORIG_NODE(new_node, node);
1218 	/* add with immediate? */
1219 	if (addr.index == NULL) {
1220 		add_immediate_op = addr.base;
1221 	} else if (addr.base == NULL && addr.scale == 0) {
1222 		add_immediate_op = addr.index;
1225 	if (add_immediate_op != NULL) {
1226 		if (!am_has_immediates(&addr)) {
1227 #ifdef DEBUG_libfirm
1228 			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1231 			return be_transform_node(add_immediate_op);
1234 		new_node = create_lea_from_address(dbgi, new_block, &addr);
1235 		SET_IA32_ORIG_NODE(new_node, node);
1239 	/* test if we can use source address mode */
1240 	match_arguments(&am, block, op1, op2, NULL, match_commutative
1241 		| match_mode_neutral | match_am | match_immediate | match_try_am);
1243 	/* construct an Add with source address mode */
1244 	if (am.op_type == ia32_AddrModeS) {
1245 		ia32_address_t *am_addr = &am.addr;
1246 		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1247 			am_addr->index, am_addr->mem, am.new_op1,
1249 		set_am_attributes(new_node, &am);
1250 		SET_IA32_ORIG_NODE(new_node, node);
1252 		new_node = fix_mem_proj(new_node, &am);
1257 	/* otherwise construct a lea */
1258 	new_node = create_lea_from_address(dbgi, new_block, &addr);
1259 	SET_IA32_ORIG_NODE(new_node, node);
1264 * Creates an ia32 Mul.
1266 * @return the created ia32 Mul node
/* Float muls go to SSE xMul or x87 vfmul; integer muls become IMul with
 * full matching flexibility (commutative, AM, immediates). */
1268 static ir_node *gen_Mul(ir_node *node)
1270 ir_node *op1 = get_Mul_left(node);
1271 ir_node *op2 = get_Mul_right(node);
1272 ir_mode *mode = get_irn_mode(node);
1274 if (mode_is_float(mode)) {
1275 if (ia32_cg_config.use_sse2)
1276 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1277 match_commutative | match_am);
1279 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1281 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1282 match_commutative | match_am | match_mode_neutral |
1283 match_immediate | match_am_and_immediates);
1287 * Creates an ia32 Mulh.
1288 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1289 * this result while Mul returns the lower 32 bit.
1291 * @return the created ia32 Mulh node
/* Picks the signed (IMul1OP) or unsigned (Mul) one-operand form and returns
 * a Proj of the high 32 bits of the 64-bit product. Only 32-bit modes are
 * supported (panics otherwise). */
1293 static ir_node *gen_Mulh(ir_node *node)
1295 ir_node *block = get_nodes_block(node);
1296 ir_node *new_block = be_transform_node(block);
1297 dbg_info *dbgi = get_irn_dbg_info(node);
1298 ir_node *op1 = get_Mulh_left(node);
1299 ir_node *op2 = get_Mulh_right(node);
1300 ir_mode *mode = get_irn_mode(node);
1302 ir_node *proj_res_high;
1304 if (get_mode_size_bits(mode) != 32) {
1305 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1308 if (mode_is_signed(mode)) {
1309 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1310 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1312 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1313 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1315 return proj_res_high;
1319 * Creates an ia32 And.
1321 * @return The created ia32 And node
/* Integer only. And with 0xFF / 0xFFFF is recognized as a zero extension
 * and lowered to an I2I conversion instead of a real And. */
1323 static ir_node *gen_And(ir_node *node)
1325 ir_node *op1 = get_And_left(node);
1326 ir_node *op2 = get_And_right(node);
1327 assert(! mode_is_float(get_irn_mode(node)));
1329 /* is it a zero extension? */
1330 if (is_Const(op2)) {
1331 tarval *tv = get_Const_tarval(op2);
1332 long v = get_tarval_long(tv);
1334 if (v == 0xFF || v == 0xFFFF) {
1335 dbg_info *dbgi = get_irn_dbg_info(node);
1336 ir_node *block = get_nodes_block(node);
/* NOTE(review): the 0xFF branch is elided here; only the 0xFFFF path
 * is visible in this chunk. */
1343 assert(v == 0xFFFF);
1346 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1351 return gen_binop(node, op1, op2, new_bd_ia32_And,
1352 match_commutative | match_mode_neutral | match_am | match_immediate);
1358 * Creates an ia32 Or.
1360 * @return The created ia32 Or node
/* Integer only; direct mapping to the ia32 Or node. */
1362 static ir_node *gen_Or(ir_node *node)
1364 ir_node *op1 = get_Or_left(node);
1365 ir_node *op2 = get_Or_right(node);
1367 assert (! mode_is_float(get_irn_mode(node)));
1368 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1369 | match_mode_neutral | match_am | match_immediate);
1375 * Creates an ia32 Eor.
1377 * @return The created ia32 Eor node
/* Integer only; firm Eor (xor) maps directly to the ia32 Xor node. */
1379 static ir_node *gen_Eor(ir_node *node)
1381 ir_node *op1 = get_Eor_left(node);
1382 ir_node *op2 = get_Eor_right(node);
1384 assert(! mode_is_float(get_irn_mode(node)));
1385 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1386 | match_mode_neutral | match_am | match_immediate);
1391 * Creates an ia32 Sub.
1393 * @return The created ia32 Sub node
/* Float subs become SSE xSub or x87 vfsub. For integers a Sub with a
 * constant rhs is reported — normalization should have turned it into an
 * Add with a negated constant earlier. */
1395 static ir_node *gen_Sub(ir_node *node)
1397 ir_node *op1 = get_Sub_left(node);
1398 ir_node *op2 = get_Sub_right(node);
1399 ir_mode *mode = get_irn_mode(node);
1401 if (mode_is_float(mode)) {
1402 if (ia32_cg_config.use_sse2)
1403 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1405 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1408 if (is_Const(op2)) {
1409 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1413 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1414 | match_am | match_immediate);
/* Builds the memory input for a node that consumed a load via address mode:
 * combines the node's original memory (src_mem) with the AM memory (am_mem)
 * while filtering out predecessors that would create a memory loop through
 * the folded load. Returns a (possibly new) Sync or a plain memory node. */
1417 static ir_node *transform_AM_mem(ir_node *const block,
1418 ir_node *const src_val,
1419 ir_node *const src_mem,
1420 ir_node *const am_mem)
1422 if (is_NoMem(am_mem)) {
1423 return be_transform_node(src_mem);
1424 } else if (is_Proj(src_val) &&
1426 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1427 /* avoid memory loop */
1429 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* Rebuild the Sync without the predecessor that comes from the folded
 * load itself, then append am_mem. */
1430 ir_node *const ptr_pred = get_Proj_pred(src_val);
1431 int const arity = get_Sync_n_preds(src_mem);
1436 NEW_ARR_A(ir_node*, ins, arity + 1);
1438 /* NOTE: This sometimes produces dead-code because the old sync in
1439 * src_mem might not be used anymore, we should detect this case
1440 * and kill the sync... */
1441 for (i = arity - 1; i >= 0; --i) {
1442 ir_node *const pred = get_Sync_pred(src_mem, i);
1444 /* avoid memory loop */
1445 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1448 ins[n++] = be_transform_node(pred);
1453 return new_r_Sync(block, n, ins);
/* Fallback: sync the transformed src_mem with am_mem. */
1457 ins[0] = be_transform_node(src_mem);
1459 return new_r_Sync(block, 2, ins);
1464 * Create a 32bit to 64bit signed extension.
1466 * @param dbgi debug info
1467 * @param block the block where node nodes should be placed
1468 * @param val the value to extend
1469 * @param orig the original node
/* Emits either Cltd (cdq; preferred when the config says the short form is
 * good for this CPU) or a Sar by 31 to produce the upper sign-extension
 * word of a 32-bit value. */
1471 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1472 ir_node *val, const ir_node *orig)
1477 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a value for eax to overwrite. */
1478 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1479 be_dep_on_frame(pval);
1480 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1482 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1483 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1485 SET_IA32_ORIG_NODE(res, orig);
1490 * Generates an ia32 DivMod with additional infrastructure for the
1491 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod: fetches the operands of
 * whichever firm opcode we got, matches address mode for the divisor, and
 * emits IDiv (with a real sign extension) or Div (with a zero upper word). */
1493 static ir_node *create_Div(ir_node *node)
1495 dbg_info *dbgi = get_irn_dbg_info(node);
1496 ir_node *block = get_nodes_block(node);
1497 ir_node *new_block = be_transform_node(block);
1504 ir_node *sign_extension;
1505 ia32_address_mode_t am;
1506 ia32_address_t *addr = &am.addr;
1508 /* the upper bits have random contents for smaller modes */
1509 switch (get_irn_opcode(node)) {
1511 op1 = get_Div_left(node);
1512 op2 = get_Div_right(node);
1513 mem = get_Div_mem(node);
1514 mode = get_Div_resmode(node);
1517 op1 = get_Mod_left(node);
1518 op2 = get_Mod_right(node);
1519 mem = get_Mod_mem(node);
1520 mode = get_Mod_resmode(node);
1523 op1 = get_DivMod_left(node);
1524 op2 = get_DivMod_right(node);
1525 mem = get_DivMod_mem(node);
1526 mode = get_DivMod_resmode(node);
1529 panic("invalid divmod node %+F", node);
1532 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1534 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1535 is the memory of the consumed address. We can have only the second op as address
1536 in Div nodes, so check only op2. */
1537 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1539 if (mode_is_signed(mode)) {
1540 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1541 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1542 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* Unsigned division: the upper word is simply zero. */
1544 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1545 be_dep_on_frame(sign_extension);
1547 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1548 addr->index, new_mem, am.new_op2,
1549 am.new_op1, sign_extension);
1552 set_irn_pinned(new_node, get_irn_pinned(node));
1554 set_am_attributes(new_node, &am);
1555 SET_IA32_ORIG_NODE(new_node, node);
1557 new_node = fix_mem_proj(new_node, &am);
1563 * Generates an ia32 Mod.
/* Thin wrapper: Mod shares the full divmod lowering in create_Div(). */
1565 static ir_node *gen_Mod(ir_node *node)
1567 return create_Div(node);
1571 * Generates an ia32 Div.
/* Thin wrapper: Div shares the full divmod lowering in create_Div(). */
1573 static ir_node *gen_Div(ir_node *node)
1575 return create_Div(node);
1579 * Generates an ia32 DivMod.
/* Thin wrapper: DivMod shares the full divmod lowering in create_Div(). */
1581 static ir_node *gen_DivMod(ir_node *node)
1583 return create_Div(node);
1589 * Creates an ia32 floating Div.
1591 * @return The created ia32 xDiv node
/* Floating-point division: SSE xDiv when available, otherwise x87 vfdiv. */
1593 static ir_node *gen_Quot(ir_node *node)
1595 ir_node *op1 = get_Quot_left(node);
1596 ir_node *op2 = get_Quot_right(node);
1598 if (ia32_cg_config.use_sse2) {
1599 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1601 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1607 * Creates an ia32 Shl.
1609 * @return The created ia32 Shl node
/* Left shift; mode-neutral since the low bits are unaffected by upper-bit
 * garbage, and the count may be an immediate. */
1611 static ir_node *gen_Shl(ir_node *node)
1613 ir_node *left = get_Shl_left(node);
1614 ir_node *right = get_Shl_right(node);
1616 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1617 match_mode_neutral | match_immediate);
1621 * Creates an ia32 Shr.
1623 * @return The created ia32 Shr node
/* Logical right shift; not mode-neutral (upper bits matter), immediate
 * shift counts allowed. */
1625 static ir_node *gen_Shr(ir_node *node)
1627 ir_node *left = get_Shr_left(node);
1628 ir_node *right = get_Shr_right(node);
1630 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1636 * Creates an ia32 Sar.
1638 * @return The created ia32 Shrs node
/* Arithmetic right shift. Two special patterns are recognized first:
 * Shrs(x, 31) is a pure sign extension (create_sex_32_64), and
 * Shrs(Shl(x, c), c) with c == 16/24 is an 8/16-bit sign extension that
 * becomes an I2I conversion. Everything else maps to Sar. */
1640 static ir_node *gen_Shrs(ir_node *node)
1642 ir_node *left = get_Shrs_left(node);
1643 ir_node *right = get_Shrs_right(node);
1645 if (is_Const(right)) {
1646 tarval *tv = get_Const_tarval(right);
1647 long val = get_tarval_long(tv);
/* NOTE(review): the check `val == 31` appears to be on an elided line. */
1649 /* this is a sign extension */
1650 dbg_info *dbgi = get_irn_dbg_info(node);
1651 ir_node *block = be_transform_node(get_nodes_block(node));
1652 ir_node *new_op = be_transform_node(left);
1654 return create_sex_32_64(dbgi, block, new_op, node);
1658 /* 8 or 16 bit sign extension? */
1659 if (is_Const(right) && is_Shl(left)) {
1660 ir_node *shl_left = get_Shl_left(left);
1661 ir_node *shl_right = get_Shl_right(left);
1662 if (is_Const(shl_right)) {
1663 tarval *tv1 = get_Const_tarval(right);
1664 tarval *tv2 = get_Const_tarval(shl_right);
1665 if (tv1 == tv2 && tarval_is_long(tv1)) {
1666 long val = get_tarval_long(tv1);
1667 if (val == 16 || val == 24) {
1668 dbg_info *dbgi = get_irn_dbg_info(node);
1669 ir_node *block = get_nodes_block(node);
1679 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1688 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1694 * Creates an ia32 Rol.
1696 * @param op1 The first operator
1697 * @param op2 The second operator
1698 * @return The created ia32 RotL node
/* Helper used by gen_Rotl; straightforward mapping to the ia32 Rol node. */
1700 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1702 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1708 * Creates an ia32 Ror.
1709 * NOTE: There is no RotR with immediate because this would always be a RotL
1710 * "imm-mode_size_bits" which can be pre-calculated.
1712 * @param op1 The first operator
1713 * @param op2 The second operator
1714 * @return The created ia32 RotR node
/* Helper used by gen_Rotl when the RotL(x, w-e) pattern is detected. */
1716 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1718 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1724 * Creates an ia32 RotR or RotL (depending on the found pattern).
1726 * @return The created ia32 RotL or RotR node
/* Firm only has RotL. A right operand of the shape (bits - e), i.e.
 * Add(Minus(e), bits-const), is rewritten to a RotR by e; otherwise a plain
 * RotL is emitted. */
1728 static ir_node *gen_Rotl(ir_node *node)
1730 ir_node *rotate = NULL;
1731 ir_node *op1 = get_Rotl_left(node);
1732 ir_node *op2 = get_Rotl_right(node);
1734 /* Firm has only RotL, so we are looking for a right (op2)
1735 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1736 that means we can create a RotR instead of an Add and a RotL */
1740 ir_node *left = get_Add_left(add);
1741 ir_node *right = get_Add_right(add);
1742 if (is_Const(right)) {
1743 tarval *tv = get_Const_tarval(right);
1744 ir_mode *mode = get_irn_mode(node);
1745 long bits = get_mode_size_bits(mode);
1747 if (is_Minus(left) &&
1748 tarval_is_long(tv) &&
1749 get_tarval_long(tv) == bits &&
1752 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1753 rotate = gen_Ror(node, op1, get_Minus_op(left));
1758 if (rotate == NULL) {
1759 rotate = gen_Rol(node, op1, op2);
1768 * Transforms a Minus node.
1770 * @return The created ia32 Minus node
/* Floats: SSE negates by XORing with a sign-bit constant from a known-const
 * entity; x87 uses vfchs. Integers map to Neg. */
1772 static ir_node *gen_Minus(ir_node *node)
1774 ir_node *op = get_Minus_op(node);
1775 ir_node *block = be_transform_node(get_nodes_block(node));
1776 dbg_info *dbgi = get_irn_dbg_info(node);
1777 ir_mode *mode = get_irn_mode(node);
1782 if (mode_is_float(mode)) {
1783 ir_node *new_op = be_transform_node(op);
1784 if (ia32_cg_config.use_sse2) {
1785 /* TODO: non-optimal... if we have many xXors, then we should
1786 * rather create a load for the const and use that instead of
1787 * several AM nodes... */
1788 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1790 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1791 nomem, new_op, noreg_xmm);
/* Pick the single/double precision sign-mask constant entity. */
1793 size = get_mode_size_bits(mode);
1794 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1796 set_ia32_am_sc(new_node, ent);
1797 set_ia32_op_type(new_node, ia32_AddrModeS);
1798 set_ia32_ls_mode(new_node, mode);
1800 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1803 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1806 SET_IA32_ORIG_NODE(new_node, node);
1812 * Transforms a Not node.
1814 * @return The created ia32 Not node
/* Integer bitwise complement only; mode_b Nots must be lowered earlier. */
1816 static ir_node *gen_Not(ir_node *node)
1818 ir_node *op = get_Not_op(node);
1820 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1821 assert (! mode_is_float(get_irn_mode(node)));
1823 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1829 * Transforms an Abs node.
1831 * @return The created ia32 Abs node
/* Floats: SSE ANDs with an abs-mask constant; x87 uses vfabs. Integers use
 * the classic branch-free sequence: s = x >> 31; result = (x ^ s) - s. */
1833 static ir_node *gen_Abs(ir_node *node)
1835 ir_node *block = get_nodes_block(node);
1836 ir_node *new_block = be_transform_node(block);
1837 ir_node *op = get_Abs_op(node);
1838 dbg_info *dbgi = get_irn_dbg_info(node);
1839 ir_mode *mode = get_irn_mode(node);
1845 if (mode_is_float(mode)) {
1846 new_op = be_transform_node(op);
1848 if (ia32_cg_config.use_sse2) {
1849 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1850 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1851 nomem, new_op, noreg_fp);
/* Pick the single/double precision abs-mask constant entity. */
1853 size = get_mode_size_bits(mode);
1854 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1856 set_ia32_am_sc(new_node, ent);
1858 SET_IA32_ORIG_NODE(new_node, node);
1860 set_ia32_op_type(new_node, ia32_AddrModeS);
1861 set_ia32_ls_mode(new_node, mode);
1863 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1864 SET_IA32_ORIG_NODE(new_node, node);
1867 ir_node *xor, *sign_extension;
/* Smaller integer modes are widened to 32 bit first. */
1869 if (get_mode_size_bits(mode) == 32) {
1870 new_op = be_transform_node(op);
1872 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1875 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1877 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1878 nomem, new_op, sign_extension);
1879 SET_IA32_ORIG_NODE(xor, node);
1881 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1882 nomem, xor, sign_extension);
1883 SET_IA32_ORIG_NODE(new_node, node);
1890 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Emits ia32 Bt (bit test) so that the carry flag holds bit n of x; the
 * caller turns the Cmp users into Jc/Jnc. */
1892 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1894 dbg_info *dbgi = get_irn_dbg_info(cmp);
1895 ir_node *block = get_nodes_block(cmp);
1896 ir_node *new_block = be_transform_node(block);
1897 ir_node *op1 = be_transform_node(x);
1898 ir_node *op2 = be_transform_node(n);
1900 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1904 * Transform a node returning a "flag" result.
1906 * @param node the node to transform
1907 * @param pnc_out the compare mode to use
/* Produces a flags-producing ia32 node for a mode_b value. A Proj of a Cmp
 * may be strength-reduced to Bt when it tests a single bit (x & (1 << n));
 * otherwise the Cmp itself is transformed. Any other mode_b value is tested
 * against zero with Test. The comparison kind is returned via pnc_out. */
1909 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1916 /* we have a Cmp as input */
1917 if (is_Proj(node)) {
1918 ir_node *pred = get_Proj_pred(node);
1920 pn_Cmp pnc = get_Proj_proj(node);
1921 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1922 ir_node *l = get_Cmp_left(pred);
1923 ir_node *r = get_Cmp_right(pred);
1925 ir_node *la = get_And_left(l);
1926 ir_node *ra = get_And_right(l);
/* Pattern: ((1 << n) & ra) compared against 0 (or against the shift). */
1928 ir_node *c = get_Shl_left(la);
1929 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1930 /* (1 << n) & ra) */
1931 ir_node *n = get_Shl_right(la);
1932 flags = gen_bt(pred, ra, n);
1933 /* we must generate a Jc/Jnc jump */
1934 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1937 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Mirrored pattern: (la & (1 << n)). */
1942 ir_node *c = get_Shl_left(ra);
1943 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1944 /* la & (1 << n)) */
1945 ir_node *n = get_Shl_right(ra);
1946 flags = gen_bt(pred, la, n);
1947 /* we must generate a Jc/Jnc jump */
1948 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1951 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Generic case: transform the Cmp, tagging float comparisons. */
1957 flags = be_transform_node(pred);
1958 if (mode_is_float(get_irn_mode(get_Cmp_left(pred))))
1959 pnc |= ia32_pn_Cmp_float;
1965 /* a mode_b value, we have to compare it against 0 */
1966 dbgi = get_irn_dbg_info(node);
1967 new_block = be_transform_node(get_nodes_block(node));
1968 new_op = be_transform_node(node);
1969 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1970 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1971 *pnc_out = pn_Cmp_Lg;
1976 * Transforms a Load.
1978 * @return the created ia32 Load node
/* Builds an address mode for the pointer and emits xLoad/vfld for floats or
 * Load (Conv_I2I for sub-32-bit modes) for integers. Float-pinned loads are
 * marked rematerializable. */
1980 static ir_node *gen_Load(ir_node *node)
1982 ir_node *old_block = get_nodes_block(node);
1983 ir_node *block = be_transform_node(old_block);
1984 ir_node *ptr = get_Load_ptr(node);
1985 ir_node *mem = get_Load_mem(node);
1986 ir_node *new_mem = be_transform_node(mem);
1989 dbg_info *dbgi = get_irn_dbg_info(node);
1990 ir_mode *mode = get_Load_mode(node);
1993 ia32_address_t addr;
1995 /* construct load address */
1996 memset(&addr, 0, sizeof(addr));
1997 ia32_create_address_mode(&addr, ptr, 0);
2004 base = be_transform_node(base);
2007 if (index == NULL) {
2010 index = be_transform_node(index);
2013 if (mode_is_float(mode)) {
2014 if (ia32_cg_config.use_sse2) {
2015 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2017 res_mode = mode_xmm;
2019 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2021 res_mode = mode_vfp;
2024 assert(mode != mode_b);
2026 /* create a conv node with address mode for smaller modes */
2027 if (get_mode_size_bits(mode) < 32) {
2028 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2029 new_mem, noreg_GP, mode);
2031 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2036 set_irn_pinned(new_node, get_irn_pinned(node));
2037 set_ia32_op_type(new_node, ia32_AddrModeS);
2038 set_ia32_ls_mode(new_node, mode);
2039 set_address(new_node, &addr);
2041 if (get_irn_pinned(node) == op_pin_state_floats) {
/* The res Proj numbers of all load variants must line up for remat. */
2042 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2043 && pn_ia32_vfld_res == pn_ia32_Load_res
2044 && pn_ia32_Load_res == pn_ia32_res);
2045 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2048 SET_IA32_ORIG_NODE(new_node, node);
2050 be_dep_on_frame(new_node);
/* Decides whether a Load-Proj `node` can be folded into a destination
 * address mode store at `ptr`: the load must be single-use, in the same
 * block, load from the same pointer, and the other operand must not depend
 * on it. Returns non-zero when destination AM is safe. */
2054 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2055 ir_node *ptr, ir_node *other)
2062 /* we only use address mode if we're the only user of the load */
2063 if (get_irn_n_edges(node) > 1)
2066 load = get_Proj_pred(node);
2069 if (get_nodes_block(load) != block)
2072 /* store should have the same pointer as the load */
2073 if (get_Load_ptr(load) != ptr)
2076 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2077 if (other != NULL &&
2078 get_nodes_block(other) == block &&
2079 heights_reachable_in_block(heights, other, load)) {
2083 if (prevents_AM(block, load, mem))
2085 /* Store should be attached to the load via mem */
2086 assert(heights_reachable_in_block(heights, mem, load));
/* Creates a destination-address-mode binary op (e.g. AddMem) for a
 * Store(binop(Load(ptr), x)) pattern. Tries op1 as the loaded side first,
 * then op2 if the operation is commutative; returns NULL when no side
 * qualifies. func8bit is used for 8-bit modes. */
2091 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2092 ir_node *mem, ir_node *ptr, ir_mode *mode,
2093 construct_binop_dest_func *func,
2094 construct_binop_dest_func *func8bit,
2095 match_flags_t flags)
2097 ir_node *src_block = get_nodes_block(node);
2105 ia32_address_mode_t am;
2106 ia32_address_t *addr = &am.addr;
2107 memset(&am, 0, sizeof(am));
2109 assert(flags & match_immediate); /* there is no destam node without... */
2110 commutative = (flags & match_commutative) != 0;
2112 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2113 build_address(&am, op1, ia32_create_am_double_use);
2114 new_op = create_immediate_or_transform(op2, 0);
2115 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2116 build_address(&am, op2, ia32_create_am_double_use);
2117 new_op = create_immediate_or_transform(op1, 0);
/* Fill unset address parts with the NoReg placeholder. */
2122 if (addr->base == NULL)
2123 addr->base = noreg_GP;
2124 if (addr->index == NULL)
2125 addr->index = noreg_GP;
2126 if (addr->mem == NULL)
2129 dbgi = get_irn_dbg_info(node);
2130 block = be_transform_node(src_block);
2131 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2133 if (get_mode_size_bits(mode) == 8) {
2134 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2136 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2138 set_address(new_node, addr);
2139 set_ia32_op_type(new_node, ia32_AddrModeD);
2140 set_ia32_ls_mode(new_node, mode);
2141 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load (and its mem Proj) to the new node. */
2143 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2144 mem_proj = be_transform_node(am.mem_proj);
2145 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Creates a destination-address-mode unary op (e.g. NotMem/NegMem) for a
 * Store(unop(Load(ptr))) pattern; returns NULL if the load cannot be
 * consumed safely (see use_dest_am). */
2150 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2151 ir_node *ptr, ir_mode *mode,
2152 construct_unop_dest_func *func)
2154 ir_node *src_block = get_nodes_block(node);
2160 ia32_address_mode_t am;
2161 ia32_address_t *addr = &am.addr;
2163 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2166 memset(&am, 0, sizeof(am));
2167 build_address(&am, op, ia32_create_am_double_use);
2169 dbgi = get_irn_dbg_info(node);
2170 block = be_transform_node(src_block);
2171 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2172 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2173 set_address(new_node, addr);
2174 set_ia32_op_type(new_node, ia32_AddrModeD);
2175 set_ia32_ls_mode(new_node, mode);
2176 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load (and its mem Proj) to the new node. */
2178 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2179 mem_proj = be_transform_node(am.mem_proj);
2180 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negates a compare relation, honoring the ia32 float-compare flag so that
 * unordered handling is correct for float comparisons. */
2185 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2187 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2188 return get_negated_pnc(pnc, mode);
/* Tries to turn Store(Mux(cond, 1, 0)) (or the inverted form) of an 8-bit
 * value into a SetccMem writing the condition byte directly to memory.
 * Returns NULL when the pattern or mode does not fit. */
2191 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2193 ir_mode *mode = get_irn_mode(node);
2194 ir_node *mux_true = get_Mux_true(node);
2195 ir_node *mux_false = get_Mux_false(node);
2205 ia32_address_t addr;
2207 if (get_mode_size_bits(mode) != 8)
2210 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2212 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2218 cond = get_Mux_sel(node);
2219 flags = get_flags_node(cond, &pnc);
2220 /* we can't handle the float special cases with SetM */
2221 if (pnc & ia32_pn_Cmp_float)
/* Mux(c, 0, 1) needs the negated condition. */
2224 pnc = ia32_get_negated_pnc(pnc);
2226 build_address_ptr(&addr, ptr, mem);
2228 dbgi = get_irn_dbg_info(node);
2229 block = get_nodes_block(node);
2230 new_block = be_transform_node(block);
2231 new_mem = be_transform_node(mem);
2232 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2233 addr.index, addr.mem, flags, pnc);
2234 set_address(new_node, &addr);
2235 set_ia32_op_type(new_node, ia32_AddrModeD);
2236 set_ia32_ls_mode(new_node, mode);
2237 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to transform a Store into a destination-address-mode operation
 * (op-to-memory, e.g. AddMem, NotMem, SetccMem). The stored value must be
 * a single-use GP-mode computation in the same block whose opcode has a
 * memory form. Returns the new node or NULL if no pattern matched. */
2242 static ir_node *try_create_dest_am(ir_node *node)
2244 ir_node *val = get_Store_value(node);
2245 ir_node *mem = get_Store_mem(node);
2246 ir_node *ptr = get_Store_ptr(node);
2247 ir_mode *mode = get_irn_mode(val);
2248 unsigned bits = get_mode_size_bits(mode);
2253 /* handle only GP modes for now... */
2254 if (!ia32_mode_needs_gp_reg(mode))
2258 /* store must be the only user of the val node */
2259 if (get_irn_n_edges(val) > 1)
2261 /* skip pointless convs */
2263 ir_node *conv_op = get_Conv_op(val);
2264 ir_mode *pred_mode = get_irn_mode(conv_op);
2265 if (!ia32_mode_needs_gp_reg(pred_mode))
2267 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2275 /* value must be in the same block */
2276 if (get_nodes_block(node) != get_nodes_block(val))
2279 switch (get_irn_opcode(val)) {
2281 op1 = get_Add_left(val);
2282 op2 = get_Add_right(val);
/* Add by +1/-1 becomes IncMem/DecMem when the CPU likes inc/dec. */
2283 if (ia32_cg_config.use_incdec) {
2284 if (is_Const_1(op2)) {
2285 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2287 } else if (is_Const_Minus_1(op2)) {
2288 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2292 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2293 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2294 match_commutative | match_immediate);
2297 op1 = get_Sub_left(val);
2298 op2 = get_Sub_right(val);
2299 if (is_Const(op2)) {
2300 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2302 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2303 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2307 op1 = get_And_left(val);
2308 op2 = get_And_right(val);
2309 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2310 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2311 match_commutative | match_immediate);
2314 op1 = get_Or_left(val);
2315 op2 = get_Or_right(val);
2316 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2317 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2318 match_commutative | match_immediate);
2321 op1 = get_Eor_left(val);
2322 op2 = get_Eor_right(val);
2323 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2324 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2325 match_commutative | match_immediate);
2328 op1 = get_Shl_left(val);
2329 op2 = get_Shl_right(val);
2330 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2331 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2335 op1 = get_Shr_left(val);
2336 op2 = get_Shr_right(val);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2342 op1 = get_Shrs_left(val);
2343 op2 = get_Shrs_right(val);
2344 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2345 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2349 op1 = get_Rotl_left(val);
2350 op2 = get_Rotl_right(val);
2351 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2352 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2355 /* TODO: match ROR patterns... */
2357 new_node = try_create_SetMem(val, ptr, mem);
2361 op1 = get_Minus_op(val);
2362 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2365 /* should be lowered already */
2366 assert(mode != mode_b);
2367 op1 = get_Not_op(val);
2368 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Propagate the store's pinned state to the new memory op. */
2374 if (new_node != NULL) {
2375 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2376 get_irn_pinned(node) == op_pin_state_pinned) {
2377 set_irn_pinned(new_node, op_pin_state_pinned);
/* True when an integer mode can be the target of an x87 fist store:
 * signed and either 16 or 32 bits wide. */
2384 static bool possible_int_mode_for_fp(ir_mode *mode)
2388 if (!mode_is_signed(mode))
2390 size = get_mode_size_bits(mode);
2391 if (size != 16 && size != 32)
/* True when node is a Conv from a float mode to an integer mode that a
 * fist instruction can store directly (see possible_int_mode_for_fp). */
2396 static int is_float_to_int_conv(const ir_node *node)
2398 ir_mode *mode = get_irn_mode(node);
2402 if (!possible_int_mode_for_fp(mode))
2407 conv_op = get_Conv_op(node);
2408 conv_mode = get_irn_mode(conv_op);
2410 if (!mode_is_float(conv_mode))
2417 * Transform a Store(floatConst) into a sequence of
2420 * @return the created ia32 Store node
/* Stores a float constant as one 32-bit integer store per 4 bytes of the
 * tarval and Syncs the resulting memory outputs. Assumes the float size is
 * a multiple of 4 bytes. */
2422 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2424 ir_mode *mode = get_irn_mode(cns);
2425 unsigned size = get_mode_size_bytes(mode);
2426 tarval *tv = get_Const_tarval(cns);
2427 ir_node *block = get_nodes_block(node);
2428 ir_node *new_block = be_transform_node(block);
2429 ir_node *ptr = get_Store_ptr(node);
2430 ir_node *mem = get_Store_mem(node);
2431 dbg_info *dbgi = get_irn_dbg_info(node);
2435 ia32_address_t addr;
2437 assert(size % 4 == 0);
2440 build_address_ptr(&addr, ptr, mem);
/* Assemble the next 32-bit little-endian word of the constant. */
2444 get_tarval_sub_bits(tv, ofs) |
2445 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2446 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2447 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2448 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2450 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2451 addr.index, addr.mem, imm);
2453 set_irn_pinned(new_node, get_irn_pinned(node));
2454 set_ia32_op_type(new_node, ia32_AddrModeD);
2455 set_ia32_ls_mode(new_node, mode_Iu);
2456 set_address(new_node, &addr);
2457 SET_IA32_ORIG_NODE(new_node, node);
2460 ins[i++] = new_node;
2465 } while (size != 0);
/* Combine the memory results of all partial stores. */
2468 return new_rd_Sync(dbgi, new_block, i, ins);
2475 * Generate a vfist or vfisttp instruction.
/* Chooses vfisttp (SSE3) when available — it always pops the x87 tos, so a
 * Keep is added on the result value to preserve copies for other users —
 * otherwise a plain vfist with an explicit truncating FPU control word. */
2477 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2478 ir_node *mem, ir_node *val, ir_node **fist)
2482 if (ia32_cg_config.use_fisttp) {
2483 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2484 if other users exists */
2485 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2486 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2487 be_new_Keep(block, 1, &value);
2489 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2492 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2495 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2501 * Transforms a general (no special case) Store.
2503 * @return the created ia32 Store node
/* Fallback store path: first tries destination address mode; otherwise
 * builds the address and emits xStore/vfst for floats, vfist for
 * float->int conversions (x87 only), or Store/Store8Bit for integers.
 * Skips redundant Convs in front of the stored value. */
2505 static ir_node *gen_general_Store(ir_node *node)
2507 ir_node *val = get_Store_value(node);
2508 ir_mode *mode = get_irn_mode(val);
2509 ir_node *block = get_nodes_block(node);
2510 ir_node *new_block = be_transform_node(block);
2511 ir_node *ptr = get_Store_ptr(node);
2512 ir_node *mem = get_Store_mem(node);
2513 dbg_info *dbgi = get_irn_dbg_info(node);
2514 ir_node *new_val, *new_node, *store;
2515 ia32_address_t addr;
2517 /* check for destination address mode */
2518 new_node = try_create_dest_am(node);
2519 if (new_node != NULL)
2522 /* construct store address */
2523 memset(&addr, 0, sizeof(addr));
2524 ia32_create_address_mode(&addr, ptr, 0);
2526 if (addr.base == NULL) {
2527 addr.base = noreg_GP;
2529 addr.base = be_transform_node(addr.base);
2532 if (addr.index == NULL) {
2533 addr.index = noreg_GP;
2535 addr.index = be_transform_node(addr.index);
2537 addr.mem = be_transform_node(mem);
2539 if (mode_is_float(mode)) {
2540 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2542 while (is_Conv(val) && mode == get_irn_mode(val)) {
2543 ir_node *op = get_Conv_op(val);
2544 if (!mode_is_float(get_irn_mode(op)))
2548 new_val = be_transform_node(val);
2549 if (ia32_cg_config.use_sse2) {
2550 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2551 addr.index, addr.mem, new_val);
2553 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2554 addr.index, addr.mem, new_val, mode);
2557 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* x87 can store float->int directly via fist: drop the Conv. */
2558 val = get_Conv_op(val);
2560 /* TODO: is this optimisation still necessary at all (middleend)? */
2561 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2562 while (is_Conv(val)) {
2563 ir_node *op = get_Conv_op(val);
2564 if (!mode_is_float(get_irn_mode(op)))
2566 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2570 new_val = be_transform_node(val);
2571 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2573 new_val = create_immediate_or_transform(val, 0);
2574 assert(mode != mode_b);
2576 if (get_mode_size_bits(mode) == 8) {
2577 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2578 addr.index, addr.mem, new_val);
2580 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2581 addr.index, addr.mem, new_val);
2586 set_irn_pinned(store, get_irn_pinned(node));
2587 set_ia32_op_type(store, ia32_AddrModeD);
2588 set_ia32_ls_mode(store, mode);
2590 set_address(store, &addr);
2591 SET_IA32_ORIG_NODE(store, node);
2597 * Transforms a Store.
2599 * @return the created ia32 Store node
2601 static ir_node *gen_Store(ir_node *node)
2603 ir_node *val = get_Store_value(node);
2604 ir_mode *mode = get_irn_mode(val);
/* Special case: a float constant store is expanded into integer stores
 * of the constant's bit pattern (avoids materialising the constant). */
2606 if (mode_is_float(mode) && is_Const(val)) {
2607 /* We can transform every floating const store
2608 into a sequence of integer stores.
2609 If the constant is already in a register,
2610 it would be better to use it, but we don't
2611 have this information here. */
2612 return gen_float_const_Store(node, val);
/* Everything else goes through the generic store transformation. */
2614 return gen_general_Store(node);
2618 * Transforms a Switch.
2620 * @return the created ia32 SwitchJmp node
2622 static ir_node *create_Switch(ir_node *node)
2624 dbg_info *dbgi = get_irn_dbg_info(node);
2625 ir_node *block = be_transform_node(get_nodes_block(node));
2626 ir_node *sel = get_Cond_selector(node);
2627 ir_node *new_sel = be_transform_node(sel);
2628 long switch_min = LONG_MAX;
2629 long switch_max = LONG_MIN;
2630 long default_pn = get_Cond_default_proj(node);
2632 const ir_edge_t *edge;
2634 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* Scan all case Projs (skipping the default) to find the smallest and
 * largest case value; this determines the jump-table range. */
2636 /* determine the smallest switch case value */
2637 foreach_out_edge(node, edge) {
2638 ir_node *proj = get_edge_src_irn(edge);
2639 long pn = get_Proj_proj(proj);
2640 if (pn == default_pn)
2643 if (pn < switch_min)
2645 if (pn > switch_max)
/* Refuse absurdly large jump tables (case-value range > 128000). */
2649 if ((unsigned long) (switch_max - switch_min) > 128000) {
2650 panic("Size of switch %+F bigger than 128000", node);
/* Normalise the selector so cases start at 0: subtract switch_min by
 * means of an Lea with a negative address-mode offset. */
2653 if (switch_min != 0) {
2654 /* if smallest switch case is not 0 we need an additional sub */
2655 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2656 add_ia32_am_offs_int(new_sel, -switch_min);
2657 set_ia32_op_type(new_sel, ia32_AddrModeS);
2659 SET_IA32_ORIG_NODE(new_sel, node);
2662 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2663 SET_IA32_ORIG_NODE(new_node, node);
2669 * Transform a Cond node.
2671 static ir_node *gen_Cond(ir_node *node)
2673 ir_node *block = get_nodes_block(node);
2674 ir_node *new_block = be_transform_node(block);
2675 dbg_info *dbgi = get_irn_dbg_info(node);
2676 ir_node *sel = get_Cond_selector(node);
2677 ir_mode *sel_mode = get_irn_mode(sel);
2678 ir_node *flags = NULL;
/* A selector that is not mode_b means this Cond is really a Switch. */
2682 if (sel_mode != mode_b) {
2683 return create_Switch(node);
/* Boolean case: obtain the eflags producer (usually a Cmp) and the
 * condition code, then emit a conditional jump on those flags. */
2686 /* we get flags from a Cmp */
2687 flags = get_flags_node(sel, &pnc);
2689 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2690 SET_IA32_ORIG_NODE(new_node, node);
2696 * Transform a be_Copy.
2698 static ir_node *gen_be_Copy(ir_node *node)
2700 ir_node *new_node = be_duplicate_node(node);
2701 ir_mode *mode = get_irn_mode(new_node);
/* After transformation all GP-register values use mode_Iu (32 bit). */
2703 if (ia32_mode_needs_gp_reg(mode)) {
2704 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare that produces eflags: either a vFucomi
 * (writes eflags directly) or a vFucomFnstsw/vFtstFnstsw whose FPU
 * status word is moved into eflags with Sahf. */
2710 static ir_node *create_Fucom(ir_node *node)
2712 dbg_info *dbgi = get_irn_dbg_info(node);
2713 ir_node *block = get_nodes_block(node);
2714 ir_node *new_block = be_transform_node(block);
2715 ir_node *left = get_Cmp_left(node);
2716 ir_node *new_left = be_transform_node(left);
2717 ir_node *right = get_Cmp_right(node);
2721 if (ia32_cg_config.use_fucomi) {
2722 new_right = be_transform_node(right);
2723 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2725 set_ia32_commutative(new_node);
2726 SET_IA32_ORIG_NODE(new_node, node);
/* No fucomi available: comparing against 0 can use the cheaper ftst. */
2728 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2729 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2731 new_right = be_transform_node(right);
2732 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2735 set_ia32_commutative(new_node);
2737 SET_IA32_ORIG_NODE(new_node, node);
/* Move the FPU status word (in AH) into the eflags register. */
2739 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2740 SET_IA32_ORIG_NODE(new_node, node);
/* Creates an SSE Ucomi node (float compare setting eflags), using
 * address-mode matching so one operand may come directly from memory. */
2746 static ir_node *create_Ucomi(ir_node *node)
2748 dbg_info *dbgi = get_irn_dbg_info(node);
2749 ir_node *src_block = get_nodes_block(node);
2750 ir_node *new_block = be_transform_node(src_block);
2751 ir_node *left = get_Cmp_left(node);
2752 ir_node *right = get_Cmp_right(node);
2754 ia32_address_mode_t am;
2755 ia32_address_t *addr = &am.addr;
2757 match_arguments(&am, src_block, left, right, NULL,
2758 match_commutative | match_am);
2760 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2761 addr->mem, am.new_op1, am.new_op2,
2763 set_am_attributes(new_node, &am);
2765 SET_IA32_ORIG_NODE(new_node, node);
/* If a Load was folded, reroute its memory Proj to the new node. */
2767 new_node = fix_mem_proj(new_node, &am);
2773 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2774 * to fold an and into a test node
2776 static bool can_fold_test_and(ir_node *node)
2778 const ir_edge_t *edge;
/* we can only have eq and lg projs */
2781 foreach_out_edge(node, edge) {
2782 ir_node *proj = get_edge_src_irn(edge);
2783 pn_Cmp pnc = get_Proj_proj(proj);
2784 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2792 * returns true if it is assured, that the upper bits of a node are "clean"
2793 * which means for a 16 or 8 bit value, that the upper bits in the register
2794 * are 0 for unsigned and a copy of the last significant bit for signed
2797 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2799 assert(ia32_mode_needs_gp_reg(mode));
/* 32-bit (or wider) values have no "upper bits" to worry about. */
2800 if (get_mode_size_bits(mode) >= 32)
/* Look through Projs at the underlying producer. */
2803 if (is_Proj(transformed_node))
2804 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2806 switch (get_ia32_irn_opcode(transformed_node)) {
/* A Conv to a mode at least as small with matching signedness leaves
 * the upper bits in the defined (clean) state. */
2807 case iro_ia32_Conv_I2I:
2808 case iro_ia32_Conv_I2I8Bit: {
2809 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2810 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2812 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr by a large-enough constant guarantees zeroed upper bits
 * (unsigned only); otherwise fall back to checking the shifted value. */
2819 if (mode_is_signed(mode)) {
2820 return false; /* TODO handle signed modes */
2822 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2823 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2824 const ia32_immediate_attr_t *attr
2825 = get_ia32_immediate_attr_const(right);
2826 if (attr->symconst == 0 &&
2827 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2831 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* Sar preserves the sign-replicated upper bits of its input. */
2835 /* TODO too conservative if shift amount is constant */
2836 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (zeros dominate). */
2839 if (!mode_is_signed(mode)) {
2841 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2842 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2844 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary ops: both operands must be clean. */
2849 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2850 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants/immediates: inspect the value's bit pattern directly. */
2852 case iro_ia32_Const:
2853 case iro_ia32_Immediate: {
2854 const ia32_immediate_attr_t *attr =
2855 get_ia32_immediate_attr_const(transformed_node);
2856 if (mode_is_signed(mode)) {
2857 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2858 return shifted == 0 || shifted == -1;
2860 unsigned long shifted = (unsigned long)attr->offset;
2861 shifted >>= get_mode_size_bits(mode);
2862 return shifted == 0;
2872 * Generate code for a Cmp.
2874 static ir_node *gen_Cmp(ir_node *node)
2876 dbg_info *dbgi = get_irn_dbg_info(node);
2877 ir_node *block = get_nodes_block(node);
2878 ir_node *new_block = be_transform_node(block);
2879 ir_node *left = get_Cmp_left(node);
2880 ir_node *right = get_Cmp_right(node);
2881 ir_mode *cmp_mode = get_irn_mode(left);
2883 ia32_address_mode_t am;
2884 ia32_address_t *addr = &am.addr;
/* Float compares go to the SSE (Ucomi) or x87 (Fucom) helpers. */
2887 if (mode_is_float(cmp_mode)) {
2888 if (ia32_cg_config.use_sse2) {
2889 return create_Ucomi(node);
2891 return create_Fucom(node);
2895 assert(ia32_mode_needs_gp_reg(cmp_mode));
2897 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2898 cmp_unsigned = !mode_is_signed(cmp_mode);
/* (left & right) compared against 0, with the And used only here and
 * only Eq/Lg consumers: fold into a Test instruction. */
2899 if (is_Const_0(right) &&
2901 get_irn_n_edges(left) == 1 &&
2902 can_fold_test_and(node)) {
2903 /* Test(and_left, and_right) */
2904 ir_node *and_left = get_And_left(left);
2905 ir_node *and_right = get_And_right(left);
2907 /* matze: code here used mode instead of cmd_mode, I think it is always
2908 * the same as cmp_mode, but I leave this here to see if this is really
2911 assert(get_irn_mode(and_left) == cmp_mode);
2913 match_arguments(&am, block, and_left, and_right, NULL,
2915 match_am | match_8bit_am | match_16bit_am |
2916 match_am_and_immediates | match_immediate);
2918 /* use 32bit compare mode if possible since the opcode is smaller */
2919 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2920 upper_bits_clean(am.new_op2, cmp_mode)) {
2921 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2924 if (get_mode_size_bits(cmp_mode) == 8) {
2925 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2926 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2929 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2930 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Generic integer compare with full address-mode matching. */
2933 /* Cmp(left, right) */
2934 match_arguments(&am, block, left, right, NULL,
2935 match_commutative | match_am | match_8bit_am |
2936 match_16bit_am | match_am_and_immediates |
2938 /* use 32bit compare mode if possible since the opcode is smaller */
2939 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2940 upper_bits_clean(am.new_op2, cmp_mode)) {
2941 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2944 if (get_mode_size_bits(cmp_mode) == 8) {
2945 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2946 addr->index, addr->mem, am.new_op1,
2947 am.new_op2, am.ins_permuted,
2950 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2951 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2954 set_am_attributes(new_node, &am);
2955 set_ia32_ls_mode(new_node, cmp_mode);
2957 SET_IA32_ORIG_NODE(new_node, node);
/* If a Load was folded, reroute its memory Proj to the new node. */
2959 new_node = fix_mem_proj(new_node, &am);
/* Creates a CMovcc from a Mux: selects between the transformed true and
 * false values based on flags/pnc. Requires CMOV support (asserted). */
2964 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2967 dbg_info *dbgi = get_irn_dbg_info(node);
2968 ir_node *block = get_nodes_block(node);
2969 ir_node *new_block = be_transform_node(block);
2970 ir_node *val_true = get_Mux_true(node);
2971 ir_node *val_false = get_Mux_false(node);
2973 ia32_address_mode_t am;
2974 ia32_address_t *addr;
2976 assert(ia32_cg_config.use_cmov);
2977 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2981 match_arguments(&am, block, val_false, val_true, flags,
2982 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If the matcher swapped the operands, the condition must be negated. */
2984 if (am.ins_permuted)
2985 pnc = ia32_get_negated_pnc(pnc);
2987 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2988 addr->mem, am.new_op1, am.new_op2, new_flags,
2990 set_am_attributes(new_node, &am);
2992 SET_IA32_ORIG_NODE(new_node, node);
2994 new_node = fix_mem_proj(new_node, &am);
3000 * Creates a ia32 Setcc instruction.
3002 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3003 ir_node *flags, pn_Cmp pnc,
3006 ir_mode *mode = get_irn_mode(orig_node);
3009 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3010 SET_IA32_ORIG_NODE(new_node, orig_node);
/* Setcc only produces an 8-bit result; zero-extend it when the
 * original node's mode is wider. */
3012 /* we might need to conv the result up */
3013 if (get_mode_size_bits(mode) > 8) {
3014 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3015 nomem, new_node, mode_Bu);
3016 SET_IA32_ORIG_NODE(new_node, orig_node);
3023 * Create instruction for an unsigned Difference or Zero.
3025 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3027 ir_mode *mode = get_irn_mode(psi);
/* Computes max(a-b, 0) branch-free: (a-b) & ~(borrow-mask), where the
 * mask is derived from the Sub's carry flag via Sbb0 + Not. */
3037 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3038 match_mode_neutral | match_am | match_immediate | match_two_users);
3040 block = get_nodes_block(new_node);
3042 if (is_Proj(new_node)) {
3043 sub = get_Proj_pred(new_node);
3044 assert(is_ia32_Sub(sub));
/* Make the Sub multi-result so both the value and the flags can be
 * projected out of it. */
3047 set_irn_mode(sub, mode_T);
3048 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3050 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3052 dbgi = get_irn_dbg_info(psi);
3053 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3054 not = new_bd_ia32_Not(dbgi, block, sbb);
3056 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3057 set_ia32_commutative(new_node);
3062 * Create a const array of two float consts.
3064 * @param c0 the first constant
3065 * @param c1 the second constant
3066 * @param new_mode IN/OUT for the mode of the constants, if NULL
3067 * smallest possible mode will be used
3069 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3071 ir_mode *mode = *new_mode;
3073 ir_initializer_t *initializer;
3074 tarval *tv0 = get_Const_tarval(c0);
3075 tarval *tv1 = get_Const_tarval(c1);
3078 /* detect the best mode for the constants */
3079 mode = get_tarval_mode(tv0);
/* Try to shrink both constants to mode_F, then mode_D, when the
 * conversion is lossless; this minimises the array's footprint. */
3081 if (mode != mode_F) {
3082 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3083 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3085 tv0 = tarval_convert_to(tv0, mode);
3086 tv1 = tarval_convert_to(tv1, mode);
3087 } else if (mode != mode_D) {
3088 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3089 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3091 tv0 = tarval_convert_to(tv0, mode);
3092 tv1 = tarval_convert_to(tv1, mode);
/* Build a local, constant, two-element float array entity in the
 * global type and attach the two tarvals as its initializer. */
3099 tp = ia32_create_float_type(mode, 4);
3100 tp = ia32_create_float_array(tp);
3102 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3104 set_entity_ld_ident(ent, get_entity_ident(ent));
3105 set_entity_visibility(ent, ir_visibility_local);
3106 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3108 initializer = create_initializer_compound(2);
3110 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3111 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3113 set_entity_initializer(ent, initializer);
3120 * Transforms a Mux node into some code sequence.
3122 * @return The transformed node.
3124 static ir_node *gen_Mux(ir_node *node)
3126 dbg_info *dbgi = get_irn_dbg_info(node);
3127 ir_node *block = get_nodes_block(node);
3128 ir_node *new_block = be_transform_node(block);
3129 ir_node *mux_true = get_Mux_true(node);
3130 ir_node *mux_false = get_Mux_false(node);
3131 ir_node *cond = get_Mux_sel(node);
3132 ir_mode *mode = get_irn_mode(node);
3137 assert(get_irn_mode(cond) == mode_b);
3139 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
/* --- Float Mux --- */
3140 if (mode_is_float(mode)) {
3141 ir_node *cmp = get_Proj_pred(cond);
3142 ir_node *cmp_left = get_Cmp_left(cmp);
3143 ir_node *cmp_right = get_Cmp_right(cmp);
3144 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE: recognise min/max patterns directly as xMin/xMax. */
3146 if (ia32_cg_config.use_sse2) {
3147 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3148 if (cmp_left == mux_true && cmp_right == mux_false) {
3149 /* Mux(a <= b, a, b) => MIN */
3150 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3151 match_commutative | match_am | match_two_users);
3152 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3153 /* Mux(a <= b, b, a) => MAX */
3154 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3155 match_commutative | match_am | match_two_users);
3157 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3158 if (cmp_left == mux_true && cmp_right == mux_false) {
3159 /* Mux(a >= b, a, b) => MAX */
3160 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3161 match_commutative | match_am | match_two_users);
3162 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3163 /* Mux(a >= b, b, a) => MIN */
3164 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3165 match_commutative | match_am | match_two_users);
/* Mux between two float constants: materialise a 2-element constant
 * array and use a Setcc-scaled index to load the selected element. */
3170 if (is_Const(mux_true) && is_Const(mux_false)) {
3171 ia32_address_mode_t am;
3176 flags = get_flags_node(cond, &pnc);
3177 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3179 if (ia32_cg_config.use_sse2) {
3180 /* cannot load from different mode on SSE */
3183 /* x87 can load any mode */
3187 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 Setcc result by the element size; sizes without a
 * hardware scale factor are synthesised via Lea/Add. */
3189 switch (get_mode_size_bytes(new_mode)) {
3199 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3200 set_ia32_am_scale(new_node, 2);
3205 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3206 set_ia32_am_scale(new_node, 1);
3209 /* arg, shift 16 NOT supported */
3211 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3214 panic("Unsupported constant size");
/* Assemble the source address mode referencing the constant array. */
3217 am.ls_mode = new_mode;
3218 am.addr.base = noreg_GP;
3219 am.addr.index = new_node;
3220 am.addr.mem = nomem;
3222 am.addr.scale = scale;
3223 am.addr.use_frame = 0;
3224 am.addr.frame_entity = NULL;
3225 am.addr.symconst_sign = 0;
3226 am.mem_proj = am.addr.mem;
3227 am.op_type = ia32_AddrModeS;
3230 am.pinned = op_pin_state_floats;
3232 am.ins_permuted = 0;
3234 if (ia32_cg_config.use_sse2)
3235 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3237 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3238 set_am_attributes(load, &am);
3240 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3242 panic("cannot transform floating point Mux");
/* --- Integer Mux --- */
3245 assert(ia32_mode_needs_gp_reg(mode));
3247 if (is_Proj(cond)) {
3248 ir_node *cmp = get_Proj_pred(cond);
3250 ir_node *cmp_left = get_Cmp_left(cmp);
3251 ir_node *cmp_right = get_Cmp_right(cmp);
3252 pn_Cmp pnc = get_Proj_proj(cond);
3254 /* check for unsigned Doz first */
3255 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3256 is_Const_0(mux_false) && is_Sub(mux_true) &&
3257 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3258 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3259 return create_doz(node, cmp_left, cmp_right);
3260 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3261 is_Const_0(mux_true) && is_Sub(mux_false) &&
3262 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3263 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3264 return create_doz(node, cmp_left, cmp_right);
3269 flags = get_flags_node(cond, &pnc);
/* Mux of the constants 0/1 reduces to a Setcc (possibly negated). */
3271 if (is_Const(mux_true) && is_Const(mux_false)) {
3272 /* both are const, good */
3273 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3274 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3275 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3276 pnc = ia32_get_negated_pnc(pnc);
3277 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3279 /* Not that simple. */
/* General case: conditional move. */
3284 new_node = create_CMov(node, cond, flags, pnc);
3292 * Create a conversion from x87 state register to general purpose.
3294 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3296 ir_node *block = be_transform_node(get_nodes_block(node));
3297 ir_node *op = get_Conv_op(node);
3298 ir_node *new_op = be_transform_node(op);
3299 ir_graph *irg = current_ir_graph;
3300 dbg_info *dbgi = get_irn_dbg_info(node);
3301 ir_mode *mode = get_irn_mode(node);
3302 ir_node *fist, *load, *mem;
/* Spill the x87 value to the frame via fist(p)... */
3304 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3305 set_irn_pinned(fist, op_pin_state_floats);
3306 set_ia32_use_frame(fist);
3307 set_ia32_op_type(fist, ia32_AddrModeD);
3309 assert(get_mode_size_bits(mode) <= 32);
3310 /* exception we can only store signed 32 bit integers, so for unsigned
3311 we store a 64bit (signed) integer and load the lower bits */
3312 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3313 set_ia32_ls_mode(fist, mode_Ls);
3315 set_ia32_ls_mode(fist, mode_Is);
3317 SET_IA32_ORIG_NODE(fist, node);
/* ...then reload it as a 32-bit integer from the same frame slot. */
3320 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3322 set_irn_pinned(load, op_pin_state_floats);
3323 set_ia32_use_frame(load);
3324 set_ia32_op_type(load, ia32_AddrModeS);
3325 set_ia32_ls_mode(load, mode_Is);
/* Record whether the shared stack slot must be 64 or 32 bit wide so the
 * frame-entity assignment can size it correctly. */
3326 if (get_ia32_ls_mode(fist) == mode_Ls) {
3327 ia32_attr_t *attr = get_ia32_attr(load);
3328 attr->data.need_64bit_stackent = 1;
3330 ia32_attr_t *attr = get_ia32_attr(load);
3331 attr->data.need_32bit_stackent = 1;
3333 SET_IA32_ORIG_NODE(load, node);
3335 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3339 * Creates a x87 strict Conv by placing a Store and a Load
3341 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3343 ir_node *block = get_nodes_block(node);
3344 ir_graph *irg = get_Block_irg(block);
3345 dbg_info *dbgi = get_irn_dbg_info(node);
3346 ir_node *frame = get_irg_frame(irg);
3347 ir_node *store, *load;
/* Round-trip through memory in tgt_mode: the x87 keeps 80-bit
 * precision internally, so a store/load pair forces the rounding
 * required by a strict conversion. */
3350 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3351 set_ia32_use_frame(store);
3352 set_ia32_op_type(store, ia32_AddrModeD);
3353 SET_IA32_ORIG_NODE(store, node);
3355 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3356 set_ia32_use_frame(load);
3357 set_ia32_op_type(load, ia32_AddrModeS);
3358 SET_IA32_ORIG_NODE(load, node);
3360 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Creates an integer-to-integer Conv node, dispatching to the 8-bit
 * constructor variant when the target mode is 8 bits wide. */
3364 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3365 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3367 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3369 func = get_mode_size_bits(mode) == 8 ?
3370 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3371 return func(dbgi, block, base, index, mem, val, mode);
3375 * Create a conversion from general purpose to x87 register
3377 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3379 ir_node *src_block = get_nodes_block(node);
3380 ir_node *block = be_transform_node(src_block);
3381 ir_graph *irg = get_Block_irg(block);
3382 dbg_info *dbgi = get_irn_dbg_info(node);
3383 ir_node *op = get_Conv_op(node);
3384 ir_node *new_op = NULL;
3386 ir_mode *store_mode;
3391 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3392 if (possible_int_mode_for_fp(src_mode)) {
3393 ia32_address_mode_t am;
3395 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3396 if (am.op_type == ia32_AddrModeS) {
3397 ia32_address_t *addr = &am.addr;
3399 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3400 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3402 set_am_attributes(fild, &am);
3403 SET_IA32_ORIG_NODE(fild, node);
3405 fix_mem_proj(fild, &am);
/* Fallback: go through memory. Transform the operand if the AM path
 * above did not already do so. */
3410 if (new_op == NULL) {
3411 new_op = be_transform_node(op);
3414 mode = get_irn_mode(op);
3416 /* first convert to 32 bit signed if necessary */
3417 if (get_mode_size_bits(src_mode) < 32) {
3418 if (!upper_bits_clean(new_op, src_mode)) {
3419 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3420 SET_IA32_ORIG_NODE(new_op, node);
3425 assert(get_mode_size_bits(mode) == 32);
/* Store the integer to the frame so fild can load it. */
3428 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3430 set_ia32_use_frame(store);
3431 set_ia32_op_type(store, ia32_AddrModeD);
3432 set_ia32_ls_mode(store, mode_Iu);
3434 /* exception for 32bit unsigned, do a 64bit spill+load */
3435 if (!mode_is_signed(mode)) {
/* Write a zero high word so the 64-bit fild reads the unsigned value
 * as a non-negative signed 64-bit integer. */
3438 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3440 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3441 noreg_GP, nomem, zero_const);
3443 set_ia32_use_frame(zero_store);
3444 set_ia32_op_type(zero_store, ia32_AddrModeD);
3445 add_ia32_am_offs_int(zero_store, 4);
3446 set_ia32_ls_mode(zero_store, mode_Iu);
3451 store = new_rd_Sync(dbgi, block, 2, in);
3452 store_mode = mode_Ls;
3454 store_mode = mode_Is;
/* Load the stored integer into the x87 unit. */
3458 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3460 set_ia32_use_frame(fild);
3461 set_ia32_op_type(fild, ia32_AddrModeS);
3462 set_ia32_ls_mode(fild, store_mode);
3464 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3470 * Create a conversion from one integer mode into another one
3472 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3473 dbg_info *dbgi, ir_node *block, ir_node *op,
3476 ir_node *new_block = be_transform_node(block);
3478 ir_mode *smaller_mode;
3479 ia32_address_mode_t am;
3480 ia32_address_t *addr = &am.addr;
/* The conversion only has to honour the narrower of the two modes. */
3483 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3484 smaller_mode = src_mode;
3486 smaller_mode = tgt_mode;
3489 #ifdef DEBUG_libfirm
3491 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3496 match_arguments(&am, block, NULL, op, NULL,
3497 match_am | match_8bit_am | match_16bit_am);
/* If the operand's upper bits are already clean, the Conv is a no-op;
 * the asserts check that no address-mode parts were matched in. */
3499 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3500 /* unnecessary conv. in theory it shouldn't have been AM */
3501 assert(is_ia32_NoReg_GP(addr->base));
3502 assert(is_ia32_NoReg_GP(addr->index));
3503 assert(is_NoMem(addr->mem));
3504 assert(am.addr.offset == 0);
3505 assert(am.addr.symconst_ent == NULL);
3509 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3510 addr->mem, am.new_op2, smaller_mode);
3511 set_am_attributes(new_node, &am);
3512 /* match_arguments assume that out-mode = in-mode, this isn't true here
3514 set_ia32_ls_mode(new_node, smaller_mode);
3515 SET_IA32_ORIG_NODE(new_node, node);
3516 new_node = fix_mem_proj(new_node, &am);
3521 * Transforms a Conv node.
3523 * @return The created ia32 Conv node
3525 static ir_node *gen_Conv(ir_node *node)
3527 ir_node *block = get_nodes_block(node);
3528 ir_node *new_block = be_transform_node(block);
3529 ir_node *op = get_Conv_op(node);
3530 ir_node *new_op = NULL;
3531 dbg_info *dbgi = get_irn_dbg_info(node);
3532 ir_mode *src_mode = get_irn_mode(op);
3533 ir_mode *tgt_mode = get_irn_mode(node);
3534 int src_bits = get_mode_size_bits(src_mode);
3535 int tgt_bits = get_mode_size_bits(tgt_mode);
3536 ir_node *res = NULL;
3538 assert(!mode_is_int(src_mode) || src_bits <= 32);
3539 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3541 /* modeB -> X should already be lowered by the lower_mode_b pass */
3542 if (src_mode == mode_b) {
3543 panic("ConvB not lowered %+F", node);
/* Same-mode Conv: a no-op except possibly for x87 strictness. */
3546 if (src_mode == tgt_mode) {
3547 if (get_Conv_strict(node)) {
3548 if (ia32_cg_config.use_sse2) {
3549 /* when we are in SSE mode, we can kill all strict no-op conversion */
3550 return be_transform_node(op);
3553 /* this should be optimized already, but who knows... */
3554 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3555 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3556 return be_transform_node(op);
/* --- float source --- */
3560 if (mode_is_float(src_mode)) {
3561 new_op = be_transform_node(op);
3562 /* we convert from float ... */
3563 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE uses Conv_FP2FP; x87 needs a strict conv
 * (store/load round trip) only when required by the fp model. */
3565 if (ia32_cg_config.use_sse2) {
3566 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3567 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3569 set_ia32_ls_mode(res, tgt_mode);
3571 if (get_Conv_strict(node)) {
3572 /* if fp_no_float_fold is not set then we assume that we
3573 * don't have any float operations in a non
3574 * mode_float_arithmetic mode and can skip strict upconvs */
3575 if (src_bits < tgt_bits
3576 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3577 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3580 res = gen_x87_strict_conv(tgt_mode, new_op);
3581 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3585 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int: SSE Conv_FP2I, or the x87 fist-based helper. */
3590 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3591 if (ia32_cg_config.use_sse2) {
3592 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3594 set_ia32_ls_mode(res, src_mode);
3596 return gen_x87_fp_to_gp(node);
/* --- integer source --- */
3600 /* we convert from int ... */
3601 if (mode_is_float(tgt_mode)) {
/* int -> float: SSE Conv_I2FP, or x87 fild-based helper; the latter
 * may need an extra strict conv when the float mantissa cannot hold
 * every value of the integer mode. */
3603 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3604 if (ia32_cg_config.use_sse2) {
3605 new_op = be_transform_node(op);
3606 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3608 set_ia32_ls_mode(res, tgt_mode);
3610 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3611 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3612 res = gen_x87_gp_to_fp(node, src_mode);
3614 /* we need a strict-Conv, if the int mode has more bits than the
3616 if (float_mantissa < int_mantissa) {
3617 res = gen_x87_strict_conv(tgt_mode, res);
3618 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3622 } else if (tgt_mode == mode_b) {
3623 /* mode_b lowering already took care that we only have 0/1 values */
3624 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3625 src_mode, tgt_mode));
3626 return be_transform_node(op);
/* int -> int: same width is a no-op, otherwise create an I2I Conv. */
3629 if (src_bits == tgt_bits) {
3630 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3631 src_mode, tgt_mode));
3632 return be_transform_node(op);
3635 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Returns an ia32 Immediate for the node when it fits the given
 * constraint type, otherwise the regularly transformed node. */
3643 static ir_node *create_immediate_or_transform(ir_node *node,
3644 char immediate_constraint_type)
3646 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3647 if (new_node == NULL) {
3648 new_node = be_transform_node(node);
3654 * Transforms a FrameAddr into an ia32 Add.
3656 static ir_node *gen_be_FrameAddr(ir_node *node)
3658 ir_node *block = be_transform_node(get_nodes_block(node));
3659 ir_node *op = be_get_FrameAddr_frame(node);
3660 ir_node *new_op = be_transform_node(op);
3661 dbg_info *dbgi = get_irn_dbg_info(node);
/* An Lea with the frame entity attached computes frame_base + offset;
 * the concrete offset is filled in after frame layout. */
3664 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3665 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3666 set_ia32_use_frame(new_node);
3668 SET_IA32_ORIG_NODE(new_node, node);
3674 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3676 static ir_node *gen_be_Return(ir_node *node)
3678 ir_graph *irg = current_ir_graph;
3679 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3680 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3681 ir_entity *ent = get_irg_entity(irg);
3682 ir_type *tp = get_entity_type(ent);
3687 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3688 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3690 int pn_ret_val, pn_ret_mem, arity, i;
3692 assert(ret_val != NULL);
/* Fast path: no result, or not using SSE — the calling convention
 * needs no XMM0 -> x87-TOS fixup, just duplicate the Return. */
3693 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3694 return be_duplicate_node(node);
3697 res_type = get_method_res_type(tp, 0);
3699 if (! is_Primitive_type(res_type)) {
3700 return be_duplicate_node(node);
3703 mode = get_type_mode(res_type);
3704 if (! mode_is_float(mode)) {
3705 return be_duplicate_node(node);
3708 assert(get_method_n_ress(tp) == 1);
3710 pn_ret_val = get_Proj_proj(ret_val);
3711 pn_ret_mem = get_Proj_proj(ret_mem);
3713 /* get the Barrier */
3714 barrier = get_Proj_pred(ret_val);
3716 /* get result input of the Barrier */
3717 ret_val = get_irn_n(barrier, pn_ret_val);
3718 new_ret_val = be_transform_node(ret_val);
3720 /* get memory input of the Barrier */
3721 ret_mem = get_irn_n(barrier, pn_ret_mem);
3722 new_ret_mem = be_transform_node(ret_mem);
3724 frame = get_irg_frame(irg);
3726 dbgi = get_irn_dbg_info(barrier);
3727 block = be_transform_node(get_nodes_block(barrier));
/* Spill the SSE result to the frame... */
3729 /* store xmm0 onto stack */
3730 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3731 new_ret_mem, new_ret_val);
3732 set_ia32_ls_mode(sse_store, mode);
3733 set_ia32_op_type(sse_store, ia32_AddrModeD);
3734 set_ia32_use_frame(sse_store);
/* ...and reload it onto the x87 stack, as the ABI requires float
 * returns on the FPU top-of-stack. */
3736 /* load into x87 register */
3737 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3738 set_ia32_op_type(fld, ia32_AddrModeS);
3739 set_ia32_use_frame(fld);
3741 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3742 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
/* Rebuild the Barrier with the fixed-up value and memory inputs. */
3744 /* create a new barrier */
3745 arity = get_irn_arity(barrier);
3746 in = ALLOCAN(ir_node*, arity);
3747 for (i = 0; i < arity; ++i) {
3750 if (i == pn_ret_val) {
3752 } else if (i == pn_ret_mem) {
3755 ir_node *in = get_irn_n(barrier, i);
3756 new_in = be_transform_node(in);
3761 new_barrier = new_ir_node(dbgi, irg, block,
3762 get_irn_op(barrier), get_irn_mode(barrier),
3764 copy_node_attr(barrier, new_barrier);
3765 be_duplicate_deps(barrier, new_barrier);
3766 be_set_transformed_node(barrier, new_barrier);
3768 /* transform normally */
3769 return be_duplicate_node(node);
3773 * Transform a be_AddSP into an ia32_SubSP.
/* The ia32 stack grows downwards, so enlarging the frame (AddSP) means
 * subtracting the size from the stack pointer, hence SubSP. */
3775 static ir_node *gen_be_AddSP(ir_node *node)
3777 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3778 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3780 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3781 match_am | match_immediate);
3785 * Transform a be_SubSP into an ia32_AddSP
/* Counterpart of gen_be_AddSP: shrinking the frame adds the size back onto
 * the (downward-growing) stack pointer. */
3787 static ir_node *gen_be_SubSP(ir_node *node)
3789 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3790 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3792 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3793 match_am | match_immediate);
3797 * Change some phi modes
/**
 * Transform a Phi: the Phi node itself is rebuilt with its *untransformed*
 * predecessors (loops!) and only the output register requirement is chosen
 * here, based on the mode: gp for integer-like modes, xmm/vfp for floats
 * (depending on SSE2 availability), no requirement otherwise.  The
 * predecessors are enqueued and fixed up later by the transform driver.
 */
3799 static ir_node *gen_Phi(ir_node *node)
3801 const arch_register_req_t *req;
3802 ir_node *block = be_transform_node(get_nodes_block(node));
3803 ir_graph *irg = current_ir_graph;
3804 dbg_info *dbgi = get_irn_dbg_info(node);
3805 ir_mode *mode = get_irn_mode(node);
3808 if (ia32_mode_needs_gp_reg(mode)) {
3809 /* we shouldn't have any 64bit stuff around anymore */
3810 assert(get_mode_size_bits(mode) <= 32);
3811 /* all integer operations are on 32bit registers now */
3813 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
3814 } else if (mode_is_float(mode)) {
3815 if (ia32_cg_config.use_sse2) {
3817 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
3820 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
3823 req = arch_no_register_req;
3826 /* phi nodes allow loops, so we use the old arguments for now
3827 * and fix this later */
3828 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3829 get_irn_in(node) + 1);
3830 copy_node_attr(node, phi);
3831 be_duplicate_deps(node, phi);
3833 arch_set_out_register_req(phi, 0, req);
/* Make sure the (still untransformed) predecessors get visited. */
3835 be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32 Jmp in the transformed block.
 */
3840 static ir_node *gen_Jmp(ir_node *node)
3842 ir_node *block = get_nodes_block(node);
3843 ir_node *new_block = be_transform_node(block);
3844 dbg_info *dbgi = get_irn_dbg_info(node);
3847 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3848 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp).  The jump target may be folded into an
 * address-mode operand or an immediate via match_arguments.
 */
3856 static ir_node *gen_IJmp(ir_node *node)
3858 ir_node *block = get_nodes_block(node);
3859 ir_node *new_block = be_transform_node(block);
3860 dbg_info *dbgi = get_irn_dbg_info(node);
3861 ir_node *op = get_IJmp_target(node);
3863 ia32_address_mode_t am;
3864 ia32_address_t *addr = &am.addr;
3866 assert(get_irn_mode(op) == mode_P);
3868 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3870 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3871 addr->mem, am.new_op2);
3872 set_am_attributes(new_node, &am);
3873 SET_IA32_ORIG_NODE(new_node, node);
/* If a load was folded into the address mode, reroute its memory Proj. */
3875 new_node = fix_mem_proj(new_node, &am);
3881 * Transform a Bound node.
/**
 * Only the common case lower == 0 (typical for Java array bounds checks) is
 * supported: it is lowered to an unsigned compare "index - upper" followed by
 * a Jcc on unsigned less-than.  The general case panics.
 */
3883 static ir_node *gen_Bound(ir_node *node)
3886 ir_node *lower = get_Bound_lower(node);
3887 dbg_info *dbgi = get_irn_dbg_info(node);
3889 if (is_Const_0(lower)) {
3890 /* typical case for Java */
3891 ir_node *sub, *res, *flags, *block;
3893 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3894 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3896 block = get_nodes_block(res);
/* gen_binop may or may not have produced a Proj; normalize so that "sub"
 * is the mode_T Sub node and "res" its result Proj. */
3897 if (! is_Proj(res)) {
3899 set_irn_mode(sub, mode_T);
3900 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3902 sub = get_Proj_pred(res);
/* Branch on the Sub's flags: unsigned "lt" means index >= upper failed. */
3904 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3905 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3906 SET_IA32_ORIG_NODE(new_node, node);
3908 panic("generic Bound not supported in ia32 Backend");
/**
 * Transform a lowered ShlDep (shift-left with extra dependency) into a real
 * ia32 Shl.
 */
3914 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3916 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3917 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3919 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3920 match_immediate | match_mode_neutral);
/** Transform a lowered ShrDep into a real ia32 Shr. */
3923 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3925 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3926 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3927 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/** Transform a lowered SarDep into a real ia32 Sar (arithmetic shift). */
3931 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3933 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3934 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3935 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered Add (low word of a 64bit add).  The resulting ia32 Add
 * is forced to mode_T so the carry flag Proj can be consumed by the
 * matching Adc of the high word.
 */
3939 static ir_node *gen_ia32_l_Add(ir_node *node)
3941 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3942 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3943 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3944 match_commutative | match_am | match_immediate |
3945 match_mode_neutral);
/* gen_binop may return a Proj; we need the Add node itself. */
3947 if (is_Proj(lowered)) {
3948 lowered = get_Proj_pred(lowered);
3950 assert(is_ia32_Add(lowered));
3951 set_irn_mode(lowered, mode_T);
/** Transform a lowered Adc (add-with-carry, high word of a 64bit add). */
3957 static ir_node *gen_ia32_l_Adc(ir_node *node)
3959 return gen_binop_flags(node, new_bd_ia32_Adc,
3960 match_commutative | match_am | match_immediate |
3961 match_mode_neutral);
3965 * Transforms a l_MulS into a "real" MulS node.
3967 * @return the created ia32 Mul node
3969 static ir_node *gen_ia32_l_Mul(ir_node *node)
3971 ir_node *left = get_binop_left(node);
3972 ir_node *right = get_binop_right(node);
3974 return gen_binop(node, left, right, new_bd_ia32_Mul,
3975 match_commutative | match_am | match_mode_neutral);
3979 * Transforms a l_IMulS into a "real" IMul1OPS node.
3981 * @return the created ia32 IMul1OP node
3983 static ir_node *gen_ia32_l_IMul(ir_node *node)
3985 ir_node *left = get_binop_left(node);
3986 ir_node *right = get_binop_right(node);
3988 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3989 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered Sub (low word of a 64bit subtract).  Like l_Add, the
 * result is forced to mode_T so the borrow flag can feed the matching Sbb.
 * Note: Sub is not commutative, so no match_commutative here.
 */
3992 static ir_node *gen_ia32_l_Sub(ir_node *node)
3994 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3995 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3996 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3997 match_am | match_immediate | match_mode_neutral);
/* gen_binop may return a Proj; we need the Sub node itself. */
3999 if (is_Proj(lowered)) {
4000 lowered = get_Proj_pred(lowered);
4002 assert(is_ia32_Sub(lowered));
4003 set_irn_mode(lowered, mode_T);
/** Transform a lowered Sbb (subtract-with-borrow, high word of a 64bit sub). */
4009 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4011 return gen_binop_flags(node, new_bd_ia32_Sbb,
4012 match_am | match_immediate | match_mode_neutral);
4016 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4017 * op1 - target to be shifted
4018 * op2 - contains bits to be shifted into target
4020 * Only op3 can be an immediate.
4022 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4023 ir_node *low, ir_node *count)
4025 ir_node *block = get_nodes_block(node);
4026 ir_node *new_block = be_transform_node(block);
4027 dbg_info *dbgi = get_irn_dbg_info(node);
4028 ir_node *new_high = be_transform_node(high);
4029 ir_node *new_low = be_transform_node(low);
4033 /* the shift amount can be any mode that is bigger than 5 bits, since all
4034 * other bits are ignored anyway */
/* Strip single-use integer Convs around the count: the hardware only looks
 * at the low 5 bits of the shift amount. */
4035 while (is_Conv(count) &&
4036 get_irn_n_edges(count) == 1 &&
4037 mode_is_int(get_irn_mode(count))) {
4038 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4039 count = get_Conv_op(count);
4041 new_count = create_immediate_or_transform(count, 0);
4043 if (is_ia32_l_ShlD(node)) {
4044 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4047 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4050 SET_IA32_ORIG_NODE(new_node, node);
/** Transform a lowered 64bit shift-left (l_ShlD) into an ia32 ShlD. */
4055 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4057 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4058 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4059 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4060 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered 64bit shift-right (l_ShrD) into an ia32 ShrD. */
4063 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4065 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4066 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4067 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4068 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered long-long-to-float conversion (x87 only): store the
 * two 32bit halves to an 8-byte frame slot, then fild the 64bit integer.
 * fild interprets the value as *signed*; for unsigned sources with the sign
 * bit set, 2^64 (the ULL bias constant) is added afterwards to correct the
 * result.
 */
4071 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4073 ir_node *src_block = get_nodes_block(node);
4074 ir_node *block = be_transform_node(src_block);
4075 ir_graph *irg = current_ir_graph;
4076 dbg_info *dbgi = get_irn_dbg_info(node);
4077 ir_node *frame = get_irg_frame(irg);
4078 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4079 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4080 ir_node *new_val_low = be_transform_node(val_low);
4081 ir_node *new_val_high = be_transform_node(val_high);
4083 ir_node *sync, *fild, *res;
4084 ir_node *store_low, *store_high;
4086 if (ia32_cg_config.use_sse2) {
4087 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Write both 32bit halves into one 64bit frame slot (high half at +4). */
4091 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4093 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4095 SET_IA32_ORIG_NODE(store_low, node);
4096 SET_IA32_ORIG_NODE(store_high, node);
4098 set_ia32_use_frame(store_low);
4099 set_ia32_use_frame(store_high);
4100 set_ia32_op_type(store_low, ia32_AddrModeD);
4101 set_ia32_op_type(store_high, ia32_AddrModeD);
4102 set_ia32_ls_mode(store_low, mode_Iu);
4103 set_ia32_ls_mode(store_high, mode_Is);
4104 add_ia32_am_offs_int(store_high, 4);
/* Both stores must complete before the fild reads the slot. */
4108 sync = new_rd_Sync(dbgi, block, 2, in);
4111 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4113 set_ia32_use_frame(fild);
4114 set_ia32_op_type(fild, ia32_AddrModeS);
4115 set_ia32_ls_mode(fild, mode_Ls);
4117 SET_IA32_ORIG_NODE(fild, node);
4119 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
/* Unsigned source: if the sign bit was set, fild produced a value that is
 * 2^64 too small.  Use (high >> 31) scaled by 8 as index into a 2-entry
 * constant table {0, 2^64} and add the selected bias. */
4121 if (! mode_is_signed(get_irn_mode(val_high))) {
4122 ia32_address_mode_t am;
4124 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4127 am.addr.base = noreg_GP;
4128 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4129 am.addr.mem = nomem;
4132 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4133 am.addr.use_frame = 0;
4134 am.addr.frame_entity = NULL;
4135 am.addr.symconst_sign = 0;
4136 am.ls_mode = mode_F;
4137 am.mem_proj = nomem;
4138 am.op_type = ia32_AddrModeS;
4140 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4141 am.pinned = op_pin_state_floats;
4143 am.ins_permuted = 0;
4145 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4146 am.new_op1, am.new_op2, get_fpcw());
4147 set_am_attributes(fadd, &am);
4149 set_irn_mode(fadd, mode_T);
4150 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered float-to-long-long conversion: fist the x87 value into
 * a 64bit frame slot.  The two 32bit halves are picked up later by
 * gen_Proj_l_FloattoLL via plain 32bit Loads from the same slot.
 */
4155 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4157 ir_node *src_block = get_nodes_block(node);
4158 ir_node *block = be_transform_node(src_block);
4159 ir_graph *irg = get_Block_irg(block);
4160 dbg_info *dbgi = get_irn_dbg_info(node);
4161 ir_node *frame = get_irg_frame(irg);
4162 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4163 ir_node *new_val = be_transform_node(val);
4164 ir_node *fist, *mem;
4166 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4167 SET_IA32_ORIG_NODE(fist, node);
4168 set_ia32_use_frame(fist);
4169 set_ia32_op_type(fist, ia32_AddrModeD);
4170 set_ia32_ls_mode(fist, mode_Ls);
4176 * the BAD transformer.
/* Registered for opcodes that must never reach the transformation phase. */
4178 static ir_node *bad_transform(ir_node *node)
4180 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: read one 32bit half of the 64bit value
 * that the fist wrote to the frame slot (offset +4 for the high half).
 */
4184 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4186 ir_node *block = be_transform_node(get_nodes_block(node));
4187 ir_graph *irg = get_Block_irg(block);
4188 ir_node *pred = get_Proj_pred(node);
4189 ir_node *new_pred = be_transform_node(pred);
4190 ir_node *frame = get_irg_frame(irg);
4191 dbg_info *dbgi = get_irn_dbg_info(node);
4192 long pn = get_Proj_proj(node);
4197 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4198 SET_IA32_ORIG_NODE(load, node);
4199 set_ia32_use_frame(load);
4200 set_ia32_op_type(load, ia32_AddrModeS);
4201 set_ia32_ls_mode(load, mode_Iu);
4202 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4203 * 32 bit from it with this particular load */
4204 attr = get_ia32_attr(load);
4205 attr->data.need_64bit_stackent = 1;
4207 if (pn == pn_ia32_l_FloattoLL_res_high) {
4208 add_ia32_am_offs_int(load, 4);
4210 assert(pn == pn_ia32_l_FloattoLL_res_low);
4213 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4219 * Transform the Projs of an AddSP.
/* The be_AddSP was turned into an ia32_SubSP (stack grows down), so the
 * proj numbers are remapped onto the SubSP outputs; the stack-pointer
 * result is pinned to ESP. */
4221 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4223 ir_node *block = be_transform_node(get_nodes_block(node));
4224 ir_node *pred = get_Proj_pred(node);
4225 ir_node *new_pred = be_transform_node(pred);
4226 dbg_info *dbgi = get_irn_dbg_info(node);
4227 long proj = get_Proj_proj(node);
4229 if (proj == pn_be_AddSP_sp) {
4230 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4231 pn_ia32_SubSP_stack);
4232 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4234 } else if (proj == pn_be_AddSP_res) {
4235 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4236 pn_ia32_SubSP_addr);
4237 } else if (proj == pn_be_AddSP_M) {
4238 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4241 panic("No idea how to transform proj->AddSP");
4245 * Transform the Projs of a SubSP.
/* Counterpart of gen_Proj_be_AddSP: the be_SubSP became an ia32_AddSP. */
4247 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4249 ir_node *block = be_transform_node(get_nodes_block(node));
4250 ir_node *pred = get_Proj_pred(node);
4251 ir_node *new_pred = be_transform_node(pred);
4252 dbg_info *dbgi = get_irn_dbg_info(node);
4253 long proj = get_Proj_proj(node);
4255 if (proj == pn_be_SubSP_sp) {
4256 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4257 pn_ia32_AddSP_stack);
4258 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4260 } else if (proj == pn_be_SubSP_M) {
4261 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4264 panic("No idea how to transform proj->SubSP");
4268 * Transform and renumber the Projs from a Load.
/**
 * The Load may have been transformed into an ia32 Load, xLoad (SSE), vfld
 * (x87) or a Conv with folded load — the proj numbers are remapped
 * accordingly.  Loads with more than one user may still be folded into an
 * address mode, so their memory Proj is kept untransformed for now.
 */
4270 static ir_node *gen_Proj_Load(ir_node *node)
4273 ir_node *block = be_transform_node(get_nodes_block(node));
4274 ir_node *pred = get_Proj_pred(node);
4275 dbg_info *dbgi = get_irn_dbg_info(node);
4276 long proj = get_Proj_proj(node);
4278 /* loads might be part of source address mode matches, so we don't
4279 * transform the ProjMs yet (with the exception of loads whose result is
4282 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4284 ir_node *old_block = get_nodes_block(node);
4286 /* this is needed, because sometimes we have loops that are only
4287 reachable through the ProjM */
4288 be_enqueue_preds(node);
4289 /* do it in 2 steps, to silence firm verifier */
4290 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4291 set_Proj_proj(res, pn_ia32_mem);
4295 /* renumber the proj */
4296 new_pred = be_transform_node(pred);
4297 if (is_ia32_Load(new_pred)) {
4300 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4302 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4303 case pn_Load_X_regular:
4304 return new_rd_Jmp(dbgi, block);
4305 case pn_Load_X_except:
4306 /* This Load might raise an exception. Mark it. */
4307 set_ia32_exc_label(new_pred, 1);
4308 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* Load folded into a Conv: the Conv becomes mode_T and provides both the
 * result and the memory output. */
4312 } else if (is_ia32_Conv_I2I(new_pred) ||
4313 is_ia32_Conv_I2I8Bit(new_pred)) {
4314 set_irn_mode(new_pred, mode_T);
4315 if (proj == pn_Load_res) {
4316 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4317 } else if (proj == pn_Load_M) {
4318 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4320 } else if (is_ia32_xLoad(new_pred)) {
4323 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4325 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4326 case pn_Load_X_regular:
4327 return new_rd_Jmp(dbgi, block);
4328 case pn_Load_X_except:
4329 /* This Load might raise an exception. Mark it. */
4330 set_ia32_exc_label(new_pred, 1);
4331 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4335 } else if (is_ia32_vfld(new_pred)) {
4338 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4340 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4341 case pn_Load_X_regular:
4342 return new_rd_Jmp(dbgi, block);
4343 case pn_Load_X_except:
4344 /* This Load might raise an exception. Mark it. */
4345 set_ia32_exc_label(new_pred, 1);
4346 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4351 /* can happen for ProJMs when source address mode happened for the
4354 /* however it should not be the result proj, as that would mean the
4355 load had multiple users and should not have been used for
4357 if (proj != pn_Load_M) {
4358 panic("internal error: transformed node not a Load");
4360 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4363 panic("No idea how to transform proj");
4367 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod were all lowered to a single ia32 Div/IDiv; the
 * source proj numbers are remapped onto the ia32 Div outputs
 * (div result, mod result, memory, X projs). */
4369 static ir_node *gen_Proj_DivMod(ir_node *node)
4371 ir_node *block = be_transform_node(get_nodes_block(node));
4372 ir_node *pred = get_Proj_pred(node);
4373 ir_node *new_pred = be_transform_node(pred);
4374 dbg_info *dbgi = get_irn_dbg_info(node);
4375 long proj = get_Proj_proj(node);
4377 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* Outer switch dispatches on the original opcode (Div/Mod/DivMod), the
 * inner cases on the proj number. */
4379 switch (get_irn_opcode(pred)) {
4383 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4385 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4386 case pn_Div_X_regular:
4387 return new_rd_Jmp(dbgi, block);
4388 case pn_Div_X_except:
4389 set_ia32_exc_label(new_pred, 1);
4390 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4398 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4400 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4401 case pn_Mod_X_except:
4402 set_ia32_exc_label(new_pred, 1);
4403 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4411 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4412 case pn_DivMod_res_div:
4413 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4414 case pn_DivMod_res_mod:
4415 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4416 case pn_DivMod_X_regular:
4417 return new_rd_Jmp(dbgi, block);
4418 case pn_DivMod_X_except:
4419 set_ia32_exc_label(new_pred, 1);
4420 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4429 panic("No idea how to transform proj->DivMod");
4433 * Transform and renumber the Projs from a CopyB.
/* The CopyB may have been turned into the immediate (CopyB_i) or the
 * general variant; pick the matching memory proj number. */
4435 static ir_node *gen_Proj_CopyB(ir_node *node)
4437 ir_node *block = be_transform_node(get_nodes_block(node));
4438 ir_node *pred = get_Proj_pred(node);
4439 ir_node *new_pred = be_transform_node(pred);
4440 dbg_info *dbgi = get_irn_dbg_info(node);
4441 long proj = get_Proj_proj(node);
4444 case pn_CopyB_M_regular:
4445 if (is_ia32_CopyB_i(new_pred)) {
4446 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4447 } else if (is_ia32_CopyB(new_pred)) {
4448 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4455 panic("No idea how to transform proj->CopyB");
4459 * Transform and renumber the Projs from a Quot.
/* A float division became either an SSE xDiv or an x87 vfdiv; remap memory
 * and result projs accordingly (result mode depends on the unit used). */
4461 static ir_node *gen_Proj_Quot(ir_node *node)
4463 ir_node *block = be_transform_node(get_nodes_block(node));
4464 ir_node *pred = get_Proj_pred(node);
4465 ir_node *new_pred = be_transform_node(pred);
4466 dbg_info *dbgi = get_irn_dbg_info(node);
4467 long proj = get_Proj_proj(node);
4471 if (is_ia32_xDiv(new_pred)) {
4472 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4473 } else if (is_ia32_vfdiv(new_pred)) {
4474 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4478 if (is_ia32_xDiv(new_pred)) {
4479 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4480 } else if (is_ia32_vfdiv(new_pred)) {
4481 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4484 case pn_Quot_X_regular:
4485 case pn_Quot_X_except:
4490 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call.  The call target may be folded into
 * an address mode or immediate; up to three GP register parameters are routed
 * to EAX/ECX/EDX explicitly.  A float return value triggers the x87
 * simulator, and under SSE2 the call is recorded for post-processing.
 */
4493 static ir_node *gen_be_Call(ir_node *node)
4495 dbg_info *const dbgi = get_irn_dbg_info(node);
4496 ir_node *const src_block = get_nodes_block(node);
4497 ir_node *const block = be_transform_node(src_block);
4498 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4499 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4500 ir_node *const sp = be_transform_node(src_sp);
4501 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4502 ia32_address_mode_t am;
4503 ia32_address_t *const addr = &am.addr;
4508 ir_node * eax = noreg_GP;
4509 ir_node * ecx = noreg_GP;
4510 ir_node * edx = noreg_GP;
4511 unsigned const pop = be_Call_get_pop(node);
4512 ir_type *const call_tp = be_Call_get_type(node);
4513 int old_no_pic_adjust;
4515 /* Run the x87 simulator if the call returns a float value */
4516 if (get_method_n_ress(call_tp) > 0) {
4517 ir_type *const res_type = get_method_res_type(call_tp, 0);
4518 ir_mode *const res_mode = get_type_mode(res_type);
4520 if (res_mode != NULL && mode_is_float(res_mode)) {
4521 env_cg->do_x87_sim = 1;
4525 /* We do not want be_Call direct calls */
4526 assert(be_Call_get_entity(node) == NULL);
4528 /* special case for PIC trampoline calls */
/* Temporarily suppress PIC adjustment while matching the call target. */
4529 old_no_pic_adjust = no_pic_adjust;
4530 no_pic_adjust = env_cg->birg->main_env->options->pic;
4532 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4533 match_am | match_immediate);
4535 no_pic_adjust = old_no_pic_adjust;
/* Last input is the fpcw; walk the remaining register arguments backwards
 * and route each to the register its requirement demands. */
4537 i = get_irn_arity(node) - 1;
4538 fpcw = be_transform_node(get_irn_n(node, i--));
4539 for (; i >= be_pos_Call_first_arg; --i) {
4540 arch_register_req_t const *const req = arch_get_register_req(node, i);
4541 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4543 assert(req->type == arch_register_req_type_limited);
4544 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4546 switch (*req->limited) {
4547 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4548 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4549 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4550 default: panic("Invalid GP register for register parameter");
4554 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4555 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4556 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4557 set_am_attributes(call, &am);
4558 call = fix_mem_proj(call, &am);
4560 if (get_irn_pinned(node) == op_pin_state_pinned)
4561 set_irn_pinned(call, op_pin_state_pinned);
4563 SET_IA32_ORIG_NODE(call, node);
4565 if (ia32_cg_config.use_sse2) {
4566 /* remember this call for post-processing */
4567 ARR_APP1(ir_node *, call_list, call);
4568 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4575 * Transform Builtin trap
/* Lowered to the UD2 instruction (guaranteed invalid opcode). */
4577 static ir_node *gen_trap(ir_node *node) {
4578 dbg_info *dbgi = get_irn_dbg_info(node);
4579 ir_node *block = be_transform_node(get_nodes_block(node));
4580 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4582 return new_bd_ia32_UD2(dbgi, block, mem);
4586 * Transform Builtin debugbreak
/* Lowered to the int3 breakpoint instruction. */
4588 static ir_node *gen_debugbreak(ir_node *node) {
4589 dbg_info *dbgi = get_irn_dbg_info(node);
4590 ir_node *block = be_transform_node(get_nodes_block(node));
4591 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4593 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4597 * Transform Builtin return_address
4599 static ir_node *gen_return_address(ir_node *node) {
4600 ir_node *param = get_Builtin_param(node, 0);
4601 ir_node *frame = get_Builtin_param(node, 1);
4602 dbg_info *dbgi = get_irn_dbg_info(node);
4603 tarval *tv = get_Const_tarval(param);
4604 unsigned long value = get_tarval_long(tv);
4606 ir_node *block = be_transform_node(get_nodes_block(node));
4607 ir_node *ptr = be_transform_node(frame);
4611 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4612 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4613 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4616 /* load the return address from this frame */
4617 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4619 set_irn_pinned(load, get_irn_pinned(node));
4620 set_ia32_op_type(load, ia32_AddrModeS);
4621 set_ia32_ls_mode(load, mode_Iu);
4623 set_ia32_am_offs_int(load, 0);
4624 set_ia32_use_frame(load);
4625 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4627 if (get_irn_pinned(node) == op_pin_state_floats) {
4628 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4629 && pn_ia32_vfld_res == pn_ia32_Load_res
4630 && pn_ia32_Load_res == pn_ia32_res);
4631 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4634 SET_IA32_ORIG_NODE(load, node);
4635 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4639 * Transform Builtin frame_address
/* Analogous to gen_return_address, but loads the frame address itself.
 * NOTE(review): the two set_ia32_am_offs_int(load, 0) calls below appear to
 * belong to different branches (entity available vs. not) — confirm against
 * the complete source. */
4641 static ir_node *gen_frame_address(ir_node *node) {
4642 ir_node *param = get_Builtin_param(node, 0);
4643 ir_node *frame = get_Builtin_param(node, 1);
4644 dbg_info *dbgi = get_irn_dbg_info(node);
4645 tarval *tv = get_Const_tarval(param);
4646 unsigned long value = get_tarval_long(tv);
4648 ir_node *block = be_transform_node(get_nodes_block(node));
4649 ir_node *ptr = be_transform_node(frame);
4654 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4655 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4656 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4659 /* load the frame address from this frame */
4660 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4662 set_irn_pinned(load, get_irn_pinned(node));
4663 set_ia32_op_type(load, ia32_AddrModeS);
4664 set_ia32_ls_mode(load, mode_Iu);
4666 ent = ia32_get_frame_address_entity();
4668 set_ia32_am_offs_int(load, 0);
4669 set_ia32_use_frame(load);
4670 set_ia32_frame_ent(load, ent);
4672 /* will fail anyway, but gcc does this: */
4673 set_ia32_am_offs_int(load, 0);
/* An unpinned load can be rematerialized; the res-proj numbers of all load
 * variants must agree for that to be safe. */
4676 if (get_irn_pinned(node) == op_pin_state_floats) {
4677 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4678 && pn_ia32_vfld_res == pn_ia32_Load_res
4679 && pn_ia32_Load_res == pn_ia32_res);
4680 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4683 SET_IA32_ORIG_NODE(load, node);
4684 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4688 * Transform Builtin frame_address
/**
 * Transform the prefetch builtin.  Param 0 is the address, param 1 the
 * read/write flag, param 2 the locality hint.  Emits PrefetchW (3DNow!,
 * write), one of PrefetchNTA/2/1/0 (SSE, by locality) or the plain 3DNow!
 * Prefetch; without any prefetch support the memory edge is just routed
 * through.
 */
4690 static ir_node *gen_prefetch(ir_node *node) {
4692 ir_node *ptr, *block, *mem, *base, *index;
4693 ir_node *param, *new_node;
4696 ia32_address_t addr;
4698 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4699 /* no prefetch at all, route memory */
4700 return be_transform_node(get_Builtin_mem(node));
4703 param = get_Builtin_param(node, 1);
4704 tv = get_Const_tarval(param);
4705 rw = get_tarval_long(tv);
4707 /* construct load address */
4708 memset(&addr, 0, sizeof(addr));
4709 ptr = get_Builtin_param(node, 0);
4710 ia32_create_address_mode(&addr, ptr, 0);
4717 base = be_transform_node(base);
4720 if (index == NULL) {
4723 index = be_transform_node(index);
4726 dbgi = get_irn_dbg_info(node);
4727 block = be_transform_node(get_nodes_block(node));
4728 mem = be_transform_node(get_Builtin_mem(node));
4730 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4731 /* we have 3DNow!, this was already checked above */
4732 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4733 } else if (ia32_cg_config.use_sse_prefetch) {
4734 /* note: rw == 1 is IGNORED in that case */
4735 param = get_Builtin_param(node, 2);
4736 tv = get_Const_tarval(param);
4737 locality = get_tarval_long(tv);
4739 /* SSE style prefetch */
4742 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4745 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4748 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4751 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4755 assert(ia32_cg_config.use_3dnow_prefetch);
4756 /* 3DNow! style prefetch */
4757 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4760 set_irn_pinned(new_node, get_irn_pinned(node));
4761 set_ia32_op_type(new_node, ia32_AddrModeS);
4762 set_ia32_ls_mode(new_node, mode_Bu);
4763 set_address(new_node, &addr);
4765 SET_IA32_ORIG_NODE(new_node, node);
4767 be_dep_on_frame(new_node);
4768 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4772 * Transform bsf like node
/**
 * Common helper for bsf/bsr-style builtins: matches the single parameter
 * (possibly as an address-mode operand) and builds the given instruction.
 */
4774 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4776 ir_node *param = get_Builtin_param(node, 0);
4777 dbg_info *dbgi = get_irn_dbg_info(node);
4779 ir_node *block = get_nodes_block(node);
4780 ir_node *new_block = be_transform_node(block);
4782 ia32_address_mode_t am;
4783 ia32_address_t *addr = &am.addr;
4786 match_arguments(&am, block, NULL, param, NULL, match_am);
4788 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4789 set_am_attributes(cnt, &am);
4790 set_ia32_ls_mode(cnt, get_irn_mode(param));
4792 SET_IA32_ORIG_NODE(cnt, node);
4793 return fix_mem_proj(cnt, &am);
4797 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1 with ffs(0) == 0.  bsf sets ZF for input 0; that flag
 * is materialized (Setcc/Conv), negated to 0 or -1, and ORed into the bsf
 * result so the final +1 yields 0 for a zero input. */
4799 static ir_node *gen_ffs(ir_node *node)
4801 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4802 ir_node *real = skip_Proj(bsf);
4803 dbg_info *dbgi = get_irn_dbg_info(real);
4804 ir_node *block = get_nodes_block(real);
4805 ir_node *flag, *set, *conv, *neg, *or;
/* Make the Bsf mode_T so both result and flags can be projected. */
4808 if (get_irn_mode(real) != mode_T) {
4809 set_irn_mode(real, mode_T);
4810 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4813 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
4816 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
4817 SET_IA32_ORIG_NODE(set, node);
4820 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4821 SET_IA32_ORIG_NODE(conv, node);
4824 neg = new_bd_ia32_Neg(dbgi, block, conv);
4827 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4828 set_ia32_commutative(or);
4831 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4835 * Transform builtin clz.
/* clz(x) = 31 - bsr(x); since bsr yields a value in [0, 31], the
 * subtraction can be done with XOR 31. */
4837 static ir_node *gen_clz(ir_node *node)
4839 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4840 ir_node *real = skip_Proj(bsr);
4841 dbg_info *dbgi = get_irn_dbg_info(real);
4842 ir_node *block = get_nodes_block(real);
4843 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4845 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4849 * Transform builtin ctz.
/* ctz maps directly to bsf (index of lowest set bit). */
4851 static ir_node *gen_ctz(ir_node *node)
4853 return gen_unop_AM(node, new_bd_ia32_Bsf);
4857 * Transform builtin parity.
/* Compare the value against 0 so the CPU's parity flag reflects the operand,
 * then materialize that flag via Setcc on ia32_pn_Cmp_parity and widen the
 * byte result with an 8bit-to-32bit Conv. */
4859 static ir_node *gen_parity(ir_node *node)
4861 ir_node *param = get_Builtin_param(node, 0);
4862 dbg_info *dbgi = get_irn_dbg_info(node);
4864 ir_node *block = get_nodes_block(node);
4866 ir_node *new_block = be_transform_node(block);
4867 ir_node *imm, *cmp, *new_node;
4869 ia32_address_mode_t am;
4870 ia32_address_t *addr = &am.addr;
4874 match_arguments(&am, block, NULL, param, NULL, match_am);
4875 imm = ia32_create_Immediate(NULL, 0, 0);
4876 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4877 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4878 set_am_attributes(cmp, &am);
4879 set_ia32_ls_mode(cmp, mode_Iu);
4881 SET_IA32_ORIG_NODE(cmp, node);
4883 cmp = fix_mem_proj(cmp, &am);
4886 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
4887 SET_IA32_ORIG_NODE(new_node, node);
4890 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4891 nomem, new_node, mode_Bu);
4892 SET_IA32_ORIG_NODE(new_node, node);
4897 * Transform builtin popcount
4899 static ir_node *gen_popcount(ir_node *node) {
4900 ir_node *param = get_Builtin_param(node, 0);
4901 dbg_info *dbgi = get_irn_dbg_info(node);
4903 ir_node *block = get_nodes_block(node);
4904 ir_node *new_block = be_transform_node(block);
4907 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4909 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4910 if (ia32_cg_config.use_popcnt) {
4911 ia32_address_mode_t am;
4912 ia32_address_t *addr = &am.addr;
4915 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4917 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4918 set_am_attributes(cnt, &am);
4919 set_ia32_ls_mode(cnt, get_irn_mode(param));
4921 SET_IA32_ORIG_NODE(cnt, node);
4922 return fix_mem_proj(cnt, &am);
4925 new_param = be_transform_node(param);
4927 /* do the standard popcount algo */
4929 /* m1 = x & 0x55555555 */
4930 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4931 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4934 simm = ia32_create_Immediate(NULL, 0, 1);
4935 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4937 /* m2 = s1 & 0x55555555 */
4938 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4941 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4943 /* m4 = m3 & 0x33333333 */
4944 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4945 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4948 simm = ia32_create_Immediate(NULL, 0, 2);
4949 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4951 /* m5 = s2 & 0x33333333 */
4952 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4955 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4957 /* m7 = m6 & 0x0F0F0F0F */
4958 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4959 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4962 simm = ia32_create_Immediate(NULL, 0, 4);
4963 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4965 /* m8 = s3 & 0x0F0F0F0F */
4966 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4969 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4971 /* m10 = m9 & 0x00FF00FF */
4972 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4973 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4976 simm = ia32_create_Immediate(NULL, 0, 8);
4977 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4979 /* m11 = s4 & 0x00FF00FF */
4980 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4982 /* m12 = m10 + m11 */
4983 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4985 /* m13 = m12 & 0x0000FFFF */
4986 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4987 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4989 /* s5 = m12 >> 16 */
4990 simm = ia32_create_Immediate(NULL, 0, 16);
4991 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4993 /* res = m13 + s5 */
4994 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4998 * Transform builtin byte swap.
5000 static ir_node *gen_bswap(ir_node *node) {
5001 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5002 dbg_info *dbgi = get_irn_dbg_info(node);
5004 ir_node *block = get_nodes_block(node);
5005 ir_node *new_block = be_transform_node(block);
5006 ir_mode *mode = get_irn_mode(param);
5007 unsigned size = get_mode_size_bits(mode);
5008 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5012 if (ia32_cg_config.use_i486) {
5013 /* swap available */
5014 return new_bd_ia32_Bswap(dbgi, new_block, param);
5016 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5017 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5019 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5020 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5022 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5024 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5025 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5027 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5028 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5031 /* swap16 always available */
5032 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5035 panic("Invalid bswap size (%d)", size);
5040  * Transform builtin outport.
/* Lower __builtin_outport to an ia32 Outport node (x86 OUT instruction).
 * The port number may be encoded as an immediate; the value's mode is
 * stored as the load/store mode, presumably selecting the operand size
 * (byte/word/dword) at emit time — confirm against the emitter. */
5042 static ir_node *gen_outport(ir_node *node) {
5043 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5044 ir_node *oldv = get_Builtin_param(node, 1);
5045 ir_mode *mode = get_irn_mode(oldv);
5046 ir_node *value = be_transform_node(oldv);
5047 ir_node *block = be_transform_node(get_nodes_block(node));
5048 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5049 dbg_info *dbgi = get_irn_dbg_info(node);
5051 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5052 set_ia32_ls_mode(res, mode);
/* NOTE(review): the trailing `return res;` line is elided in this excerpt. */
5057  * Transform builtin inport.
/* Lower __builtin_inport to an ia32 Inport node (x86 IN instruction).
 * The operand size is taken from the builtin's declared result type. */
5059 static ir_node *gen_inport(ir_node *node) {
5060 ir_type *tp = get_Builtin_type(node);
5061 ir_type *rstp = get_method_res_type(tp, 0);
5062 ir_mode *mode = get_type_mode(rstp);
5063 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5064 ir_node *block = be_transform_node(get_nodes_block(node));
5065 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5066 dbg_info *dbgi = get_irn_dbg_info(node);
5068 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5069 set_ia32_ls_mode(res, mode);
5071 /* check for missing Result Proj */
/* NOTE(review): the code following this comment (keeping the node alive
 * when its result Proj is absent, and the return) is elided here. */
5076  * Transform a builtin inner trampoline
/* Emit stores that write executable trampoline code to `ptr`:
 *   B9 <env:4>          mov ecx, <env>
 *   E9 <rel32:4>        jmp rel32 <callee>
 * The jump displacement is callee - (trampoline + 10); the -10 constants
 * below account for the 10-byte sequence, presumably because rel32 is
 * relative to the end of the jump instruction — confirm.
 * NOTE(review): several interior lines are elided in this excerpt, in
 * particular the declarations of val/store/rel/in[], the address-offset
 * increments between the stores, and the memory threading. */
5078 static ir_node *gen_inner_trampoline(ir_node *node) {
5079 ir_node *ptr = get_Builtin_param(node, 0);
5080 ir_node *callee = get_Builtin_param(node, 1);
5081 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5082 ir_node *mem = get_Builtin_mem(node);
5083 ir_node *block = get_nodes_block(node);
5084 ir_node *new_block = be_transform_node(block);
5088 ir_node *trampoline;
5090 dbg_info *dbgi = get_irn_dbg_info(node);
5091 ia32_address_t addr;
5093 /* construct store address */
5094 memset(&addr, 0, sizeof(addr));
5095 ia32_create_address_mode(&addr, ptr, 0);
5097 if (addr.base == NULL) {
5098 addr.base = noreg_GP;
/* NOTE(review): `} else {` line elided */
5100 addr.base = be_transform_node(addr.base);
5103 if (addr.index == NULL) {
5104 addr.index = noreg_GP;
/* NOTE(review): `} else {` line elided */
5106 addr.index = be_transform_node(addr.index);
5108 addr.mem = be_transform_node(mem);
5110 /* mov ecx, <env> */
/* 0xB9 is the opcode byte for "mov ecx, imm32" */
5111 val = ia32_create_Immediate(NULL, 0, 0xB9);
5112 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5113 addr.index, addr.mem, val);
5114 set_irn_pinned(store, get_irn_pinned(node));
5115 set_ia32_op_type(store, ia32_AddrModeD);
5116 set_ia32_ls_mode(store, mode_Bu);
5117 set_address(store, &addr);
/* store the 4-byte environment pointer operand */
5121 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5122 addr.index, addr.mem, env);
5123 set_irn_pinned(store, get_irn_pinned(node));
5124 set_ia32_op_type(store, ia32_AddrModeD);
5125 set_ia32_ls_mode(store, mode_Iu);
5126 set_address(store, &addr);
5130 /* jmp rel <callee> */
/* 0xE9 is the opcode byte for "jmp rel32" */
5131 val = ia32_create_Immediate(NULL, 0, 0xE9);
5132 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5133 addr.index, addr.mem, val);
5134 set_irn_pinned(store, get_irn_pinned(node));
5135 set_ia32_op_type(store, ia32_AddrModeD);
5136 set_ia32_ls_mode(store, mode_Bu);
5137 set_address(store, &addr);
5141 trampoline = be_transform_node(ptr);
5143 /* the callee is typically an immediate */
5144 if (is_SymConst(callee)) {
/* symbolic callee: fold the -10 adjustment into the Const's offset */
5145 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
/* NOTE(review): `} else {` line elided */
5147 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel32 = (callee - 10) - trampoline */
5149 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5151 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5152 addr.index, addr.mem, rel);
5153 set_irn_pinned(store, get_irn_pinned(node));
5154 set_ia32_op_type(store, ia32_AddrModeD);
5155 set_ia32_ls_mode(store, mode_Iu);
5156 set_address(store, &addr);
/* result tuple: in[0] = memory, in[1] = trampoline address
 * (consumed by gen_Proj_Builtin's inner_trampoline case below) */
5161 return new_r_Tuple(new_block, 2, in);
5165  * Transform Builtin node.
/* Dispatch a firm Builtin node to its ia32-specific transformer.
 * NOTE(review): the `switch (kind) {` line and several `case ir_bk_...:`
 * labels (trap, ffs, clz, ctz, parity, bswap, outport, inport) are elided
 * in this excerpt; each visible return corresponds to one such label. */
5167 static ir_node *gen_Builtin(ir_node *node) {
5168 ir_builtin_kind kind = get_Builtin_kind(node);
5172 return gen_trap(node);
5173 case ir_bk_debugbreak:
5174 return gen_debugbreak(node);
5175 case ir_bk_return_address:
5176 return gen_return_address(node);
5177 case ir_bk_frame_address:
5178 return gen_frame_address(node);
5179 case ir_bk_prefetch:
5180 return gen_prefetch(node);
5182 return gen_ffs(node);
5184 return gen_clz(node);
5186 return gen_ctz(node);
5188 return gen_parity(node);
5189 case ir_bk_popcount:
5190 return gen_popcount(node);
5192 return gen_bswap(node);
5194 return gen_outport(node);
5196 return gen_inport(node);
5197 case ir_bk_inner_trampoline:
5198 return gen_inner_trampoline(node);
/* unhandled builtin kinds are a hard error in this backend */
5200 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5204  * Transform Proj(Builtin) node.
/* Renumber Projs of a Builtin to match the transformed ia32 node.
 * NOTE(review): the `switch (kind) {` line, some case labels and the
 * fall-through returns of the first two groups are elided here. */
5206 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5207 ir_node *node = get_Proj_pred(proj);
5208 ir_node *new_node = be_transform_node(node);
5209 ir_builtin_kind kind = get_Builtin_kind(node);
/* builtins producing exactly one data result: the transformed node IS
 * that result (return elided) */
5212 case ir_bk_return_address:
5213 case ir_bk_frame_address:
5218 case ir_bk_popcount:
5220 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* builtins producing only a memory result (return elided) */
5223 case ir_bk_debugbreak:
5224 case ir_bk_prefetch:
5226 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a data and a memory output on the ia32 node */
5229 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5230 return new_r_Proj(get_nodes_block(new_node),
5231 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5233 assert(get_Proj_proj(proj) == pn_Builtin_M);
5234 return new_r_Proj(get_nodes_block(new_node),
5235 new_node, mode_M, pn_ia32_Inport_M);
/* inner_trampoline returns a Tuple: pred 0 = memory, pred 1 = result */
5237 case ir_bk_inner_trampoline:
5238 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5239 return get_Tuple_pred(new_node, 1);
5241 assert(get_Proj_proj(proj) == pn_Builtin_M);
5242 return get_Tuple_pred(new_node, 0);
5245 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Duplicate a be_IncSP (stack-pointer adjustment) and mark it as modifying
 * the flags register — presumably because it is emitted as an add/sub on
 * esp, which clobbers EFLAGS; confirm against the emitter. */
5248 static ir_node *gen_be_IncSP(ir_node *node)
5250 ir_node *res = be_duplicate_node(node);
5251 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/* NOTE(review): the trailing `return res;` line is elided in this excerpt. */
5257  * Transform the Projs from a be_Call.
/* Map a Proj of a generic be_Call onto the corresponding Proj of the
 * transformed ia32_Call. Memory and stack projs are renumbered directly;
 * data results are located by scanning the ia32_Call's output register
 * requirements for one with the same limited-register constraint.
 * NOTE(review): many interior lines (register-class fixups, the loop's
 * match/break body, the switch header) are elided in this excerpt. */
5259 static ir_node *gen_Proj_be_Call(ir_node *node)
5261 ir_node *block = be_transform_node(get_nodes_block(node));
5262 ir_node *call = get_Proj_pred(node);
5263 ir_node *new_call = be_transform_node(call);
5264 dbg_info *dbgi = get_irn_dbg_info(node);
5265 long proj = get_Proj_proj(node);
5266 ir_mode *mode = get_irn_mode(node);
5269 if (proj == pn_be_Call_M_regular) {
5270 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5272 /* transform call modes */
5273 if (mode_is_data(mode)) {
5274 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5278 /* Map from be_Call to ia32_Call proj number */
5279 if (proj == pn_be_Call_sp) {
5280 proj = pn_ia32_Call_stack;
5281 } else if (proj == pn_be_Call_M_regular) {
5282 proj = pn_ia32_Call_M;
/* data result: find the matching constrained output of the new call */
5284 arch_register_req_t const *const req = arch_get_register_req_out(node);
5285 int const n_outs = arch_irn_get_n_outs(new_call);
5288 assert(proj >= pn_be_Call_first_res);
5289 assert(req->type & arch_register_req_type_limited);
5291 for (i = 0; i < n_outs; ++i) {
5292 arch_register_req_t const *const new_req
5293 = arch_get_out_register_req(new_call, i);
/* skip outputs whose constraint does not match this result's */
5295 if (!(new_req->type & arch_register_req_type_limited) ||
5296 new_req->cls != req->cls ||
5297 *new_req->limited != *req->limited)
5306 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5308 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
/* pin fixed registers onto the well-known outputs */
5310 case pn_ia32_Call_stack:
5311 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5314 case pn_ia32_Call_fpcw:
5315 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5323  * Transform the Projs from a Cmp.
/* mode_b Cmp results must have been lowered away before the backend
 * transformation runs; reaching this transformer is a hard error. */
5325 static ir_node *gen_Proj_Cmp(ir_node *node)
5327 /* this probably means not all mode_b nodes were lowered... */
5328 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5333  * Transform the Projs from a Bound.
/* A Bound check is transformed into a conditional jump (Jcc); its control
 * flow projs map onto the Jcc's true/false outputs, while the memory and
 * result projs pass the original operands through unchanged.
 * NOTE(review): the `case pn_Bound_M:` and `case pn_Bound_res:` labels and
 * the switch's `default:` line are elided in this excerpt. */
5335 static ir_node *gen_Proj_Bound(ir_node *node)
5337 ir_node *new_node, *block;
5338 ir_node *pred = get_Proj_pred(node);
5340 switch (get_Proj_proj(node)) {
/* memory proj: the check itself has no memory effect */
5342 return be_transform_node(get_Bound_mem(pred));
5343 case pn_Bound_X_regular:
5344 new_node = be_transform_node(pred);
5345 block = get_nodes_block(new_node);
5346 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5347 case pn_Bound_X_except:
5348 new_node = be_transform_node(pred);
5349 block = get_nodes_block(new_node);
5350 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
/* result proj: the checked index is returned unmodified */
5352 return be_transform_node(get_Bound_index(pred));
5354 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: the memory proj is renumbered to the
 * transformed ASM's last output; integer/reference and float projs keep a
 * (presumably adjusted) position — the adjustment statements for those two
 * branches are elided in this excerpt, so confirm against the full source. */
5358 static ir_node *gen_Proj_ASM(ir_node *node)
5360 ir_mode *mode = get_irn_mode(node);
5361 ir_node *pred = get_Proj_pred(node);
5362 ir_node *new_pred = be_transform_node(pred);
5363 ir_node *block = get_nodes_block(new_pred);
5364 long pos = get_Proj_proj(node);
5366 if (mode == mode_M) {
/* memory output is always the last one of the transformed ASM */
5367 pos = arch_irn_get_n_outs(new_pred)-1;
5368 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5370 } else if (mode_is_float(mode)) {
5373 panic("unexpected proj mode at ASM");
5376 return new_r_Proj(block, new_pred, mode, pos);
5380  * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: forwards to the per-opcode Proj transformers.
 * NOTE(review): most `case iro_...:` labels of the outer switch are elided
 * in this excerpt; each visible return belongs to one such label. */
5382 static ir_node *gen_Proj(ir_node *node)
5384 ir_node *pred = get_Proj_pred(node);
5387 switch (get_irn_opcode(pred)) {
/* Store produces only a memory result */
5389 proj = get_Proj_proj(node);
5390 if (proj == pn_Store_M) {
5391 return be_transform_node(pred);
5393 panic("No idea how to transform proj->Store");
5396 return gen_Proj_Load(node);
5398 return gen_Proj_ASM(node);
5400 return gen_Proj_Builtin(node);
5404 return gen_Proj_DivMod(node);
5406 return gen_Proj_CopyB(node);
5408 return gen_Proj_Quot(node);
5410 return gen_Proj_be_SubSP(node);
5412 return gen_Proj_be_AddSP(node);
5414 return gen_Proj_be_Call(node);
5416 return gen_Proj_Cmp(node);
5418 return gen_Proj_Bound(node);
5420 proj = get_Proj_proj(node);
5422 case pn_Start_X_initial_exec: {
5423 ir_node *block = get_nodes_block(pred);
5424 ir_node *new_block = be_transform_node(block);
5425 dbg_info *dbgi = get_irn_dbg_info(node);
5426 /* we exchange the ProjX with a jump */
5427 ir_node *jump = new_rd_Jmp(dbgi, new_block);
/* NOTE(review): `return jump;` and the closing of this case are elided */
5432 case pn_Start_P_tls:
5433 return gen_Proj_tls(node);
/* default: Projs of already-transformed or non-ia32 predecessors */
5438 if (is_ia32_l_FloattoLL(pred)) {
5439 return gen_Proj_l_FloattoLL(node);
5441 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5445 ir_mode *mode = get_irn_mode(node);
5446 if (ia32_mode_needs_gp_reg(mode)) {
5447 ir_node *new_pred = be_transform_node(pred);
5448 ir_node *block = be_transform_node(get_nodes_block(node));
5449 ir_node *new_proj = new_r_Proj(block, new_pred,
5450 mode_Iu, get_Proj_proj(node));
/* copy the node number — presumably so later pattern matching can
 * correlate the new Proj with the original; confirm */
5451 new_proj->node_nr = node->node_nr;
5456 return be_duplicate_node(node);
5460  * Enters all transform functions into the generic pointer
/* Install the gen_* transformers into each opcode's generic function
 * pointer so be_transform_graph() can dispatch on opcode.
 * NOTE(review): the long lists of GEN(...)/BAD(...) invocations between
 * the section comments are elided in this excerpt. */
5462 static void register_transformers(void)
5464 /* first clear the generic function pointer for all ops */
5465 clear_irp_opcodes_generic_func();
/* GEN(op) installs gen_<op> as the transformer for op; BAD(op) installs a
 * handler that aborts — for ops that must not appear at this stage */
5467 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5468 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5508 /* transform ops from intrinsic lowering */
5520 GEN(ia32_l_LLtoFloat);
5521 GEN(ia32_l_FloattoLL);
5527 /* we should never see these nodes */
5542 /* handle builtins */
5545 /* handle generic backend nodes */
5559  * Pre-transform all unknown and noreg nodes.
/* Pre-transform the cached unknown/noreg placeholder nodes of every
 * register class so they exist before the main transformation walk, and
 * cache the NoMem and GP-noreg nodes in file-level globals used by the
 * transformers above. */
5561 static void ia32_pretransform_node(void)
5563 ia32_code_gen_t *cg = env_cg;
5565 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5566 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5567 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5568 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5569 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5570 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5572 nomem = get_irg_no_mem(current_ir_graph);
5573 noreg_GP = ia32_new_NoReg_gp(cg);
5579  * Walker, checks if all ia32 nodes producing more than one result have their
5580  * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/* NOTE(review): several interior lines are elided in this excerpt
 * (declarations of n_outs/i/pn/block/in/last_keep, early returns, the
 * mode_T check, and some closing braces). */
5582 static void add_missing_keep_walker(ir_node *node, void *data)
/* bitset of output positions that already have a Proj */
5585 unsigned found_projs = 0;
5586 const ir_edge_t *edge;
5587 ir_mode *mode = get_irn_mode(node);
/* only ia32 nodes are of interest (early return elided) */
5592 if (!is_ia32_irn(node))
5595 n_outs = arch_irn_get_n_outs(node);
/* SwitchJmp outputs are control flow; no keeps needed (return elided) */
5598 if (is_ia32_SwitchJmp(node))
5601 assert(n_outs < (int) sizeof(unsigned) * 8);
/* collect which outputs already have Projs */
5602 foreach_out_edge(node, edge) {
5603 ir_node *proj = get_edge_src_irn(edge);
5606 /* The node could be kept */
/* memory Projs need no keep */
5610 if (get_irn_mode(proj) == mode_M)
5613 pn = get_Proj_proj(proj);
5614 assert(pn < n_outs);
5615 found_projs |= 1 << pn;
5619 /* are keeps missing? */
5621 for (i = 0; i < n_outs; ++i) {
5624 const arch_register_req_t *req;
5625 const arch_register_class_t *cls;
5627 if (found_projs & (1 << i)) {
5631 req = arch_get_out_register_req(node, i);
/* flag outputs need no keep */
5636 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create a Proj for the unused output and keep it alive */
5640 block = get_nodes_block(node);
5641 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
5642 if (last_keep != NULL) {
/* reuse one Keep per node: just append the new Proj */
5643 be_Keep_add_node(last_keep, cls, in[0]);
/* else branch (elided `} else {`): create the first Keep */
5645 last_keep = be_new_Keep(block, 1, in);
5646 if (sched_is_scheduled(node)) {
5647 sched_add_after(node, last_keep);
5654  * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Graph-wide driver for add_missing_keep_walker() above. */
5657 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5659 ir_graph *irg = be_get_birg_irg(cg->birg);
5660 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5664  * Post-process all calls if we are in SSE mode.
5665  * The ABI requires that the results are in st0, copy them
5666  * to a xmm register.
/* Walk the recorded calls (call_list/call_types, filled during the main
 * transformation) and, for each float result delivered in st(0), either
 * patch a consuming xStore into a direct x87 vfst, or spill st(0) to the
 * frame and reload it into an XMM register for all other users.
 * NOTE(review): some interior lines (loop-variable declarations, the
 * exchange/continue statements after the xStore patch, closing braces)
 * are elided in this excerpt. */
5668 static void postprocess_fp_call_results(void) {
5671 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5672 ir_node *call = call_list[i];
5673 ir_type *mtp = call_types[i];
5676 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5677 ir_type *res_tp = get_method_res_type(mtp, j);
5678 ir_node *res, *new_res;
5679 const ir_edge_t *edge, *next;
5682 if (! is_atomic_type(res_tp)) {
5683 /* no floating point return */
5686 mode = get_type_mode(res_tp);
5687 if (! mode_is_float(mode)) {
5688 /* no floating point return */
5692 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5695 /* now patch the users */
5696 foreach_out_edge_safe(res, edge, next) {
5697 ir_node *succ = get_edge_src_irn(edge);
5700 if (be_is_Keep(succ))
5703 if (is_ia32_xStore(succ)) {
5704 /* an xStore can be patched into an vfst */
5705 dbg_info *db = get_irn_dbg_info(succ);
5706 ir_node *block = get_nodes_block(succ);
5707 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5708 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5709 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5710 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5711 ir_mode *mode = get_ia32_ls_mode(succ);
5713 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
/* carry over address-mode attributes from the xStore */
5714 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5715 if (is_ia32_use_frame(succ))
5716 set_ia32_use_frame(st);
5717 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5718 set_irn_pinned(st, get_irn_pinned(succ));
5719 set_ia32_op_type(st, ia32_AddrModeD);
/* other users: lazily build the spill/reload, once per result */
5723 if (new_res == NULL) {
5724 dbg_info *db = get_irn_dbg_info(call);
5725 ir_node *block = get_nodes_block(call);
5726 ir_node *frame = get_irg_frame(current_ir_graph);
5727 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5728 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5729 ir_node *vfst, *xld, *new_mem;
5731 /* store st(0) on stack */
5732 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5733 set_ia32_op_type(vfst, ia32_AddrModeD);
5734 set_ia32_use_frame(vfst);
5736 /* load into SSE register */
5737 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5738 set_ia32_op_type(xld, ia32_AddrModeS);
5739 set_ia32_use_frame(xld);
5741 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5742 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
5744 if (old_mem != NULL) {
/* thread the new load's memory into the old memory users */
5745 edges_reroute(old_mem, new_mem, current_ir_graph);
/* redirect this user to the XMM copy of the result */
5749 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5756 /* do the transformation */
/* Entry point of the ia32 transformation phase: registers the per-opcode
 * transformers, computes the heights needed for address-mode matching,
 * disables CSE for the duration of the walk (node attributes are set after
 * creation), runs the generic transform driver, and in SSE2 mode fixes up
 * x87 call results afterwards. */
5757 void ia32_transform_graph(ia32_code_gen_t *cg)
5761 register_transformers();
5763 initial_fpcw = NULL;
5766 be_timer_push(T_HEIGHTS);
5767 heights = heights_new(cg->irg);
5768 be_timer_pop(T_HEIGHTS);
5769 ia32_calculate_non_address_mode_nodes(cg->birg);
5771 /* the transform phase is not safe for CSE (yet) because several nodes get
5772  * attributes set after their creation */
5773 cse_last = get_opt_cse();
/* NOTE(review): the `set_opt_cse(0);` line is elided in this excerpt */
/* call_list/call_types record calls for postprocess_fp_call_results() */
5776 call_list = NEW_ARR_F(ir_node *, 0);
5777 call_types = NEW_ARR_F(ir_type *, 0);
5778 be_transform_graph(cg->birg, ia32_pretransform_node);
5780 if (ia32_cg_config.use_sse2)
5781 postprocess_fp_call_results();
5782 DEL_ARR_F(call_types);
5783 DEL_ARR_F(call_list);
/* restore the caller's CSE setting */
5785 set_opt_cse(cse_last);
5787 ia32_free_non_address_mode_nodes();
5788 heights_free(heights);
5792 void ia32_init_transform(void)
5794 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");