2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* Bit patterns (as strings, parsed by new_tarval_from_str) for the sign and
 * abs masks of single (SFP) and double (DFP) precision floats, plus the
 * bias constant for unsigned-long-long -> float conversion.
 * Consumed by ia32_gen_fp_known_const() below. */
#define SFP_SIGN "0x80000000"
#define DFP_SIGN "0x8000000000000000"
#define SFP_ABS "0x7FFFFFFF"
#define DFP_ABS "0x7FFFFFFFFFFFFFFF"
#define DFP_INTMAX "9223372036854775807"
#define ULL_BIAS "18446744073709551616"

/* assembler-local label names for the constant-pool entities built from the
 * patterns above (also see ia32_gen_fp_known_const) */
#define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
#define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
#define ENT_SFP_ABS ".LC_ia32_sfp_abs"
#define ENT_DFP_ABS ".LC_ia32_dfp_abs"
#define ENT_ULL_BIAS ".LC_ia32_ull_bias"

/* shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* cached, already-transformed node of the initial fp control word;
 * lazily initialized by get_fpcw() */
static ir_node *initial_fpcw = NULL;
/** Constructor signature for a binary ia32 node with address-mode
 *  inputs (base, index, mem) and two operands. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

/** Like construct_binop_func, but the node additionally consumes eflags. */
typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/** Constructor signature for shift/rotate nodes (no address mode inputs). */
typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
	ir_node *op1, ir_node *op2);

/** Constructor for binary nodes whose destination is a memory operand. */
typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

/** Constructor for unary nodes whose destination is a memory operand. */
typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem);

/** Constructor for binary float nodes (takes address-mode inputs and,
 *  presumably, an fpcw operand — see gen_binop_x87_float below). */
typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
	ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

/** Constructor signature for plain unary nodes. */
typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

static ir_node *create_immediate_or_transform(ir_node *node,
	char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
	dbg_info *dbgi, ir_node *block,
	ir_node *op, ir_node *orig_node);

/* it's enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
/**
 * returns true if constant can be created with a simple float command
 * (x87 can materialize 0.0 and 1.0 directly, without a memory load —
 * see the vfldz/vfld1 cases in gen_Const)
 */
static bool is_simple_x87_Const(ir_node *node)
	tarval *tv = get_Const_tarval(node);
	if (tarval_is_null(tv) || tarval_is_one(tv))
	/* TODO: match all the other float constants */
/**
 * returns true if constant can be created with a simple float command
 * (for SSE: 0.0 always; more patterns only when CONSTRUCT_SSE_CONST is set)
 */
static bool is_simple_sse_Const(ir_node *node)
	tarval *tv = get_Const_tarval(node);
	ir_mode *mode = get_tarval_mode(tv);

	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST

#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bit of the double from the tarval bytes */
		unsigned val = get_tarval_sub_bits(tv, 0) |
			(get_tarval_sub_bits(tv, 1) << 8) |
			(get_tarval_sub_bits(tv, 2) << 16) |
			(get_tarval_sub_bits(tv, 3) << 24);

		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
/**
 * return NoREG or pic_base in case of PIC.
 * This is necessary as base address for newly created symbols
 * (with PIC all constant-pool accesses must be relative to the pic base).
 */
static ir_node *get_symconst_base(void)
	if (env_cg->birg->main_env->options->pic) {
		return arch_code_generator_get_pic_base(env_cg);
/**
 * Transforms a Const node into the cheapest ia32 representation:
 * special SSE/x87 instructions for 0.0/1.0, an immediate for integers,
 * otherwise a load from a constant-pool entity.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	assert(is_Const(node));

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			tarval *tv = get_Const_tarval(node);
			if (tarval_is_null(tv)) {
				/* 0.0: produced register-only via xZero (xorp) */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* 1.0: shift an all-ones register left then right so only
				 * the exponent bits of 1.0 remain (26/2 for float,
				 * 55/2 for double) */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				unsigned val = get_tarval_sub_bits(tv, 0) |
					(get_tarval_sub_bits(tv, 1) << 8) |
					(get_tarval_sub_bits(tv, 2) << 16) |
					(get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
				ir_node *cnst, *psllq;

				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
					(get_tarval_sub_bits(tv, 5) << 8) |
					(get_tarval_sub_bits(tv, 6) << 16) |
					(get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				/* shift the upper half into place */
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general case: load the value from a constant-pool entity */
			floatent = create_float_const_entity(node);

			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			/* constant loads can be rematerialized instead of spilled */
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path: fldz/fld1 for 0.0/1.0, otherwise load from entity */
		if (is_Const_null(node)) {
			load = new_bd_ia32_vfldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (is_Const_one(node)) {
			load = new_bd_ia32_vfld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
			floatent = create_float_const_entity(node);
			/* create_float_const_ent is smart and sometimes creates
			ls_mode = get_type_mode(get_entity_type(floatent));
			base = get_symconst_base();
			load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);

		be_dep_on_frame(load);
	} else { /* non-float mode */
		tarval *tv = get_Const_tarval(node);

		/* normalize all integer constants to 32bit unsigned */
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);

		be_dep_on_frame(cnst);
/**
 * Transforms a SymConst: float modes become a load of the entity's address,
 * integer modes become an ia32 Const carrying the entity.
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
	/* only entity addresses are supported here */
	if (get_SymConst_kind(node) != symconst_addr_ent) {
		panic("backend only support symconst_addr_ent (at %+F)", node);
	entity = get_SymConst_entity(node);
	cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);

	be_dep_on_frame(cnst);
/**
 * Create a float type for the given mode and cache it.
 * One cache slot exists per (mode, alignment) pair; alignment indexes
 * the per-mode cache arrays, so it must be < 16.
 *
 * @param mode the mode for the float type (might be integer mode for SSE2 types)
 * @param align alignment
 */
static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
	if (mode == mode_Iu) {
		static ir_type *int_Iu[16] = {NULL, };

		if (int_Iu[align] == NULL) {
			int_Iu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Iu[align];
	} else if (mode == mode_Lu) {
		static ir_type *int_Lu[16] = {NULL, };

		if (int_Lu[align] == NULL) {
			int_Lu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Lu[align];
	} else if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] == NULL) {
			float_F[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_F[align];
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] == NULL) {
			float_D[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_D[align];
	/* fallback: extended precision (mode_E) */
	static ir_type *float_E[16] = {NULL, };

	if (float_E[align] == NULL) {
		float_E[align] = tp = new_type_primitive(mode);
		/* set the specified alignment */
		set_type_alignment_bytes(tp, align);
	return float_E[align];
/**
 * Create a float[2] array type for the given atomic type.
 * Cached per (element mode, alignment); used for the ULL bias constant
 * which needs a zero/value pair (see ia32_gen_fp_known_const).
 *
 * @param tp the atomic type
 */
static ir_type *ia32_create_float_array(ir_type *tp)
	ir_mode *mode = get_type_mode(tp);
	unsigned align = get_type_alignment_bytes(tp);

	if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] != NULL)
			return float_F[align];
		arr = float_F[align] = new_type_array(1, tp);
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] != NULL)
			return float_D[align];
		arr = float_D[align] = new_type_array(1, tp);
		static ir_type *float_E[16] = {NULL, };

		if (float_E[align] != NULL)
			return float_E[align];
		arr = float_E[align] = new_type_array(1, tp);
	set_type_alignment_bytes(arr, align);
	/* two elements of tp */
	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
	set_type_state(arr, layout_fixed);
/* Generates an entity for a known FP const (used for FP Neg + Abs).
 * Entities are created lazily and cached in ent_cache, so each constant
 * is emitted at most once per compilation. */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	/* table of name/value/mode-selector/alignment per known constant;
	 * indexed by ia32_known_const_t */
	static const struct {
		const char *ent_name;
		const char *cnst_str;
	} names [ia32_known_const_max] = {
		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
	static ir_entity *ent_cache[ia32_known_const_max];

	const char *ent_name, *cnst_str;

	ent_name = names[kct].ent_name;
	if (! ent_cache[kct]) {
		cnst_str = names[kct].cnst_str;

		/* mode selector: 0 = 32bit int, 1 = 64bit int, otherwise float */
		switch (names[kct].mode) {
		case 0: mode = mode_Iu; break;
		case 1: mode = mode_Lu; break;
		default: mode = mode_F; break;
		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
		tp = ia32_create_float_type(mode, names[kct].align);

		/* the ULL bias is stored as a {0, bias} pair */
		if (kct == ia32_ULLBIAS)
			tp = ia32_create_float_array(tp);
		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);

		set_entity_ld_ident(ent, get_entity_ident(ent));
		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
		set_entity_visibility(ent, ir_visibility_local);

		if (kct == ia32_ULLBIAS) {
			ir_initializer_t *initializer = create_initializer_compound(2);

			set_initializer_compound_value(initializer, 0,
				create_initializer_tarval(get_mode_null(mode)));
			set_initializer_compound_value(initializer, 1,
				create_initializer_tarval(tv));

			set_entity_initializer(ent, initializer);
			set_entity_initializer(ent, create_initializer_tarval(tv));
	/* cache the entry */
	ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2, match_flags_t flags)
	/* float constants are always available */
	if (is_Const(node)) {
		ir_mode *mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			if (ia32_cg_config.use_sse2) {
				/* simple SSE constants are materialized directly, not via AM */
				if (is_simple_sse_Const(node))
				if (is_simple_x87_Const(node))
			if (get_irn_n_edges(node) > 1)
	load = get_Proj_pred(node);
	pn = get_Proj_proj(node);
	/* must be the result Proj of a Load in the same block */
	if (!is_Load(load) || pn != pn_Load_res)
	if (get_nodes_block(load) != block)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && prevents_AM(block, load, other))

	if (other2 != NULL && prevents_AM(block, load, other2))
/** Result record of match_arguments(): address, operand and flag
 *  information for constructing an ia32 node with address mode. */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
	ia32_op_type_t op_type;        /**< ia32_Normal or ia32_AddrModeS */
	unsigned commutative : 1;      /**< operation is commutative */
	unsigned ins_permuted : 1;     /**< op1/op2 were swapped during matching */
615 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
617 /* construct load address */
618 memset(addr, 0, sizeof(addr[0]));
619 ia32_create_address_mode(addr, ptr, 0);
621 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
622 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
623 addr->mem = be_transform_node(mem);
/**
 * Fill am->addr (and ls_mode/pinned/mem_proj) for source address mode on
 * @p node: float Consts become a constant-pool access, Proj(Load)s are
 * decomposed into their load address.
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	/* floating point immediates */
	if (is_Const(node)) {
		ir_entity *entity = create_float_const_entity(node);
		addr->base = get_symconst_base();
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		am->ls_mode = get_type_mode(get_entity_type(entity));
		/* constant-pool accesses may float freely */
		am->pinned = op_pin_state_floats;

	/* otherwise: node is a Proj(Load); take over the load's address */
	load = get_Proj_pred(node);
	ptr = get_Load_ptr(load);
	mem = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned = get_irn_pinned(load);
	am->ls_mode = get_Load_mode(load);
	/* remember the memory Proj so fix_mem_proj can reattach it */
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/** Copy the fields of @p addr (scale, symconst, offset, frame info)
 *  into the ia32 attributes of @p node. */
static void set_address(ir_node *node, const ia32_address_t *addr)
	set_ia32_am_scale(node, addr->scale);
	set_ia32_am_sc(node, addr->symconst_ent);
	set_ia32_am_offs_int(node, addr->offset);
	if (addr->symconst_sign)
		set_ia32_am_sc_sign(node);
	set_ia32_use_frame(node);
	set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
	set_address(node, &am->addr);

	set_ia32_op_type(node, am->op_type);
	set_ia32_ls_mode(node, am->ls_mode);
	if (am->pinned == op_pin_state_pinned) {
		/* beware: some nodes are already pinned and did not allow to change the state */
		if (get_irn_pinned(node) != op_pin_state_pinned)
			set_irn_pinned(node, op_pin_state_pinned);
	set_ia32_commutative(node);
/**
 * Check, if a given node is a Down-Conv, i.e. an integer Conv
 * from a mode with more bits to a mode with lesser bits.
 * Moreover, we return only true if the node has not more than 1 user.
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 */
static int is_downconv(const ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (not optimal but for now...)
	 */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
	/* both modes must live in gp registers and the target must not widen */
	ia32_mode_needs_gp_reg(src_mode) &&
	ia32_mode_needs_gp_reg(dest_mode) &&
	get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
725 /* Skip all Down-Conv's on a given node and return the resulting node. */
726 ir_node *ia32_skip_downconv(ir_node *node)
728 while (is_downconv(node))
729 node = get_Conv_op(node);
/** Widen @p node to a 32bit gp value via an I2I Conv (the target mode is
 *  chosen from the signedness of @p node's mode; selection lines truncated
 *  in this view). @p orig_node is used for debug attribution. */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_signed(mode)) {
	block = get_nodes_block(node);
	dbgi = get_irn_dbg_info(node);

	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr = &am->addr;
	ir_mode *mode = get_irn_mode(op2);
	int mode_bits = get_mode_size_bits(mode);
	ir_node *new_op1, *new_op2;
	unsigned commutative;
	int use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	commutative = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am = (flags & match_am) != 0;
	use_immediate = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* 8/16 bit AM only when the operation explicitly supports it */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
		(mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
		op1 = ia32_skip_downconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = try_create_Immediate(op2, 0);

	/* try source address mode on op2 first... */
	if (new_op2 == NULL &&
		use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		build_address(am, op2, 0);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		if (mode_is_float(mode)) {
			new_op2 = ia32_new_NoReg_vfp(env_cg);
		am->op_type = ia32_AddrModeS;
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
		ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		/* ...then on op1, swapping the operands */
		build_address(am, op1, 0);

		if (mode_is_float(mode)) {
			noreg = ia32_new_NoReg_vfp(env_cg);

		if (new_op2 != NULL) {
			new_op1 = be_transform_node(op2);
			/* record the swap so x87 can pick the reversed instruction */
			am->ins_permuted = 1;
		am->op_type = ia32_AddrModeS;
		/* no address mode possible: normal two-register form */
		am->op_type = ia32_Normal;

		if (flags & match_try_am) {
		mode = get_irn_mode(op2);
		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
			new_op2 = create_upconv(op2, NULL);
			am->ls_mode = mode_Iu;
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;

	/* fill in default address parts */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1 = new_op1;
	am->new_op2 = new_op2;
	am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	/* nothing to do if the matched operand was not a load */
	if (am->mem_proj == NULL)

	/* we have to create a mode_T so the old MemProj can attach to us */
	mode = get_irn_mode(node);
	load = get_Proj_pred(am->mem_proj);

	/* the consumed load is now represented by this node */
	be_set_transformed_node(load, node);

	if (mode != mode_T) {
		set_irn_mode(node, mode_T);
		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node The original node for which the binop is created
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_func *func, match_flags_t flags)
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op.
 */
	n_ia32_l_binop_left, /**< ia32 left input */
	n_ia32_l_binop_right, /**< ia32 right input */
	n_ia32_l_binop_eflags /**< ia32 eflags input */
/* the generic input positions must match those of the lowered Adc and Sbb
 * nodes, since gen_binop_flags indexes both through the generic names */
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node The node to transform
 * @param func The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
	ir_node *src_block = get_nodes_block(node);
	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
	ir_node *block, *new_node, *new_eflags;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* eflags is passed as other_op: AM must not reorder across it */
	match_arguments(&am, src_block, op1, op2, eflags, flags);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_eflags = be_transform_node(eflags);
	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, new_eflags);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
		(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/** Return the (transformed) node of the x87 fp control word register,
 *  creating and caching it in initial_fpcw on first use. */
static ir_node *get_fpcw(void)
	if (initial_fpcw != NULL)
		return initial_fpcw;

	fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
		&ia32_fp_cw_regs[REG_FPCW]);
	initial_fpcw = be_transform_node(fpcw);

	return initial_fpcw;
/**
 * Construct a standard x87 float binary operation, set AM and immediate
 * if required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_float_func *func)
	ir_mode *mode = get_irn_mode(node);
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	ia32_x87_attr_t *attr;
	/* All operations are considered commutative, because there are reverse
	match_flags_t flags = match_commutative;

	/* happens for div nodes... */
	mode = get_divop_resmod(node);

	/* cannot use address mode with long double on x87 */
	if (get_mode_size_bits(mode) <= 64)

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	/* x87 ops additionally take the fp control word */
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
		am.new_op1, am.new_op2, get_fpcw());
	set_am_attributes(new_node, &am);

	/* propagate an operand swap so emit can use the reversed opcode */
	attr = get_ia32_x87_attr(new_node);
	attr->attr.data.ins_permuted = am.ins_permuted;

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_shift_func *func,
		match_flags_t flags)
	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;

	assert(! mode_is_float(get_irn_mode(node)));
	assert(flags & match_immediate);
	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);

	if (flags & match_mode_neutral) {
		op1 = ia32_skip_downconv(op1);
		new_op1 = be_transform_node(op1);
	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
		/* non-32bit value: widen first so the shift sees a full register */
		new_op1 = create_upconv(op1, node);
		new_op1 = be_transform_node(op1);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
		ir_node *const op = get_Conv_op(op2);
		if (mode_is_float(get_irn_mode(op)))
	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);

	new_op2 = create_immediate_or_transform(op2, 0);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op1, new_op2);
	SET_IA32_ORIG_NODE(new_node, node);

	/* lowered shift instruction may have a dependency operand, handle it here */
	if (get_irn_arity(node) == 3) {
		/* we have a dependency */
		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
		add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
		match_flags_t flags)
	ir_node *block, *new_block, *new_op, *new_node;

	/* only mode-neutrality is supported for unops */
	assert(flags == 0 || flags == match_mode_neutral);
	if (flags & match_mode_neutral) {
		op = ia32_skip_downconv(op);

	new_op = be_transform_node(op);
	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op);

	SET_IA32_ORIG_NODE(new_node, node);
/** Build an ia32 Lea node from a decomposed address (base + index*scale +
 *  offset/symconst); NULL base/index are replaced before transforming. */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
		ia32_address_t *addr)
	ir_node *base, *index, *res;

	base = be_transform_node(base);

	index = addr->index;
	if (index == NULL) {
	index = be_transform_node(index);

	res = new_bd_ia32_Lea(dbgi, block, base, index);
	set_address(res, addr);
1169 * Returns non-zero if a given address mode has a symbolic or
1170 * numerical offset != 0.
1172 static int am_has_immediates(const ia32_address_t *addr)
1174 return addr->offset != 0 || addr->symconst_ent != NULL
1175 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add.
 *
 * @return the created ia32 Add node
 */
static ir_node *gen_Add(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *op1 = get_Add_left(node);
	ir_node *op2 = get_Add_right(node);
	ir_node *block, *new_block, *new_node, *add_immediate_op;
	ia32_address_t addr;
	ia32_address_mode_t am;

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
				match_commutative | match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);

	ia32_mark_non_am(node);

	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);

	/*
	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
	 * 1. Add with immediate -> Lea
	 * 2. Add with possible source address mode -> Add
	 * 3. Otherwise -> Lea
	 */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, node, ia32_create_am_force);
	add_immediate_op = NULL;

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);

	/* case 0: everything folded into symconst/offset -> plain Const */
	if (addr.base == NULL && addr.index == NULL) {
		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
			addr.symconst_sign, 0, addr.offset);
		be_dep_on_frame(new_node);
		SET_IA32_ORIG_NODE(new_node, node);

	/* add with immediate? */
	if (addr.index == NULL) {
		add_immediate_op = addr.base;
	} else if (addr.base == NULL && addr.scale == 0) {
		add_immediate_op = addr.index;

	if (add_immediate_op != NULL) {
		if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
			/* Add x,0 degenerates to x itself */
			return be_transform_node(add_immediate_op);

		/* case 1: single operand plus immediate -> Lea */
		new_node = create_lea_from_address(dbgi, new_block, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

	/* test if we can use source address mode */
	match_arguments(&am, block, op1, op2, NULL, match_commutative
		| match_mode_neutral | match_am | match_immediate | match_try_am);

	/* construct an Add with source address mode */
	if (am.op_type == ia32_AddrModeS) {
		ia32_address_t *am_addr = &am.addr;
		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
			am_addr->index, am_addr->mem, am.new_op1,
		set_am_attributes(new_node, &am);
		SET_IA32_ORIG_NODE(new_node, node);

		new_node = fix_mem_proj(new_node, &am);

	/* otherwise construct a lea */
	new_node = create_lea_from_address(dbgi, new_block, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
1275 * Creates an ia32 Mul.
1277 * @return the created ia32 Mul node
1279 static ir_node *gen_Mul(ir_node *node)
1281 ir_node *op1 = get_Mul_left(node);
1282 ir_node *op2 = get_Mul_right(node);
1283 ir_mode *mode = get_irn_mode(node);
/* Float multiply: SSE2 xMul when available, x87 vfmul otherwise. */
1285 if (mode_is_float(mode)) {
1286 if (ia32_cg_config.use_sse2)
1287 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1288 match_commutative | match_am);
1290 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* Integer multiply: IMul supports source AM and immediates. */
1292 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1293 match_commutative | match_am | match_mode_neutral |
1294 match_immediate | match_am_and_immediates);
1298 * Creates an ia32 Mulh.
1299 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1300 * this result while Mul returns the lower 32 bit.
1302 * @return the created ia32 Mulh node
1304 static ir_node *gen_Mulh(ir_node *node)
1306 dbg_info *dbgi = get_irn_dbg_info(node);
1307 ir_node *op1 = get_Mulh_left(node);
1308 ir_node *op2 = get_Mulh_right(node);
1309 ir_mode *mode = get_irn_mode(node);
1311 ir_node *proj_res_high;
/* Only the 32bit case maps onto the one-operand mul/imul instructions. */
1313 if (get_mode_size_bits(mode) != 32) {
1314 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* Signed -> IMul1OP, unsigned -> Mul; in both cases project out the
 * high 32 bits (EDX) of the 64bit product. */
1317 if (mode_is_signed(mode)) {
1318 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1319 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1321 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1322 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1324 return proj_res_high;
1328 * Creates an ia32 And.
1330 * @return The created ia32 And node
1332 static ir_node *gen_And(ir_node *node)
1334 ir_node *op1 = get_And_left(node);
1335 ir_node *op2 = get_And_right(node);
1336 assert(! mode_is_float(get_irn_mode(node)));
1338 /* is it a zero extension? */
1339 if (is_Const(op2)) {
1340 tarval *tv = get_Const_tarval(op2);
1341 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero-extension of the low 8/16 bits:
 * emit an I2I conversion (movzx) instead of an and instruction. */
1343 if (v == 0xFF || v == 0xFFFF) {
1344 dbg_info *dbgi = get_irn_dbg_info(node);
1345 ir_node *block = get_nodes_block(node);
1352 assert(v == 0xFFFF);
1355 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* Generic case: plain And with AM/immediate support. */
1360 return gen_binop(node, op1, op2, new_bd_ia32_And,
1361 match_commutative | match_mode_neutral | match_am | match_immediate);
1367 * Creates an ia32 Or.
1369 * @return The created ia32 Or node
1371 static ir_node *gen_Or(ir_node *node)
1373 ir_node *op1 = get_Or_left(node);
1374 ir_node *op2 = get_Or_right(node);
/* Float Or does not exist in firm; must already be integer here. */
1376 assert (! mode_is_float(get_irn_mode(node)));
1377 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1378 | match_mode_neutral | match_am | match_immediate);
1384 * Creates an ia32 Eor.
1386 * @return The created ia32 Eor node
1388 static ir_node *gen_Eor(ir_node *node)
1390 ir_node *op1 = get_Eor_left(node);
1391 ir_node *op2 = get_Eor_right(node);
/* firm Eor (exclusive or) maps directly onto the ia32 Xor node. */
1393 assert(! mode_is_float(get_irn_mode(node)));
1394 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1395 | match_mode_neutral | match_am | match_immediate);
1400 * Creates an ia32 Sub.
1402 * @return The created ia32 Sub node
1404 static ir_node *gen_Sub(ir_node *node)
1406 ir_node *op1 = get_Sub_left(node);
1407 ir_node *op2 = get_Sub_right(node);
1408 ir_mode *mode = get_irn_mode(node);
/* Float subtract: SSE2 xSub when available, x87 vfsub otherwise.
 * Sub is not commutative, so no match_commutative here. */
1410 if (mode_is_float(mode)) {
1411 if (ia32_cg_config.use_sse2)
1412 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1414 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalized to Add(x, -C) by the
 * middleend; warn so the missing normalization gets noticed. */
1417 if (is_Const(op2)) {
1418 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1422 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1423 | match_am | match_immediate);
/**
 * Compute the memory input for a node that consumes an address-mode load,
 * merging the node's own memory predecessor (src_mem) with the memory of
 * the folded load (am_mem) while avoiding cycles through the load itself.
 *
 * @param block    the (transformed) block for a possibly created Sync
 * @param src_val  the value operand that may be a Proj of the folded load
 * @param src_mem  the original memory predecessor of the node
 * @param am_mem   the memory of the address mode (NoMem if none)
 */
1426 static ir_node *transform_AM_mem(ir_node *const block,
1427 ir_node *const src_val,
1428 ir_node *const src_mem,
1429 ir_node *const am_mem)
/* No AM memory: just use the transformed original memory. */
1431 if (is_NoMem(am_mem)) {
1432 return be_transform_node(src_mem);
1433 } else if (is_Proj(src_val) &&
1435 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1436 /* avoid memory loop */
/* src_mem comes from a Sync: rebuild it without the predecessor that
 * would close a loop over the folded load. */
1438 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1439 ir_node *const ptr_pred = get_Proj_pred(src_val);
1440 int const arity = get_Sync_n_preds(src_mem);
1445 NEW_ARR_A(ir_node*, ins, arity + 1);
1447 /* NOTE: This sometimes produces dead-code because the old sync in
1448 * src_mem might not be used anymore, we should detect this case
1449 * and kill the sync... */
1450 for (i = arity - 1; i >= 0; --i) {
1451 ir_node *const pred = get_Sync_pred(src_mem, i);
1453 /* avoid memory loop */
1454 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1457 ins[n++] = be_transform_node(pred);
1462 return new_r_Sync(block, n, ins);
/* Fallback: synchronize the two independent memories with a 2-ary Sync. */
1466 ins[0] = be_transform_node(src_mem);
1468 return new_r_Sync(block, 2, ins);
1473 * Create a 32bit to 64bit signed extension.
1475 * @param dbgi debug info
1476 * @param block the block where node nodes should be placed
1477 * @param val the value to extend
1478 * @param orig the original node
1480 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1481 ir_node *val, const ir_node *orig)
/* Two encodings for "fill a register with the sign of val":
 * - cltd/cdq (short, but pins val to EAX) when the config prefers it,
 * - sar val, 31 otherwise. ProduceVal gives cltd a fake EDX input. */
1486 if (ia32_cg_config.use_short_sex_eax) {
1487 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1488 be_dep_on_frame(pval);
1489 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1491 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1492 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1494 SET_IA32_ORIG_NODE(res, orig);
1499 * Generates an ia32 DivMod with additional infrastructure for the
1500 * register allocator if needed.
/* Shared transformation for firm Div, Mod and DivMod nodes: builds an
 * ia32 IDiv (signed, with EDX sign extension) or Div (unsigned, with
 * EDX zeroed) and wires up address mode for the divisor. */
1502 static ir_node *create_Div(ir_node *node)
1504 dbg_info *dbgi = get_irn_dbg_info(node);
1505 ir_node *block = get_nodes_block(node);
1506 ir_node *new_block = be_transform_node(block);
1513 ir_node *sign_extension;
1514 ia32_address_mode_t am;
1515 ia32_address_t *addr = &am.addr;
1517 /* the upper bits have random contents for smaller modes */
/* Fetch operands/mem/result mode from whichever of the three node
 * kinds we were given. */
1518 switch (get_irn_opcode(node)) {
1520 op1 = get_Div_left(node);
1521 op2 = get_Div_right(node);
1522 mem = get_Div_mem(node);
1523 mode = get_Div_resmode(node);
1526 op1 = get_Mod_left(node);
1527 op2 = get_Mod_right(node);
1528 mem = get_Mod_mem(node);
1529 mode = get_Mod_resmode(node);
1532 op1 = get_DivMod_left(node);
1533 op2 = get_DivMod_right(node);
1534 mem = get_DivMod_mem(node);
1535 mode = get_DivMod_resmode(node);
1538 panic("invalid divmod node %+F", node);
1541 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1543 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1544 is the memory of the consumed address. We can have only the second op as address
1545 in Div nodes, so check only op2. */
1546 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* EDX input: sign of the dividend for idiv, constant zero for div. */
1548 if (mode_is_signed(mode)) {
1549 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1550 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1551 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1553 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1554 be_dep_on_frame(sign_extension);
1556 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1557 addr->index, new_mem, am.new_op2,
1558 am.new_op1, sign_extension);
/* Keep the original pinned state (division may trap on zero). */
1561 set_irn_pinned(new_node, get_irn_pinned(node));
1563 set_am_attributes(new_node, &am);
1564 SET_IA32_ORIG_NODE(new_node, node);
1566 new_node = fix_mem_proj(new_node, &am);
1572 * Generates an ia32 Mod.
/* Thin wrapper: Mod shares the div/idiv machinery in create_Div(). */
1574 static ir_node *gen_Mod(ir_node *node)
1576 return create_Div(node);
1580 * Generates an ia32 Div.
/* Thin wrapper: Div shares the div/idiv machinery in create_Div(). */
1582 static ir_node *gen_Div(ir_node *node)
1584 return create_Div(node);
1588 * Generates an ia32 DivMod.
/* Thin wrapper: DivMod shares the div/idiv machinery in create_Div(). */
1590 static ir_node *gen_DivMod(ir_node *node)
1592 return create_Div(node);
1598 * Creates an ia32 floating Div.
1600 * @return The created ia32 xDiv node
1602 static ir_node *gen_Quot(ir_node *node)
1604 ir_node *op1 = get_Quot_left(node);
1605 ir_node *op2 = get_Quot_right(node);
/* Quot is always float in firm: SSE2 xDiv or x87 vfdiv. */
1607 if (ia32_cg_config.use_sse2) {
1608 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1610 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1616 * Creates an ia32 Shl.
1618 * @return The created ia32 Shl node
1620 static ir_node *gen_Shl(ir_node *node)
1622 ir_node *left = get_Shl_left(node);
1623 ir_node *right = get_Shl_right(node);
/* Left shift is mode neutral (upper bits shifted out anyway). */
1625 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1626 match_mode_neutral | match_immediate);
1630 * Creates an ia32 Shr.
1632 * @return The created ia32 Shr node
1634 static ir_node *gen_Shr(ir_node *node)
1636 ir_node *left = get_Shr_left(node);
1637 ir_node *right = get_Shr_right(node);
/* No match_mode_neutral: logical right shift depends on the upper bits. */
1639 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1645 * Creates an ia32 Sar.
1647 * @return The created ia32 Shrs node
1649 static ir_node *gen_Shrs(ir_node *node)
1651 ir_node *left = get_Shrs_left(node);
1652 ir_node *right = get_Shrs_right(node);
/* Shrs by 31 extracts the sign of a 32bit value: use the cheaper
 * sign-extension helper (cltd or sar 31).
 * NOTE(review): the guard comparing val against 31 is elided in this
 * view — confirm against the full file. */
1654 if (is_Const(right)) {
1655 tarval *tv = get_Const_tarval(right);
1656 long val = get_tarval_long(tv);
1658 /* this is a sign extension */
1659 dbg_info *dbgi = get_irn_dbg_info(node);
1660 ir_node *block = be_transform_node(get_nodes_block(node));
1661 ir_node *new_op = be_transform_node(left);
1663 return create_sex_32_64(dbgi, block, new_op, node);
1667 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 is a sign-extension of the low
 * 16/8 bits: emit an I2I conversion (movsx) instead of two shifts. */
1668 if (is_Const(right) && is_Shl(left)) {
1669 ir_node *shl_left = get_Shl_left(left);
1670 ir_node *shl_right = get_Shl_right(left);
1671 if (is_Const(shl_right)) {
1672 tarval *tv1 = get_Const_tarval(right);
1673 tarval *tv2 = get_Const_tarval(shl_right);
1674 if (tv1 == tv2 && tarval_is_long(tv1)) {
1675 long val = get_tarval_long(tv1);
1676 if (val == 16 || val == 24) {
1677 dbg_info *dbgi = get_irn_dbg_info(node);
1678 ir_node *block = get_nodes_block(node);
1688 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* Generic case: plain arithmetic right shift. */
1697 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1703 * Creates an ia32 Rol.
1705 * @param op1 The first operator
1706 * @param op2 The second operator
1707 * @return The created ia32 RotL node
1709 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1711 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1717 * Creates an ia32 Ror.
1718 * NOTE: There is no RotR with immediate because this would always be a RotL
1719 * "imm-mode_size_bits" which can be pre-calculated.
1721 * @param op1 The first operator
1722 * @param op2 The second operator
1723 * @return The created ia32 RotR node
1725 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1727 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1733 * Creates an ia32 RotR or RotL (depending on the found pattern).
1735 * @return The created ia32 RotL or RotR node
1737 static ir_node *gen_Rotl(ir_node *node)
1739 ir_node *rotate = NULL;
1740 ir_node *op1 = get_Rotl_left(node);
1741 ir_node *op2 = get_Rotl_right(node);
1743 /* Firm has only RotL, so we are looking for a right (op2)
1744 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1745 that means we can create a RotR instead of an Add and a RotL */
/* Pattern: Rotl(x, Add(Minus(e), bits)) == Rotr(x, e) for word size
 * bits; saves the Add and the Minus. */
1749 ir_node *left = get_Add_left(add);
1750 ir_node *right = get_Add_right(add);
1751 if (is_Const(right)) {
1752 tarval *tv = get_Const_tarval(right);
1753 ir_mode *mode = get_irn_mode(node);
1754 long bits = get_mode_size_bits(mode);
1756 if (is_Minus(left) &&
1757 tarval_is_long(tv) &&
1758 get_tarval_long(tv) == bits &&
1761 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1762 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* No pattern matched: emit the straightforward RotL. */
1767 if (rotate == NULL) {
1768 rotate = gen_Rol(node, op1, op2);
1777 * Transforms a Minus node.
1779 * @return The created ia32 Minus node
1781 static ir_node *gen_Minus(ir_node *node)
1783 ir_node *op = get_Minus_op(node);
1784 ir_node *block = be_transform_node(get_nodes_block(node));
1785 dbg_info *dbgi = get_irn_dbg_info(node);
1786 ir_mode *mode = get_irn_mode(node);
/* Float negation: SSE2 flips the sign bit with an xor against a
 * sign-mask constant in memory; x87 has a dedicated fchs. */
1791 if (mode_is_float(mode)) {
1792 ir_node *new_op = be_transform_node(op);
1793 if (ia32_cg_config.use_sse2) {
1794 /* TODO: non-optimal... if we have many xXors, then we should
1795 * rather create a load for the const and use that instead of
1796 * several AM nodes... */
1797 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1799 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1800 noreg_GP, nomem, new_op, noreg_xmm);
/* Pick the float (SSIGN) or double (DSIGN) sign-mask constant. */
1802 size = get_mode_size_bits(mode);
1803 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1805 set_ia32_am_sc(new_node, ent);
1806 set_ia32_op_type(new_node, ia32_AddrModeS);
1807 set_ia32_ls_mode(new_node, mode);
1809 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* Integer negation: plain neg instruction. */
1812 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1815 SET_IA32_ORIG_NODE(new_node, node);
1821 * Transforms a Not node.
1823 * @return The created ia32 Not node
1825 static ir_node *gen_Not(ir_node *node)
1827 ir_node *op = get_Not_op(node);
/* Boolean and float Nots must have been lowered before the backend. */
1829 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1830 assert (! mode_is_float(get_irn_mode(node)));
1832 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1838 * Transforms an Abs node.
1840 * @return The created ia32 Abs node
1842 static ir_node *gen_Abs(ir_node *node)
1844 ir_node *block = get_nodes_block(node);
1845 ir_node *new_block = be_transform_node(block);
1846 ir_node *op = get_Abs_op(node);
1847 dbgi_info *dbgi = get_irn_dbg_info(node);
1848 ir_mode *mode = get_irn_mode(node);
/* Float abs: SSE2 masks off the sign bit with an and against an
 * abs-mask constant in memory; x87 has a dedicated fabs. */
1854 if (mode_is_float(mode)) {
1855 new_op = be_transform_node(op);
1857 if (ia32_cg_config.use_sse2) {
1858 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1859 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1860 noreg_GP, nomem, new_op, noreg_fp);
/* Pick the float (SABS) or double (DABS) abs-mask constant. */
1862 size = get_mode_size_bits(mode);
1863 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1865 set_ia32_am_sc(new_node, ent);
1867 SET_IA32_ORIG_NODE(new_node, node);
1869 set_ia32_op_type(new_node, ia32_AddrModeS);
1870 set_ia32_ls_mode(new_node, mode);
1872 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1873 SET_IA32_ORIG_NODE(new_node, node);
/* Integer abs via the branchless idiom: s = x >> 31 (all sign bits),
 * result = (x ^ s) - s. Sub-32bit values are sign-extended first. */
1876 ir_node *xor, *sign_extension;
1878 if (get_mode_size_bits(mode) == 32) {
1879 new_op = be_transform_node(op);
1881 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1884 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1886 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1887 nomem, new_op, sign_extension);
1888 SET_IA32_ORIG_NODE(xor, node);
1890 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1891 nomem, xor, sign_extension);
1892 SET_IA32_ORIG_NODE(new_node, node);
1899 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1901 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1903 dbg_info *dbgi = get_irn_dbg_info(cmp);
1904 ir_node *block = get_nodes_block(cmp);
1905 ir_node *new_block = be_transform_node(block);
1906 ir_node *op1 = be_transform_node(x);
1907 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of op1 into the carry flag; the caller tests CF. */
1909 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1913 * Transform a node returning a "flag" result.
1915 * @param node the node to transform
1916 * @param pnc_out the compare mode to use
/* Produces the eflags-mode node a conditional consumer (Jcc/Setcc/CMov)
 * should depend on, and writes the (possibly adjusted) pn_Cmp with the
 * ia32 float/unsigned qualifier bits to *pnc_out. */
1918 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1925 /* we have a Cmp as input */
1926 if (is_Proj(node)) {
1927 ir_node *pred = get_Proj_pred(node);
1929 pn_Cmp pnc = get_Proj_proj(node);
/* Eq/Lg compares of "x & (1 << n)" against 0 (or against the shifted
 * bit itself) can use bt, which tests a single bit into CF. */
1930 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1931 ir_node *l = get_Cmp_left(pred);
1932 ir_node *r = get_Cmp_right(pred);
1934 ir_node *la = get_And_left(l);
1935 ir_node *ra = get_And_right(l);
1937 ir_node *c = get_Shl_left(la);
1938 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1939 /* (1 << n) & ra) */
1940 ir_node *n = get_Shl_right(la);
1941 flags = gen_bt(pred, ra, n);
1942 /* we must generate a Jc/Jnc jump */
/* bt reports the bit in CF: Lt/Ge (unsigned below/above-equal)
 * encode the carry/no-carry conditions for Jc/Jnc. */
1943 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1946 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Symmetric case: the shifted bit is the right And operand. */
1951 ir_node *c = get_Shl_left(ra);
1952 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1953 /* la & (1 << n)) */
1954 ir_node *n = get_Shl_right(ra);
1955 flags = gen_bt(pred, la, n);
1956 /* we must generate a Jc/Jnc jump */
1957 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1960 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1966 /* add ia32 compare flags */
1968 ir_node *l = get_Cmp_left(pred);
1969 ir_mode *mode = get_irn_mode(l);
1970 if (mode_is_float(mode))
1971 pnc |= ia32_pn_Cmp_float;
1972 else if (! mode_is_signed(mode))
1973 pnc |= ia32_pn_Cmp_unsigned;
1976 flags = be_transform_node(pred);
1981 /* a mode_b value, we have to compare it against 0 */
1982 dbgi = get_irn_dbg_info(node);
1983 new_block = be_transform_node(get_nodes_block(node));
1984 new_op = be_transform_node(node);
/* test op,op sets ZF iff op == 0; Lg means "value was true". */
1985 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1986 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1987 *pnc_out = pn_Cmp_Lg;
1992 * Transforms a Load.
1994 * @return the created ia32 Load node
1996 static ir_node *gen_Load(ir_node *node)
1998 ir_node *old_block = get_nodes_block(node);
1999 ir_node *block = be_transform_node(old_block);
2000 ir_node *ptr = get_Load_ptr(node);
2001 ir_node *mem = get_Load_mem(node);
2002 ir_node *new_mem = be_transform_node(mem);
2005 dbg_info *dbgi = get_irn_dbg_info(node);
2006 ir_mode *mode = get_Load_mode(node);
2008 ia32_address_t addr;
2010 /* construct load address */
2011 memset(&addr, 0, sizeof(addr));
2012 ia32_create_address_mode(&addr, ptr, 0);
2019 base = be_transform_node(base);
2022 if (index == NULL) {
2025 index = be_transform_node(index);
/* Pick the load flavor: SSE2 xLoad / x87 vfld for floats, Conv_I2I
 * (movsx/movzx) for sub-32bit integers, plain mov otherwise. */
2028 if (mode_is_float(mode)) {
2029 if (ia32_cg_config.use_sse2) {
2030 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2033 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2037 assert(mode != mode_b);
2039 /* create a conv node with address mode for smaller modes */
2040 if (get_mode_size_bits(mode) < 32) {
2041 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2042 new_mem, noreg_GP, mode);
2044 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2048 set_irn_pinned(new_node, get_irn_pinned(node));
2049 set_ia32_op_type(new_node, ia32_AddrModeS);
2050 set_ia32_ls_mode(new_node, mode);
2051 set_address(new_node, &addr);
/* Floating (unpinned) loads may be rematerialized by the spiller; the
 * asserts check the res proj number is the same for all load kinds. */
2053 if (get_irn_pinned(node) == op_pin_state_floats) {
2054 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2055 && pn_ia32_vfld_res == pn_ia32_Load_res
2056 && pn_ia32_Load_res == pn_ia32_res);
2057 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2060 SET_IA32_ORIG_NODE(new_node, node);
2062 be_dep_on_frame(new_node);
/**
 * Decide whether a Load feeding `node` can be folded into a
 * destination-address-mode operation (load-op-store on the same address).
 *
 * @param block the block the candidate store lives in
 * @param node  the value operand (expected to be a Proj of a Load)
 * @param mem   the store's memory predecessor
 * @param ptr   the store's address; must equal the load's address
 * @param other the second operand of a binop, or NULL for unops
 */
2066 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2067 ir_node *ptr, ir_node *other)
2074 /* we only use address mode if we're the only user of the load */
2075 if (get_irn_n_edges(node) > 1)
2078 load = get_Proj_pred(node);
2081 if (get_nodes_block(load) != block)
2084 /* store should have the same pointer as the load */
2085 if (get_Load_ptr(load) != ptr)
2088 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2089 if (other != NULL &&
2090 get_nodes_block(other) == block &&
2091 heights_reachable_in_block(heights, other, load)) {
2095 if (prevents_AM(block, load, mem))
2097 /* Store should be attached to the load via mem */
2098 assert(heights_reachable_in_block(heights, mem, load));
/**
 * Try to build a destination-address-mode binop (e.g. `add [mem], reg`)
 * for a Store(binop(Load(p), x), p) pattern. Returns the new memory-op
 * node, or (presumably) NULL when no operand qualifies — the early-return
 * path is elided in this view.
 *
 * @param func     constructor for the >8bit variant of the memop
 * @param func8bit constructor for the 8bit variant
 */
2103 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2104 ir_node *mem, ir_node *ptr, ir_mode *mode,
2105 construct_binop_dest_func *func,
2106 construct_binop_dest_func *func8bit,
2107 match_flags_t flags)
2109 ir_node *src_block = get_nodes_block(node);
2117 ia32_address_mode_t am;
2118 ia32_address_t *addr = &am.addr;
2119 memset(&am, 0, sizeof(am));
2121 assert(flags & match_immediate); /* there is no destam node without... */
2122 commutative = (flags & match_commutative) != 0;
/* Either operand may be the folded load; the other becomes the register
 * or immediate operand. op2 only qualifies for commutative ops. */
2124 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2125 build_address(&am, op1, ia32_create_am_double_use);
2126 new_op = create_immediate_or_transform(op2, 0);
2127 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2128 build_address(&am, op2, ia32_create_am_double_use);
2129 new_op = create_immediate_or_transform(op1, 0);
/* Fill unused address parts with the no-register placeholder. */
2134 if (addr->base == NULL)
2135 addr->base = noreg_GP;
2136 if (addr->index == NULL)
2137 addr->index = noreg_GP;
2138 if (addr->mem == NULL)
2141 dbgi = get_irn_dbg_info(node);
2142 block = be_transform_node(src_block);
2143 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2145 if (get_mode_size_bits(mode) == 8) {
2146 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2148 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2150 set_address(new_node, addr);
2151 set_ia32_op_type(new_node, ia32_AddrModeD);
2152 set_ia32_ls_mode(new_node, mode);
2153 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the load's mem proj to the combined memop so its other
 * memory users see the new node.
 * NOTE(review): a NULL guard on am.mem_proj is likely elided here. */
2155 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2156 mem_proj = be_transform_node(am.mem_proj);
2157 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Try to build a destination-address-mode unop (e.g. `not [mem]`, `neg
 * [mem]`) for a Store(unop(Load(p)), p) pattern. Returns the new
 * memory-op node, or NULL when the pattern does not qualify.
 */
2162 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2163 ir_node *ptr, ir_mode *mode,
2164 construct_unop_dest_func *func)
2166 ir_node *src_block = get_nodes_block(node);
2172 ia32_address_mode_t am;
2173 ia32_address_t *addr = &am.addr;
2175 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2178 memset(&am, 0, sizeof(am));
2179 build_address(&am, op, ia32_create_am_double_use);
2181 dbgi = get_irn_dbg_info(node);
2182 block = be_transform_node(src_block);
2183 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2184 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2185 set_address(new_node, addr);
2186 set_ia32_op_type(new_node, ia32_AddrModeD);
2187 set_ia32_ls_mode(new_node, mode);
2188 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the load's mem proj to the combined memop (same scheme as
 * in dest_am_binop). */
2190 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2191 mem_proj = be_transform_node(am.mem_proj);
2192 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Negate a pn_Cmp that may carry the ia32 float-compare qualifier bit:
 * float compares negate over mode_F (unordered handling), integer
 * compares over mode_Iu.
 */
2197 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2199 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2200 return get_negated_pnc(pnc, mode);
/**
 * Try to turn Store(Mux(cond, 0/1, 1/0)) into a SetccMem, which writes
 * the condition flag directly to memory. Only works for 8bit stores and
 * non-float conditions; returns NULL when the pattern does not match.
 */
2203 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2205 ir_mode *mode = get_irn_mode(node);
2206 ir_node *mux_true = get_Mux_true(node);
2207 ir_node *mux_false = get_Mux_false(node);
2216 ia32_address_t addr;
2218 if (get_mode_size_bits(mode) != 8)
/* Accept Mux(c, 1, 0) directly and Mux(c, 0, 1) with negated pnc. */
2221 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2223 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2229 cond = get_Mux_sel(node);
2230 flags = get_flags_node(cond, &pnc);
2231 /* we can't handle the float special cases with SetM */
2232 if (pnc & ia32_pn_Cmp_float)
2235 pnc = ia32_get_negated_pnc(pnc);
2237 build_address_ptr(&addr, ptr, mem);
2239 dbgi = get_irn_dbg_info(node);
2240 block = get_nodes_block(node);
2241 new_block = be_transform_node(block);
2242 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2243 addr.index, addr.mem, flags, pnc);
2244 set_address(new_node, &addr);
2245 set_ia32_op_type(new_node, ia32_AddrModeD);
2246 set_ia32_ls_mode(new_node, mode);
2247 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Try to transform a Store into a destination-address-mode operation
 * (op [mem], …) by matching the stored value against foldable patterns:
 * Add/Sub/And/Or/Eor/shifts/Rotl/Mux/Minus/Not. Returns the new memop
 * node or NULL if no pattern applies.
 */
2252 static ir_node *try_create_dest_am(ir_node *node)
2254 ir_node *val = get_Store_value(node);
2255 ir_node *mem = get_Store_mem(node);
2256 ir_node *ptr = get_Store_ptr(node);
2257 ir_mode *mode = get_irn_mode(val);
2258 unsigned bits = get_mode_size_bits(mode);
2263 /* handle only GP modes for now... */
2264 if (!ia32_mode_needs_gp_reg(mode))
2268 /* store must be the only user of the val node */
2269 if (get_irn_n_edges(val) > 1)
2271 /* skip pointless convs */
/* A Conv that doesn't widen (downconv or same size) is irrelevant to
 * the truncating store and can be looked through. */
2273 ir_node *conv_op = get_Conv_op(val);
2274 ir_mode *pred_mode = get_irn_mode(conv_op);
2275 if (!ia32_mode_needs_gp_reg(pred_mode))
2277 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2285 /* value must be in the same block */
2286 if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the stored value's operation. */
2289 switch (get_irn_opcode(val)) {
2291 op1 = get_Add_left(val);
2292 op2 = get_Add_right(val);
/* Add +/-1 becomes inc/dec when the target config prefers them. */
2293 if (ia32_cg_config.use_incdec) {
2294 if (is_Const_1(op2)) {
2295 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2297 } else if (is_Const_Minus_1(op2)) {
2298 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2302 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2303 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2304 match_commutative | match_immediate);
2307 op1 = get_Sub_left(val);
2308 op2 = get_Sub_right(val);
2309 if (is_Const(op2)) {
2310 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2312 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2313 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2317 op1 = get_And_left(val);
2318 op2 = get_And_right(val);
2319 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2320 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2321 match_commutative | match_immediate);
2324 op1 = get_Or_left(val);
2325 op2 = get_Or_right(val);
2326 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2327 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2328 match_commutative | match_immediate);
2331 op1 = get_Eor_left(val);
2332 op2 = get_Eor_right(val);
2333 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2334 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2335 match_commutative | match_immediate);
/* Shifts/rotates have no 8bit variant constructor, so the same
 * constructor is passed for both slots. */
2338 op1 = get_Shl_left(val);
2339 op2 = get_Shl_right(val);
2340 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2341 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2345 op1 = get_Shr_left(val);
2346 op2 = get_Shr_right(val);
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2352 op1 = get_Shrs_left(val);
2353 op2 = get_Shrs_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2359 op1 = get_Rotl_left(val);
2360 op2 = get_Rotl_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2365 /* TODO: match ROR patterns... */
2367 new_node = try_create_SetMem(val, ptr, mem);
2371 op1 = get_Minus_op(val);
2372 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2375 /* should be lowered already */
2376 assert(mode != mode_b);
2377 op1 = get_Not_op(val);
2378 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Preserve the original store's pinned state on the new memop. */
2384 if (new_node != NULL) {
2385 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2386 get_irn_pinned(node) == op_pin_state_pinned) {
2387 set_irn_pinned(new_node, op_pin_state_pinned);
/**
 * Check whether an integer mode can be the target of an x87 fist store:
 * fist supports signed 16 and 32 bit destinations (the 64bit case is
 * handled elsewhere).
 */
2394 static bool possible_int_mode_for_fp(ir_mode *mode)
2398 if (!mode_is_signed(mode))
2400 size = get_mode_size_bits(mode);
2401 if (size != 16 && size != 32)
/**
 * Test whether node is a Conv from a float mode to a fist-compatible
 * integer mode, i.e. whether a following store can be combined into a
 * float->int store (fist/fisttp).
 */
2406 static int is_float_to_int_conv(const ir_node *node)
2408 ir_mode *mode = get_irn_mode(node);
2412 if (!possible_int_mode_for_fp(mode))
2417 conv_op = get_Conv_op(node);
2418 conv_mode = get_irn_mode(conv_op);
2420 if (!mode_is_float(conv_mode))
2427 * Transform a Store(floatConst) into a sequence of
2430 * @return the created ia32 Store node
/* Splits the constant's bit pattern into 32bit words and emits one
 * integer Store per word (1 store for float, 2 for double), joined by a
 * Sync when more than one store is produced. */
2432 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2434 ir_mode *mode = get_irn_mode(cns);
2435 unsigned size = get_mode_size_bytes(mode);
2436 tarval *tv = get_Const_tarval(cns);
2437 ir_node *block = get_nodes_block(node);
2438 ir_node *new_block = be_transform_node(block);
2439 ir_node *ptr = get_Store_ptr(node);
2440 ir_node *mem = get_Store_mem(node);
2441 dbg_info *dbgi = get_irn_dbg_info(node);
2445 ia32_address_t addr;
/* Only whole 32bit words are handled (float=4, double=8 bytes). */
2447 assert(size % 4 == 0);
2450 build_address_ptr(&addr, ptr, mem);
/* Assemble one little-endian 32bit word from the tarval's bytes. */
2454 get_tarval_sub_bits(tv, ofs) |
2455 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2456 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2457 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2458 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2460 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2461 addr.index, addr.mem, imm);
2463 set_irn_pinned(new_node, get_irn_pinned(node));
2464 set_ia32_op_type(new_node, ia32_AddrModeD);
2465 set_ia32_ls_mode(new_node, mode_Iu);
2466 set_address(new_node, &addr);
2467 SET_IA32_ORIG_NODE(new_node, node);
2470 ins[i++] = new_node;
2475 } while (size != 0);
/* Combine the memory outputs of all partial stores. */
2478 return new_rd_Sync(dbgi, new_block, i, ins);
2485 * Generate a vfist or vfisttp instruction.
/* @param fist  out-parameter: receives the actual store node (the value
 *              returned is its memory result — presumably, based on the
 *              Proj taken in the fisttp branch; confirm in full file). */
2487 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2488 ir_node *mem, ir_node *val, ir_node **fist)
2492 if (ia32_cg_config.use_fisttp) {
2493 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2494 if other users exists */
2495 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2496 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep forces the popped value to be duplicated for other users. */
2497 be_new_Keep(block, 1, &value);
2499 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* Without fisttp, plain fist needs the FPU control word switched to
 * truncation mode for C-style float->int conversion. */
2502 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2505 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2511 * Transforms a general (no special case) Store.
2513 * @return the created ia32 Store node
2515 static ir_node *gen_general_Store(ir_node *node)
2517 ir_node *val = get_Store_value(node);
2518 ir_mode *mode = get_irn_mode(val);
2519 ir_node *block = get_nodes_block(node);
2520 ir_node *new_block = be_transform_node(block);
2521 ir_node *ptr = get_Store_ptr(node);
2522 ir_node *mem = get_Store_mem(node);
2523 dbg_info *dbgi = get_irn_dbg_info(node);
2524 ir_node *new_val, *new_node, *store;
2525 ia32_address_t addr;
2527 /* check for destination address mode */
2528 new_node = try_create_dest_am(node);
2529 if (new_node != NULL)
2532 /* construct store address */
2533 memset(&addr, 0, sizeof(addr));
2534 ia32_create_address_mode(&addr, ptr, 0);
2536 if (addr.base == NULL) {
2537 addr.base = noreg_GP;
2539 addr.base = be_transform_node(addr.base);
2542 if (addr.index == NULL) {
2543 addr.index = noreg_GP;
2545 addr.index = be_transform_node(addr.index);
2547 addr.mem = be_transform_node(mem);
/* Float store: SSE2 xStore or x87 vfst. */
2549 if (mode_is_float(mode)) {
2550 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2552 while (is_Conv(val) && mode == get_irn_mode(val)) {
2553 ir_node *op = get_Conv_op(val);
2554 if (!mode_is_float(get_irn_mode(op)))
2558 new_val = be_transform_node(val);
2559 if (ia32_cg_config.use_sse2) {
2560 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2561 addr.index, addr.mem, new_val);
2563 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2564 addr.index, addr.mem, new_val, mode);
/* x87 float->int store: fold the Conv into a fist/fisttp. */
2567 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2568 val = get_Conv_op(val);
2570 /* TODO: is this optimisation still necessary at all (middleend)? */
2571 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2572 while (is_Conv(val)) {
2573 ir_node *op = get_Conv_op(val);
2574 if (!mode_is_float(get_irn_mode(op)))
2576 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2580 new_val = be_transform_node(val);
2581 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* Integer store: 8bit stores need the dedicated Store8Bit (byte
 * registers), everything else uses the plain Store. */
2583 new_val = create_immediate_or_transform(val, 0);
2584 assert(mode != mode_b);
2586 if (get_mode_size_bits(mode) == 8) {
2587 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2588 addr.index, addr.mem, new_val);
2590 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2591 addr.index, addr.mem, new_val);
/* `store` is the actual store node (may differ from new_node in the
 * fist case); attach the address/AM attributes to it. */
2596 set_irn_pinned(store, get_irn_pinned(node));
2597 set_ia32_op_type(store, ia32_AddrModeD);
2598 set_ia32_ls_mode(store, mode);
2600 set_address(store, &addr);
2601 SET_IA32_ORIG_NODE(store, node);
2607 * Transforms a Store.
2609 * @return the created ia32 Store node
/* Dispatcher: float constant stores are rewritten as integer stores
 * (gen_float_const_Store), everything else goes through gen_general_Store. */
2611 static ir_node *gen_Store(ir_node *node)
2613 ir_node *val = get_Store_value(node);
2614 ir_mode *mode = get_irn_mode(val);
2616 if (mode_is_float(mode) && is_Const(val)) {
2617 /* We can transform every floating const store
2618 into a sequence of integer stores.
2619 If the constant is already in a register,
2620 it would be better to use it, but we don't
2621 have this information here. */
2622 return gen_float_const_Store(node, val);
2624 return gen_general_Store(node);
2628 * Transforms a Switch.
2630 * @return the created ia32 SwitchJmp node
/* Builds an ia32 SwitchJmp from a Cond on a 32-bit selector.  Scans all
 * case Projs for min/max, rejects tables wider than 128000 entries, and
 * biases the selector with an LEA when the smallest case is not 0. */
2632 static ir_node *create_Switch(ir_node *node)
2634 dbg_info *dbgi = get_irn_dbg_info(node);
2635 ir_node *block = be_transform_node(get_nodes_block(node));
2636 ir_node *sel = get_Cond_selector(node);
2637 ir_node *new_sel = be_transform_node(sel);
2638 long switch_min = LONG_MAX;
2639 long switch_max = LONG_MIN;
2640 long default_pn = get_Cond_default_proj(node);
2642 const ir_edge_t *edge;
2644 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2646 /* determine the smallest switch case value */
2647 foreach_out_edge(node, edge) {
2648 ir_node *proj = get_edge_src_irn(edge);
2649 long pn = get_Proj_proj(proj);
/* default proj does not take part in the min/max computation */
2650 if (pn == default_pn)
2653 if (pn < switch_min)
2655 if (pn > switch_max)
/* unsigned subtraction also catches switch_max < switch_min overflow */
2659 if ((unsigned long) (switch_max - switch_min) > 128000) {
2660 panic("Size of switch %+F bigger than 128000", node);
2663 if (switch_min != 0) {
2664 /* if smallest switch case is not 0 we need an additional sub */
2665 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2666 add_ia32_am_offs_int(new_sel, -switch_min);
2667 set_ia32_op_type(new_sel, ia32_AddrModeS);
2669 SET_IA32_ORIG_NODE(new_sel, node);
2672 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2673 SET_IA32_ORIG_NODE(new_node, node);
2679 * Transform a Cond node.
/* mode_b selectors become a conditional jump (Jcc) fed by a flags-producing
 * node; any other selector mode is a switch and goes to create_Switch. */
2681 static ir_node *gen_Cond(ir_node *node)
2683 ir_node *block = get_nodes_block(node);
2684 ir_node *new_block = be_transform_node(block);
2685 dbg_info *dbgi = get_irn_dbg_info(node);
2686 ir_node *sel = get_Cond_selector(node);
2687 ir_mode *sel_mode = get_irn_mode(sel);
2688 ir_node *flags = NULL;
2692 if (sel_mode != mode_b) {
2693 return create_Switch(node);
2696 /* we get flags from a Cmp */
2697 flags = get_flags_node(sel, &pnc);
2699 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2700 SET_IA32_ORIG_NODE(new_node, node);
2706 * Transform a be_Copy.
/* Duplicates the Copy and normalizes GP-register modes to mode_Iu. */
2708 static ir_node *gen_be_Copy(ir_node *node)
2710 ir_node *new_node = be_duplicate_node(node);
2711 ir_mode *mode = get_irn_mode(new_node);
2713 if (ia32_mode_needs_gp_reg(mode)) {
2714 set_irn_mode(new_node, mode_Iu);
/* x87 float compare: uses fucomi when available, otherwise
 * ftst/fucom + fnstsw followed by sahf to move FPU flags into EFLAGS. */
2720 static ir_node *create_Fucom(ir_node *node)
2722 dbg_info *dbgi = get_irn_dbg_info(node);
2723 ir_node *block = get_nodes_block(node);
2724 ir_node *new_block = be_transform_node(block);
2725 ir_node *left = get_Cmp_left(node);
2726 ir_node *new_left = be_transform_node(left);
2727 ir_node *right = get_Cmp_right(node);
2731 if (ia32_cg_config.use_fucomi) {
2732 new_right = be_transform_node(right);
2733 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2735 set_ia32_commutative(new_node);
2736 SET_IA32_ORIG_NODE(new_node, node);
/* ftst only works against 0.0, hence the Const-0 special case */
2738 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2739 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2741 new_right = be_transform_node(right);
2742 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2745 set_ia32_commutative(new_node);
2747 SET_IA32_ORIG_NODE(new_node, node);
/* sahf transfers AH (fnstsw result) into the EFLAGS register */
2749 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2750 SET_IA32_ORIG_NODE(new_node, node);
/* SSE2 float compare via ucomiss/ucomisd; operands may be matched as a
 * memory address-mode operand (match_am). */
2756 static ir_node *create_Ucomi(ir_node *node)
2758 dbg_info *dbgi = get_irn_dbg_info(node);
2759 ir_node *src_block = get_nodes_block(node);
2760 ir_node *new_block = be_transform_node(src_block);
2761 ir_node *left = get_Cmp_left(node);
2762 ir_node *right = get_Cmp_right(node);
2764 ia32_address_mode_t am;
2765 ia32_address_t *addr = &am.addr;
2767 match_arguments(&am, src_block, left, right, NULL,
2768 match_commutative | match_am);
2770 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2771 addr->mem, am.new_op1, am.new_op2,
2773 set_am_attributes(new_node, &am);
2775 SET_IA32_ORIG_NODE(new_node, node);
/* if a memory operand was folded, reroute the memory Proj */
2777 new_node = fix_mem_proj(new_node, &am);
2783 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2784 * to fold an and into a test node
/* Returns false as soon as any consumer Proj uses a relation other than
 * Eq/Lg, since TEST only produces meaningful ZF for those. */
2786 static bool can_fold_test_and(ir_node *node)
2788 const ir_edge_t *edge;
2790 /** we can only have eq and lg projs */
2791 foreach_out_edge(node, edge) {
2792 ir_node *proj = get_edge_src_irn(edge);
2793 pn_Cmp pnc = get_Proj_proj(proj);
2794 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2802 * returns true if it is assured, that the upper bits of a node are "clean"
2803 * which means for a 16 or 8 bit value, that the upper bits in the register
2804 * are 0 for unsigned and a copy of the last significant bit for signed
/* Recursive analysis over already-transformed ia32 nodes; conservative:
 * returning false is always safe, it only costs a wider compare. */
2807 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2809 assert(ia32_mode_needs_gp_reg(mode));
2810 if (get_mode_size_bits(mode) >= 32)
2813 if (is_Proj(transformed_node))
2814 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2816 switch (get_ia32_irn_opcode(transformed_node)) {
2817 case iro_ia32_Conv_I2I:
2818 case iro_ia32_Conv_I2I8Bit: {
2819 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv only cleans the bits if signedness matches and it is not wider */
2820 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2822 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2829 if (mode_is_signed(mode)) {
2830 return false; /* TODO handle signed modes */
/* Shr by a constant >= (32 - bits) forces the upper bits to zero */
2832 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2833 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2834 const ia32_immediate_attr_t *attr
2835 = get_ia32_immediate_attr_const(right);
2836 if (attr->symconst == 0 &&
2837 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2841 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2845 /* TODO too conservative if shift amount is constant */
2846 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned it suffices that ONE operand has clean upper bits */
2849 if (!mode_is_signed(mode)) {
2851 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2852 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2854 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary case: both operands must be clean */
2859 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2860 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2862 case iro_ia32_Const:
2863 case iro_ia32_Immediate: {
2864 const ia32_immediate_attr_t *attr =
2865 get_ia32_immediate_attr_const(transformed_node);
2866 if (mode_is_signed(mode)) {
/* signed: all upper bits must equal the sign bit (all 0 or all 1) */
2867 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2868 return shifted == 0 || shifted == -1;
2870 unsigned long shifted = (unsigned long)attr->offset;
2871 shifted >>= get_mode_size_bits(mode);
2872 return shifted == 0;
2882 * Generate code for a Cmp.
/* Integer compares become ia32 Cmp/Cmp8Bit or, for (x & y) ==/!= 0 with a
 * single-use And, a Test/Test8Bit.  Float compares are delegated to the
 * SSE2 (Ucomi) or x87 (Fucom) helpers. */
2884 static ir_node *gen_Cmp(ir_node *node)
2886 dbg_info *dbgi = get_irn_dbg_info(node);
2887 ir_node *block = get_nodes_block(node);
2888 ir_node *new_block = be_transform_node(block);
2889 ir_node *left = get_Cmp_left(node);
2890 ir_node *right = get_Cmp_right(node);
2891 ir_mode *cmp_mode = get_irn_mode(left);
2893 ia32_address_mode_t am;
2894 ia32_address_t *addr = &am.addr;
2897 if (mode_is_float(cmp_mode)) {
2898 if (ia32_cg_config.use_sse2) {
2899 return create_Ucomi(node);
2901 return create_Fucom(node);
2905 assert(ia32_mode_needs_gp_reg(cmp_mode));
2907 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2908 cmp_unsigned = !mode_is_signed(cmp_mode);
2909 if (is_Const_0(right) &&
2911 get_irn_n_edges(left) == 1 &&
2912 can_fold_test_and(node)) {
2913 /* Test(and_left, and_right) */
2914 ir_node *and_left = get_And_left(left);
2915 ir_node *and_right = get_And_right(left);
2917 /* matze: code here used mode instead of cmd_mode, I think it is always
2918 * the same as cmp_mode, but I leave this here to see if this is really
2921 assert(get_irn_mode(and_left) == cmp_mode);
2923 match_arguments(&am, block, and_left, and_right, NULL,
2925 match_am | match_8bit_am | match_16bit_am |
2926 match_am_and_immediates | match_immediate);
2928 /* use 32bit compare mode if possible since the opcode is smaller */
2929 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2930 upper_bits_clean(am.new_op2, cmp_mode)) {
2931 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2934 if (get_mode_size_bits(cmp_mode) == 8) {
2935 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2936 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2939 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2940 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2943 /* Cmp(left, right) */
2944 match_arguments(&am, block, left, right, NULL,
2945 match_commutative | match_am | match_8bit_am |
2946 match_16bit_am | match_am_and_immediates |
2948 /* use 32bit compare mode if possible since the opcode is smaller */
2949 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2950 upper_bits_clean(am.new_op2, cmp_mode)) {
2951 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2954 if (get_mode_size_bits(cmp_mode) == 8) {
2955 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2956 addr->index, addr->mem, am.new_op1,
2957 am.new_op2, am.ins_permuted,
2960 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2961 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2964 set_am_attributes(new_node, &am);
2965 set_ia32_ls_mode(new_node, cmp_mode);
2967 SET_IA32_ORIG_NODE(new_node, node);
2969 new_node = fix_mem_proj(new_node, &am);
/* Builds a CMovcc from a Mux whose values are GP-register sized; if
 * match_arguments swapped the operands, the condition is negated. */
2974 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2977 dbg_info *dbgi = get_irn_dbg_info(node);
2978 ir_node *block = get_nodes_block(node);
2979 ir_node *new_block = be_transform_node(block);
2980 ir_node *val_true = get_Mux_true(node);
2981 ir_node *val_false = get_Mux_false(node);
2983 ia32_address_mode_t am;
2984 ia32_address_t *addr;
2986 assert(ia32_cg_config.use_cmov);
2987 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2991 match_arguments(&am, block, val_false, val_true, flags,
2992 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2994 if (am.ins_permuted)
2995 pnc = ia32_get_negated_pnc(pnc);
2997 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2998 addr->mem, am.new_op1, am.new_op2, new_flags,
3000 set_am_attributes(new_node, &am);
3002 SET_IA32_ORIG_NODE(new_node, node);
3004 new_node = fix_mem_proj(new_node, &am);
3010 * Creates a ia32 Setcc instruction.
/* SETcc only writes an 8-bit register, so results wider than 8 bits are
 * zero-extended with a Conv_I2I8Bit (mode_Bu = unsigned byte). */
3012 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3013 ir_node *flags, pn_Cmp pnc,
3016 ir_mode *mode = get_irn_mode(orig_node);
3019 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3020 SET_IA32_ORIG_NODE(new_node, orig_node);
3022 /* we might need to conv the result up */
3023 if (get_mode_size_bits(mode) > 8) {
3024 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3025 nomem, new_node, mode_Bu);
3026 SET_IA32_ORIG_NODE(new_node, orig_node);
3033 * Create instruction for an unsigned Difference or Zero.
/* Branch-free doz(a,b) = (a - b) & ~(borrow mask): sub sets the carry,
 * sbb 0 materializes the borrow, not inverts it, and masks the result. */
3035 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3037 ir_mode *mode = get_irn_mode(psi);
3047 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3048 match_mode_neutral | match_am | match_immediate | match_two_users);
3050 block = get_nodes_block(new_node);
3052 if (is_Proj(new_node)) {
3053 sub = get_Proj_pred(new_node);
3054 assert(is_ia32_Sub(sub));
/* turn the Sub into mode_T so both the result and the flags can be used */
3057 set_irn_mode(sub, mode_T);
3058 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3060 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3062 dbgi = get_irn_dbg_info(psi);
3063 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3064 not = new_bd_ia32_Not(dbgi, block, sbb);
3066 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3067 set_ia32_commutative(new_node);
3072 * Create an const array of two float consts.
3074 * @param c0 the first constant
3075 * @param c1 the second constant
3076 * @param new_mode IN/OUT for the mode of the constants, if NULL
3077 * smallest possible mode will be used
/* Emits a local, constant, two-element float array entity in the global
 * type; shrinks the tarvals to float/double when lossless. */
3079 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3082 ir_mode *mode = *new_mode;
3084 ir_initializer_t *initializer;
3085 tarval *tv0 = get_Const_tarval(c0);
3086 tarval *tv1 = get_Const_tarval(c1);
3089 /* detect the best mode for the constants */
3090 mode = get_tarval_mode(tv0);
3092 if (mode != mode_F) {
3093 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3094 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3096 tv0 = tarval_convert_to(tv0, mode);
3097 tv1 = tarval_convert_to(tv1, mode);
3098 } else if (mode != mode_D) {
3099 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3100 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3102 tv0 = tarval_convert_to(tv0, mode);
3103 tv1 = tarval_convert_to(tv1, mode);
/* NOTE(review): 4-byte alignment looks intentional here — confirm */
3110 tp = ia32_create_float_type(mode, 4);
3111 tp = ia32_create_float_array(tp);
3113 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3115 set_entity_ld_ident(ent, get_entity_ident(ent));
3116 set_entity_visibility(ent, ir_visibility_local);
3117 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3119 initializer = create_initializer_compound(2);
3121 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3122 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3124 set_entity_initializer(ent, initializer);
3131 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc-based constant selection may be lowered to
 * (set/neg/lea/shl/and/... — full enumerator list elided in this view). */
3133 enum setcc_transform_insn {
/* Recipe produced by find_const_transform: an ordered list of steps that
 * turn a 0/1 Setcc result into the requested true/false constants. */
3146 typedef struct setcc_transform {
/* non-zero: the Cmp inputs must be swapped before applying the steps */
3148 unsigned permutate_cmp_ins;
3151 enum setcc_transform_insn transform;
3155 } setcc_transform_t;
3158 * Setcc can only handle 0 and 1 result.
3159 * Find a transformation that creates 0 and 1 from
/* Normalizes (t, f) so that f becomes 0 (negating the condition and/or
 * subtracting f), then picks the cheapest instruction sequence to scale
 * the 0/1 Setcc result up to t: set, neg, lea, shl, and, add. */
3162 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3163 setcc_transform_t *res)
3168 res->permutate_cmp_ins = 0;
/* make sure t is the non-zero (and bigger) constant */
3170 if (tarval_is_null(t)) {
3174 pnc = ia32_get_negated_pnc(pnc);
3175 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3176 // now, t is the bigger one
3180 pnc = ia32_get_negated_pnc(pnc);
3184 if (! tarval_is_null(f)) {
/* reduce to the f == 0 case: compute t-f, remember an ADD f step */
3185 tarval *t_sub = tarval_sub(t, f, NULL);
3188 res->steps[step].transform = SETCC_TR_ADD;
3190 if (t == tarval_bad)
3191 panic("constant subtract failed");
3192 if (! tarval_is_long(f))
3193 panic("tarval is not long");
3195 res->steps[step].val = get_tarval_long(f);
3197 f = tarval_sub(f, f, NULL);
3198 assert(tarval_is_null(f));
3201 if (tarval_is_one(t)) {
3202 res->steps[step].transform = SETCC_TR_SET;
3203 res->num_steps = ++step;
3207 if (tarval_is_minus_one(t)) {
3208 res->steps[step].transform = SETCC_TR_NEG;
3210 res->steps[step].transform = SETCC_TR_SET;
3211 res->num_steps = ++step;
3214 if (tarval_is_long(t)) {
3215 long v = get_tarval_long(t);
3217 res->steps[step].val = 0;
/* v == 9: lea with scale 3 computes (a << 3) + a */
3220 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3222 res->steps[step].transform = SETCC_TR_LEAxx;
3223 res->steps[step].scale = 3; /* (a << 3) + a */
3226 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3228 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3229 res->steps[step].scale = 3; /* (a << 3) */
3232 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3234 res->steps[step].transform = SETCC_TR_LEAxx;
3235 res->steps[step].scale = 2; /* (a << 2) + a */
3238 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3240 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3241 res->steps[step].scale = 2; /* (a << 2) */
3244 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3246 res->steps[step].transform = SETCC_TR_LEAxx;
3247 res->steps[step].scale = 1; /* (a << 1) + a */
3250 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3252 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3253 res->steps[step].scale = 1; /* (a << 1) */
3256 res->num_steps = step;
/* generic case: mask (if more than one bit) and shift to position */
3259 if (! tarval_is_single_bit(t)) {
3260 res->steps[step].transform = SETCC_TR_AND;
3261 res->steps[step].val = v;
3263 res->steps[step].transform = SETCC_TR_NEG;
3265 int v = get_tarval_lowest_bit(t);
3268 res->steps[step].transform = SETCC_TR_SHL;
3269 res->steps[step].scale = v;
3273 res->steps[step].transform = SETCC_TR_SET;
3274 res->num_steps = ++step;
3277 panic("tarval is not long");
3281 * Transforms a Mux node into some code sequence.
3283 * @return The transformed node.
/* Mux lowering with several fast paths: SSE min/max, float const-pair
 * loaded from a two-element const array indexed by a Setcc, unsigned
 * difference-or-zero, pure-constant Setcc arithmetic, and finally CMov. */
3285 static ir_node *gen_Mux(ir_node *node)
3287 dbg_info *dbgi = get_irn_dbg_info(node);
3288 ir_node *block = get_nodes_block(node);
3289 ir_node *new_block = be_transform_node(block);
3290 ir_node *mux_true = get_Mux_true(node);
3291 ir_node *mux_false = get_Mux_false(node);
3292 ir_node *cond = get_Mux_sel(node);
3293 ir_mode *mode = get_irn_mode(node);
3298 assert(get_irn_mode(cond) == mode_b);
3300 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3301 if (mode_is_float(mode)) {
3302 ir_node *cmp = get_Proj_pred(cond);
3303 ir_node *cmp_left = get_Cmp_left(cmp);
3304 ir_node *cmp_right = get_Cmp_right(cmp);
3305 pn_Cmp pnc = get_Proj_proj(cond);
3307 if (ia32_cg_config.use_sse2) {
3308 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3309 if (cmp_left == mux_true && cmp_right == mux_false) {
3310 /* Mux(a <= b, a, b) => MIN */
3311 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3312 match_commutative | match_am | match_two_users);
3313 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3314 /* Mux(a <= b, b, a) => MAX */
3315 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3316 match_commutative | match_am | match_two_users);
3318 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3319 if (cmp_left == mux_true && cmp_right == mux_false) {
3320 /* Mux(a >= b, a, b) => MAX */
3321 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3322 match_commutative | match_am | match_two_users);
3323 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3324 /* Mux(a >= b, b, a) => MIN */
3325 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3326 match_commutative | match_am | match_two_users);
/* float Mux of two constants: Setcc result indexes a 2-entry table */
3331 if (is_Const(mux_true) && is_Const(mux_false)) {
3332 ia32_address_mode_t am;
3337 flags = get_flags_node(cond, &pnc);
3338 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3340 if (ia32_cg_config.use_sse2) {
3341 /* cannot load from different mode on SSE */
3344 /* x87 can load any mode */
3348 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (no 16-byte am scale exists) */
3350 switch (get_mode_size_bytes(new_mode)) {
3360 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3361 set_ia32_am_scale(new_node, 2);
3366 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3367 set_ia32_am_scale(new_node, 1);
3370 /* arg, shift 16 NOT supported */
3372 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3375 panic("Unsupported constant size");
3378 am.ls_mode = new_mode;
3379 am.addr.base = get_symconst_base();
3380 am.addr.index = new_node;
3381 am.addr.mem = nomem;
3383 am.addr.scale = scale;
3384 am.addr.use_frame = 0;
3385 am.addr.frame_entity = NULL;
3386 am.addr.symconst_sign = 0;
3387 am.mem_proj = am.addr.mem;
3388 am.op_type = ia32_AddrModeS;
3391 am.pinned = op_pin_state_floats;
3393 am.ins_permuted = 0;
3395 if (ia32_cg_config.use_sse2)
3396 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3398 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3399 set_am_attributes(load, &am);
3401 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3403 panic("cannot transform floating point Mux");
3406 assert(ia32_mode_needs_gp_reg(mode));
3408 if (is_Proj(cond)) {
3409 ir_node *cmp = get_Proj_pred(cond);
3411 ir_node *cmp_left = get_Cmp_left(cmp);
3412 ir_node *cmp_right = get_Cmp_right(cmp);
3413 pn_Cmp pnc = get_Proj_proj(cond);
3415 /* check for unsigned Doz first */
3416 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3417 is_Const_0(mux_false) && is_Sub(mux_true) &&
3418 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3419 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3420 return create_doz(node, cmp_left, cmp_right);
3421 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3422 is_Const_0(mux_true) && is_Sub(mux_false) &&
3423 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3424 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3425 return create_doz(node, cmp_left, cmp_right);
3430 flags = get_flags_node(cond, &pnc);
3432 if (is_Const(mux_true) && is_Const(mux_false)) {
3433 /* both are const, good */
3434 tarval *tv_true = get_Const_tarval(mux_true);
3435 tarval *tv_false = get_Const_tarval(mux_false);
3436 setcc_transform_t res;
3439 find_const_transform(pnc, tv_true, tv_false, &res);
3441 if (res.permutate_cmp_ins) {
3442 ia32_attr_t *attr = get_ia32_attr(flags);
3443 attr->data.ins_permuted ^= 1;
/* apply the recipe back-to-front: steps were recorded outermost-first */
3445 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3448 switch (res.steps[step].transform) {
3450 imm = ia32_immediate_from_long(res.steps[step].val);
3451 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3453 case SETCC_TR_ADDxx:
3454 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3457 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3458 set_ia32_am_scale(new_node, res.steps[step].scale);
3459 set_ia32_am_offs_int(new_node, res.steps[step].val);
3461 case SETCC_TR_LEAxx:
3462 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3463 set_ia32_am_scale(new_node, res.steps[step].scale);
3464 set_ia32_am_offs_int(new_node, res.steps[step].val);
3467 imm = ia32_immediate_from_long(res.steps[step].scale);
3468 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3471 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3474 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3477 imm = ia32_immediate_from_long(res.steps[step].val);
3478 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3481 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3484 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3487 panic("unknown setcc transform");
/* general case: conditional move */
3491 new_node = create_CMov(node, cond, flags, pnc);
3499 * Create a conversion from x87 state register to general purpose.
/* fist-stores the x87 value to the stack frame, then integer-loads it
 * back.  32-bit unsigned values are spilled as 64-bit signed and only the
 * low word is reloaded. */
3501 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3503 ir_node *block = be_transform_node(get_nodes_block(node));
3504 ir_node *op = get_Conv_op(node);
3505 ir_node *new_op = be_transform_node(op);
3506 ir_graph *irg = current_ir_graph;
3507 dbg_info *dbgi = get_irn_dbg_info(node);
3508 ir_mode *mode = get_irn_mode(node);
3509 ir_node *fist, *load, *mem;
3511 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3512 set_irn_pinned(fist, op_pin_state_floats);
3513 set_ia32_use_frame(fist);
3514 set_ia32_op_type(fist, ia32_AddrModeD);
3516 assert(get_mode_size_bits(mode) <= 32);
3517 /* exception we can only store signed 32 bit integers, so for unsigned
3518 we store a 64bit (signed) integer and load the lower bits */
3519 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3520 set_ia32_ls_mode(fist, mode_Ls);
3522 set_ia32_ls_mode(fist, mode_Is);
3524 SET_IA32_ORIG_NODE(fist, node);
3527 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3529 set_irn_pinned(load, op_pin_state_floats);
3530 set_ia32_use_frame(load);
3531 set_ia32_op_type(load, ia32_AddrModeS);
3532 set_ia32_ls_mode(load, mode_Is);
/* the stack entity must match the width actually stored by the fist */
3533 if (get_ia32_ls_mode(fist) == mode_Ls) {
3534 ia32_attr_t *attr = get_ia32_attr(load);
3535 attr->data.need_64bit_stackent = 1;
3537 ia32_attr_t *attr = get_ia32_attr(load);
3538 attr->data.need_32bit_stackent = 1;
3540 SET_IA32_ORIG_NODE(load, node);
3542 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3546 * Creates a x87 strict Conv by placing a Store and a Load
/* Forces rounding to tgt_mode precision by a vfst/vfld round trip through
 * the stack frame (x87 registers always hold 80-bit extended values). */
3548 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3550 ir_node *block = get_nodes_block(node);
3551 ir_graph *irg = get_Block_irg(block);
3552 dbg_info *dbgi = get_irn_dbg_info(node);
3553 ir_node *frame = get_irg_frame(irg);
3554 ir_node *store, *load;
3557 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3558 set_ia32_use_frame(store);
3559 set_ia32_op_type(store, ia32_AddrModeD);
3560 SET_IA32_ORIG_NODE(store, node);
3562 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3563 set_ia32_use_frame(load);
3564 set_ia32_op_type(load, ia32_AddrModeS);
3565 SET_IA32_ORIG_NODE(load, node);
3567 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Constructs the right int->int conv node: the 8-bit variant when the
 * target mode is 8 bits wide, the generic Conv_I2I otherwise. */
3571 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3572 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3574 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3576 func = get_mode_size_bits(mode) == 8 ?
3577 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3578 return func(dbgi, block, base, index, mem, val, mode);
3582 * Create a conversion from general purpose to x87 register
/* int -> x87 float: either fild directly from memory (source AM for
 * signed 16/32-bit ints) or spill the value to the frame and fild it.
 * 32-bit unsigned needs a 64-bit spill with a zeroed high word. */
3584 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3586 ir_node *src_block = get_nodes_block(node);
3587 ir_node *block = be_transform_node(src_block);
3588 ir_graph *irg = get_Block_irg(block);
3589 dbg_info *dbgi = get_irn_dbg_info(node);
3590 ir_node *op = get_Conv_op(node);
3591 ir_node *new_op = NULL;
3593 ir_mode *store_mode;
3598 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3599 if (possible_int_mode_for_fp(src_mode)) {
3600 ia32_address_mode_t am;
3602 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3603 if (am.op_type == ia32_AddrModeS) {
3604 ia32_address_t *addr = &am.addr;
3606 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3607 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3609 set_am_attributes(fild, &am);
3610 SET_IA32_ORIG_NODE(fild, node);
3612 fix_mem_proj(fild, &am);
3617 if (new_op == NULL) {
3618 new_op = be_transform_node(op);
3621 mode = get_irn_mode(op);
3623 /* first convert to 32 bit signed if necessary */
3624 if (get_mode_size_bits(src_mode) < 32) {
3625 if (!upper_bits_clean(new_op, src_mode)) {
3626 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3627 SET_IA32_ORIG_NODE(new_op, node);
3632 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can pick it up */
3635 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3637 set_ia32_use_frame(store);
3638 set_ia32_op_type(store, ia32_AddrModeD);
3639 set_ia32_ls_mode(store, mode_Iu);
3641 /* exception for 32bit unsigned, do a 64bit spill+load */
3642 if (!mode_is_signed(mode)) {
3645 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* zero the upper 4 bytes so the 64-bit value is read as non-negative */
3647 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3648 noreg_GP, nomem, zero_const);
3650 set_ia32_use_frame(zero_store);
3651 set_ia32_op_type(zero_store, ia32_AddrModeD);
3652 add_ia32_am_offs_int(zero_store, 4);
3653 set_ia32_ls_mode(zero_store, mode_Iu);
/* Sync merges both store memories before the fild consumes them */
3658 store = new_rd_Sync(dbgi, block, 2, in);
3659 store_mode = mode_Ls;
3661 store_mode = mode_Is;
3665 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3667 set_ia32_use_frame(fild);
3668 set_ia32_op_type(fild, ia32_AddrModeS);
3669 set_ia32_ls_mode(fild, store_mode);
3671 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3677 * Create a conversion from one integer mode into another one
/* Lowers an int->int Conv by truncating/extending through the smaller of
 * the two modes; skipped entirely when the upper bits are already clean. */
3679 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3680 dbg_info *dbgi, ir_node *block, ir_node *op,
3683 ir_node *new_block = be_transform_node(block);
3685 ir_mode *smaller_mode;
3686 ia32_address_mode_t am;
3687 ia32_address_t *addr = &am.addr;
3690 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3691 smaller_mode = src_mode;
3693 smaller_mode = tgt_mode;
3696 #ifdef DEBUG_libfirm
3698 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3703 match_arguments(&am, block, NULL, op, NULL,
3704 match_am | match_8bit_am | match_16bit_am);
3706 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3707 /* unnecessary conv. in theory it shouldn't have been AM */
3708 assert(is_ia32_NoReg_GP(addr->base));
3709 assert(is_ia32_NoReg_GP(addr->index));
3710 assert(is_NoMem(addr->mem));
3711 assert(am.addr.offset == 0);
3712 assert(am.addr.symconst_ent == NULL);
3716 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3717 addr->mem, am.new_op2, smaller_mode);
3718 set_am_attributes(new_node, &am);
3719 /* match_arguments assume that out-mode = in-mode, this isn't true here
3721 set_ia32_ls_mode(new_node, smaller_mode);
3722 SET_IA32_ORIG_NODE(new_node, node);
3723 new_node = fix_mem_proj(new_node, &am);
3728 * Transforms a Conv node.
3730 * @return The created ia32 Conv node
/* Central Conv dispatcher over the (float|int) x (float|int) matrix,
 * with SSE2 vs x87 paths and careful handling of strict conversions. */
3732 static ir_node *gen_Conv(ir_node *node)
3734 ir_node *block = get_nodes_block(node);
3735 ir_node *new_block = be_transform_node(block);
3736 ir_node *op = get_Conv_op(node);
3737 ir_node *new_op = NULL;
3738 dbg_info *dbgi = get_irn_dbg_info(node);
3739 ir_mode *src_mode = get_irn_mode(op);
3740 ir_mode *tgt_mode = get_irn_mode(node);
3741 int src_bits = get_mode_size_bits(src_mode);
3742 int tgt_bits = get_mode_size_bits(tgt_mode);
3743 ir_node *res = NULL;
3745 assert(!mode_is_int(src_mode) || src_bits <= 32);
3746 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3748 /* modeB -> X should already be lowered by the lower_mode_b pass */
3749 if (src_mode == mode_b) {
3750 panic("ConvB not lowered %+F", node);
3753 if (src_mode == tgt_mode) {
3754 if (get_Conv_strict(node)) {
3755 if (ia32_cg_config.use_sse2) {
3756 /* when we are in SSE mode, we can kill all strict no-op conversion */
3757 return be_transform_node(op);
3760 /* this should be optimized already, but who knows... */
3761 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3762 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3763 return be_transform_node(op);
3767 if (mode_is_float(src_mode)) {
3768 new_op = be_transform_node(op);
3769 /* we convert from float ... */
3770 if (mode_is_float(tgt_mode)) {
/* float -> float */
3772 if (ia32_cg_config.use_sse2) {
3773 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3774 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3776 set_ia32_ls_mode(res, tgt_mode);
3778 if (get_Conv_strict(node)) {
3779 /* if fp_no_float_fold is not set then we assume that we
3780 * don't have any float operations in a non
3781 * mode_float_arithmetic mode and can skip strict upconvs */
3782 if (src_bits < tgt_bits
3783 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3784 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3787 res = gen_x87_strict_conv(tgt_mode, new_op);
3788 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3792 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3797 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3798 if (ia32_cg_config.use_sse2) {
3799 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3801 set_ia32_ls_mode(res, src_mode);
3803 return gen_x87_fp_to_gp(node);
3807 /* we convert from int ... */
3808 if (mode_is_float(tgt_mode)) {
/* int -> float */
3810 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3811 if (ia32_cg_config.use_sse2) {
3812 new_op = be_transform_node(op);
3813 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3815 set_ia32_ls_mode(res, tgt_mode);
3817 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3818 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3819 res = gen_x87_gp_to_fp(node, src_mode);
3821 /* we need a strict-Conv, if the int mode has more bits than the
3823 if (float_mantissa < int_mantissa) {
3824 res = gen_x87_strict_conv(tgt_mode, res);
3825 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3829 } else if (tgt_mode == mode_b) {
3830 /* mode_b lowering already took care that we only have 0/1 values */
3831 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3832 src_mode, tgt_mode));
3833 return be_transform_node(op);
/* int -> int */
3836 if (src_bits == tgt_bits) {
3837 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3838 src_mode, tgt_mode));
3839 return be_transform_node(op);
3842 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode @p node as an ia32 Immediate (respecting the given inline-asm
 * style constraint character); fall back to the normal transformation when
 * that is not possible. */
3850 static ir_node *create_immediate_or_transform(ir_node *node,
3851 char immediate_constraint_type)
3853 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3854 if (new_node == NULL) {
3855 new_node = be_transform_node(node);
3861 * Transforms a FrameAddr into an ia32 Add.
3863 static ir_node *gen_be_FrameAddr(ir_node *node)
3865 ir_node *block = be_transform_node(get_nodes_block(node));
3866 ir_node *op = be_get_FrameAddr_frame(node);
3867 ir_node *new_op = be_transform_node(op);
3868 dbg_info *dbgi = get_irn_dbg_info(node);
/* produced as a Lea on the frame pointer; the frame entity's offset is
 * filled in later when stack frame layout is known */
3871 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3872 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3873 set_ia32_use_frame(new_node);
3875 SET_IA32_ORIG_NODE(new_node, node);
3881 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3883 static ir_node *gen_be_Return(ir_node *node)
3885 ir_graph *irg = current_ir_graph;
3886 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3887 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3888 ir_entity *ent = get_irg_entity(irg);
3889 ir_type *tp = get_entity_type(ent);
3894 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3895 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3897 int pn_ret_val, pn_ret_mem, arity, i;
/* only the SSE2 + float-return case needs special handling; everything
 * else is just duplicated unchanged */
3899 assert(ret_val != NULL);
3900 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3901 return be_duplicate_node(node);
3904 res_type = get_method_res_type(tp, 0);
3906 if (! is_Primitive_type(res_type)) {
3907 return be_duplicate_node(node);
3910 mode = get_type_mode(res_type);
3911 if (! mode_is_float(mode)) {
3912 return be_duplicate_node(node);
3915 assert(get_method_n_ress(tp) == 1);
3917 pn_ret_val = get_Proj_proj(ret_val);
3918 pn_ret_mem = get_Proj_proj(ret_mem);
3920 /* get the Barrier */
3921 barrier = get_Proj_pred(ret_val);
3923 /* get result input of the Barrier */
3924 ret_val = get_irn_n(barrier, pn_ret_val);
3925 new_ret_val = be_transform_node(ret_val);
3927 /* get memory input of the Barrier */
3928 ret_mem = get_irn_n(barrier, pn_ret_mem);
3929 new_ret_mem = be_transform_node(ret_mem);
3931 frame = get_irg_frame(irg);
3933 dbgi = get_irn_dbg_info(barrier);
3934 block = be_transform_node(get_nodes_block(barrier));
3936 /* store xmm0 onto stack */
3937 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3938 new_ret_mem, new_ret_val);
3939 set_ia32_ls_mode(sse_store, mode);
3940 set_ia32_op_type(sse_store, ia32_AddrModeD);
3941 set_ia32_use_frame(sse_store);
3943 /* load into x87 register */
3944 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3945 set_ia32_op_type(fld, ia32_AddrModeS);
3946 set_ia32_use_frame(fld);
3948 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3949 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3951 /* create a new barrier */
3952 arity = get_irn_arity(barrier);
3953 in = ALLOCAN(ir_node*, arity);
3954 for (i = 0; i < arity; ++i) {
/* result and memory inputs are replaced by the fld/mproj chain built
 * above; all other inputs are transformed normally */
3957 if (i == pn_ret_val) {
3959 } else if (i == pn_ret_mem) {
3962 ir_node *in = get_irn_n(barrier, i);
3963 new_in = be_transform_node(in);
3968 new_barrier = new_ir_node(dbgi, irg, block,
3969 get_irn_op(barrier), get_irn_mode(barrier),
3971 copy_node_attr(irg, barrier, new_barrier);
3972 be_duplicate_deps(barrier, new_barrier);
3973 be_set_transformed_node(barrier, new_barrier);
3975 /* transform normally */
3976 return be_duplicate_node(node);
3980 * Transform a be_AddSP into an ia32_SubSP.
/* NOTE: AddSP becomes SubSP (the stack grows towards lower addresses on
 * ia32, so growing the stack means subtracting from ESP). */
3982 static ir_node *gen_be_AddSP(ir_node *node)
3984 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3985 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3987 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3988 match_am | match_immediate);
3992 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the stack adds to ESP. */
3994 static ir_node *gen_be_SubSP(ir_node *node)
3996 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3997 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3999 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4000 match_am | match_immediate);
4004 * Change some phi modes
4006 static ir_node *gen_Phi(ir_node *node)
4008 const arch_register_req_t *req;
4009 ir_node *block = be_transform_node(get_nodes_block(node));
4010 ir_graph *irg = current_ir_graph;
4011 dbg_info *dbgi = get_irn_dbg_info(node);
4012 ir_mode *mode = get_irn_mode(node);
/* pick the output register class requirement depending on the Phi mode */
4015 if (ia32_mode_needs_gp_reg(mode)) {
4016 /* we shouldn't have any 64bit stuff around anymore */
4017 assert(get_mode_size_bits(mode) <= 32);
4018 /* all integer operations are on 32bit registers now */
4020 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4021 } else if (mode_is_float(mode)) {
4022 if (ia32_cg_config.use_sse2) {
4024 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4027 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4030 req = arch_no_register_req;
4033 /* phi nodes allow loops, so we use the old arguments for now
4034 * and fix this later */
4035 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4036 get_irn_in(node) + 1);
4037 copy_node_attr(irg, node, phi);
4038 be_duplicate_deps(node, phi);
4040 arch_set_out_register_req(phi, 0, req);
/* queue predecessors so their blocks get transformed too */
4042 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp. */
4047 static ir_node *gen_Jmp(ir_node *node)
4049 ir_node *block = get_nodes_block(node);
4050 ir_node *new_block = be_transform_node(block);
4051 dbg_info *dbgi = get_irn_dbg_info(node);
4054 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4055 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp); the target may be folded into an
 * address-mode operand or an immediate. */
4063 static ir_node *gen_IJmp(ir_node *node)
4065 ir_node *block = get_nodes_block(node);
4066 ir_node *new_block = be_transform_node(block);
4067 dbg_info *dbgi = get_irn_dbg_info(node);
4068 ir_node *op = get_IJmp_target(node);
4070 ia32_address_mode_t am;
4071 ia32_address_t *addr = &am.addr;
4073 assert(get_irn_mode(op) == mode_P);
/* try to match the target as address mode or immediate */
4075 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4077 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4078 addr->mem, am.new_op2);
4079 set_am_attributes(new_node, &am);
4080 SET_IA32_ORIG_NODE(new_node, node);
4082 new_node = fix_mem_proj(new_node, &am);
4088 * Transform a Bound node.
4090 static ir_node *gen_Bound(ir_node *node)
4093 ir_node *lower = get_Bound_lower(node);
4094 dbg_info *dbgi = get_irn_dbg_info(node);
/* only the lower==0 case is supported: then "0 <= index < upper" reduces
 * to a single unsigned compare (index - upper, branch on unsigned-less) */
4096 if (is_Const_0(lower)) {
4097 /* typical case for Java */
4098 ir_node *sub, *res, *flags, *block;
4100 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4101 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4103 block = get_nodes_block(res);
4104 if (! is_Proj(res)) {
4106 set_irn_mode(sub, mode_T);
4107 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4109 sub = get_Proj_pred(res);
/* branch on the flags of the Sub: unsigned "lower than" catches both
 * index >= upper and negative indices */
4111 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4112 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4113 SET_IA32_ORIG_NODE(new_node, node);
4115 panic("generic Bound not supported in ia32 Backend");
/* Transform a lowered ShlDep (shift-left with an extra scheduling
 * dependency) into an ia32 Shl. */
4121 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4123 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4124 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4126 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4127 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep into an ia32 Shr. */
4130 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4132 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4133 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4134 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Transform a lowered SarDep into an ia32 Sar (arithmetic shift right). */
4138 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4140 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4141 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4142 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Transform a lowered Add (low word of a 64bit addition). The result is
 * forced to mode_T so the flags output can feed a following Adc. */
4146 static ir_node *gen_ia32_l_Add(ir_node *node)
4148 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4149 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4150 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4151 match_commutative | match_am | match_immediate |
4152 match_mode_neutral);
/* gen_binop may have returned a Proj; we need the Add node itself */
4154 if (is_Proj(lowered)) {
4155 lowered = get_Proj_pred(lowered);
4157 assert(is_ia32_Add(lowered));
4158 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add with carry, high word of a 64bit add). */
4164 static ir_node *gen_ia32_l_Adc(ir_node *node)
4166 return gen_binop_flags(node, new_bd_ia32_Adc,
4167 match_commutative | match_am | match_immediate |
4168 match_mode_neutral);
4172 * Transforms a l_MulS into a "real" MulS node.
4174 * @return the created ia32 Mul node
4176 static ir_node *gen_ia32_l_Mul(ir_node *node)
4178 ir_node *left = get_binop_left(node);
4179 ir_node *right = get_binop_right(node);
4181 return gen_binop(node, left, right, new_bd_ia32_Mul,
4182 match_commutative | match_am | match_mode_neutral);
4186 * Transforms a l_IMulS into a "real" IMul1OPS node.
4188 * @return the created ia32 IMul1OP node
4190 static ir_node *gen_ia32_l_IMul(ir_node *node)
4192 ir_node *left = get_binop_left(node);
4193 ir_node *right = get_binop_right(node);
4195 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4196 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64bit subtraction); forced to
 * mode_T so the flags output can feed a following Sbb. */
4199 static ir_node *gen_ia32_l_Sub(ir_node *node)
4201 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4202 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4203 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4204 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have returned a Proj; we need the Sub node itself */
4206 if (is_Proj(lowered)) {
4207 lowered = get_Proj_pred(lowered);
4209 assert(is_ia32_Sub(lowered));
4210 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract with borrow, high word of a 64bit sub). */
4216 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4218 return gen_binop_flags(node, new_bd_ia32_Sbb,
4219 match_am | match_immediate | match_mode_neutral);
4223 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4224 * op1 - target to be shifted
4225 * op2 - contains bits to be shifted into target
4227 * Only op3 can be an immediate.
4229 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4230 ir_node *low, ir_node *count)
4232 ir_node *block = get_nodes_block(node);
4233 ir_node *new_block = be_transform_node(block);
4234 dbg_info *dbgi = get_irn_dbg_info(node);
4235 ir_node *new_high = be_transform_node(high);
4236 ir_node *new_low = be_transform_node(low);
4240 /* the shift amount can be any mode that is bigger than 5 bits, since all
4241 * other bits are ignored anyway */
4242 while (is_Conv(count) &&
4243 get_irn_n_edges(count) == 1 &&
4244 mode_is_int(get_irn_mode(count))) {
4245 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4246 count = get_Conv_op(count);
/* the count may be encodable as an immediate */
4248 new_count = create_immediate_or_transform(count, 0);
4250 if (is_ia32_l_ShlD(node)) {
4251 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4254 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4257 SET_IA32_ORIG_NODE(new_node, node);
/* Transform a lowered ShlD (64bit shift left) — see gen_lowered_64bit_shifts. */
4262 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4264 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4265 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4266 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4267 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered ShrD (64bit shift right) — see gen_lowered_64bit_shifts. */
4270 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4272 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4273 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4274 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4275 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered long-long-to-float conversion: spill the two 32bit
 * halves to the frame, fild the 64bit slot, and — for unsigned sources —
 * add a bias constant when the top bit was set. */
4278 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4280 ir_node *src_block = get_nodes_block(node);
4281 ir_node *block = be_transform_node(src_block);
4282 ir_graph *irg = current_ir_graph;
4283 dbg_info *dbgi = get_irn_dbg_info(node);
4284 ir_node *frame = get_irg_frame(irg);
4285 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4286 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4287 ir_node *new_val_low = be_transform_node(val_low);
4288 ir_node *new_val_high = be_transform_node(val_high);
4290 ir_node *sync, *fild, *res;
4291 ir_node *store_low, *store_high;
4293 if (ia32_cg_config.use_sse2) {
4294 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* store the two 32bit halves into one 64bit stack slot (high half at
 * offset 4, little endian) */
4298 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4300 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4302 SET_IA32_ORIG_NODE(store_low, node);
4303 SET_IA32_ORIG_NODE(store_high, node);
4305 set_ia32_use_frame(store_low);
4306 set_ia32_use_frame(store_high);
4307 set_ia32_op_type(store_low, ia32_AddrModeD);
4308 set_ia32_op_type(store_high, ia32_AddrModeD);
4309 set_ia32_ls_mode(store_low, mode_Iu);
4310 set_ia32_ls_mode(store_high, mode_Is);
4311 add_ia32_am_offs_int(store_high, 4);
/* join both store memories before the load */
4315 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the 64bit integer as an x87 float */
4318 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4320 set_ia32_use_frame(fild);
4321 set_ia32_op_type(fild, ia32_AddrModeS);
4322 set_ia32_ls_mode(fild, mode_Ls);
4324 SET_IA32_ORIG_NODE(fild, node);
4326 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* fild treats the value as signed: for unsigned sources with the sign bit
 * set, conditionally add the ULL bias constant (selected via the sign
 * bit, shifted down to form a table index) */
4328 if (! mode_is_signed(get_irn_mode(val_high))) {
4329 ia32_address_mode_t am;
4331 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4334 am.addr.base = get_symconst_base();
4335 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4336 am.addr.mem = nomem;
4339 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4340 am.addr.use_frame = 0;
4341 am.addr.frame_entity = NULL;
4342 am.addr.symconst_sign = 0;
4343 am.ls_mode = mode_F;
4344 am.mem_proj = nomem;
4345 am.op_type = ia32_AddrModeS;
4347 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4348 am.pinned = op_pin_state_floats;
4350 am.ins_permuted = 0;
4352 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4353 am.new_op1, am.new_op2, get_fpcw());
4354 set_am_attributes(fadd, &am);
4356 set_irn_mode(fadd, mode_T);
4357 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Transform a lowered float-to-long-long conversion: fist the x87 value
 * into a 64bit stack slot; the Projs load the halves (gen_Proj_l_FloattoLL). */
4362 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4364 ir_node *src_block = get_nodes_block(node);
4365 ir_node *block = be_transform_node(src_block);
4366 ir_graph *irg = get_Block_irg(block);
4367 dbg_info *dbgi = get_irn_dbg_info(node);
4368 ir_node *frame = get_irg_frame(irg);
4369 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4370 ir_node *new_val = be_transform_node(val);
4371 ir_node *fist, *mem;
4373 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4374 SET_IA32_ORIG_NODE(fist, node);
4375 set_ia32_use_frame(fist);
4376 set_ia32_op_type(fist, ia32_AddrModeD);
4377 set_ia32_ls_mode(fist, mode_Ls);
4383 * the BAD transformer.
/* Registered for node types that must never reach the ia32 transformation. */
4385 static ir_node *bad_transform(ir_node *node)
4387 panic("No transform function for %+F available.", node);
/* Transform a Proj of a lowered FloattoLL: load one 32bit half of the
 * 64bit value the fist wrote to the stack slot (high half at offset 4). */
4390 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4392 ir_node *block = be_transform_node(get_nodes_block(node));
4393 ir_graph *irg = get_Block_irg(block);
4394 ir_node *pred = get_Proj_pred(node);
4395 ir_node *new_pred = be_transform_node(pred);
4396 ir_node *frame = get_irg_frame(irg);
4397 dbg_info *dbgi = get_irn_dbg_info(node);
4398 long pn = get_Proj_proj(node);
4403 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4404 SET_IA32_ORIG_NODE(load, node);
4405 set_ia32_use_frame(load);
4406 set_ia32_op_type(load, ia32_AddrModeS);
4407 set_ia32_ls_mode(load, mode_Iu);
4408 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4409 * 32 bit from it with this particular load */
4410 attr = get_ia32_attr(load);
4411 attr->data.need_64bit_stackent = 1;
4413 if (pn == pn_ia32_l_FloattoLL_res_high) {
4414 add_ia32_am_offs_int(load, 4);
4416 assert(pn == pn_ia32_l_FloattoLL_res_low);
4419 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4425 * Transform the Projs of an AddSP.
/* Note: the predecessor was transformed into a SubSP (see gen_be_AddSP),
 * so the Proj numbers are renumbered onto the SubSP outputs. */
4427 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4429 ir_node *pred = get_Proj_pred(node);
4430 ir_node *new_pred = be_transform_node(pred);
4431 dbg_info *dbgi = get_irn_dbg_info(node);
4432 long proj = get_Proj_proj(node);
4434 if (proj == pn_be_AddSP_sp) {
4435 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4436 pn_ia32_SubSP_stack);
/* the stack pointer output is pinned to ESP */
4437 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4439 } else if (proj == pn_be_AddSP_res) {
4440 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4441 pn_ia32_SubSP_addr);
4442 } else if (proj == pn_be_AddSP_M) {
4443 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4446 panic("No idea how to transform proj->AddSP");
4450 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: the predecessor became an AddSP. */
4452 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4454 ir_node *pred = get_Proj_pred(node);
4455 ir_node *new_pred = be_transform_node(pred);
4456 dbg_info *dbgi = get_irn_dbg_info(node);
4457 long proj = get_Proj_proj(node);
4459 if (proj == pn_be_SubSP_sp) {
4460 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4461 pn_ia32_AddSP_stack);
/* the stack pointer output is pinned to ESP */
4462 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4464 } else if (proj == pn_be_SubSP_M) {
4465 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4468 panic("No idea how to transform proj->SubSP");
4472 * Transform and renumber the Projs from a Load.
4474 static ir_node *gen_Proj_Load(ir_node *node)
4477 ir_node *block = be_transform_node(get_nodes_block(node));
4478 ir_node *pred = get_Proj_pred(node);
4479 dbg_info *dbgi = get_irn_dbg_info(node);
4480 long proj = get_Proj_proj(node);
4482 /* loads might be part of source address mode matches, so we don't
4483 * transform the ProjMs yet (with the exception of loads whose result is
4486 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4489 /* this is needed, because sometimes we have loops that are only
4490 reachable through the ProjM */
4491 be_enqueue_preds(node);
4492 /* do it in 2 steps, to silence firm verifier */
4493 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4494 set_Proj_proj(res, pn_ia32_mem);
4498 /* renumber the proj */
4499 new_pred = be_transform_node(pred);
4500 if (is_ia32_Load(new_pred)) {
/* plain GP load */
4503 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4505 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4506 case pn_Load_X_regular:
4507 return new_rd_Jmp(dbgi, block);
4508 case pn_Load_X_except:
4509 /* This Load might raise an exception. Mark it. */
4510 set_ia32_exc_label(new_pred, 1);
4511 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
4515 } else if (is_ia32_Conv_I2I(new_pred) ||
4516 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the Load was folded into a Conv as source address mode */
4517 set_irn_mode(new_pred, mode_T);
4518 if (proj == pn_Load_res) {
4519 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4520 } else if (proj == pn_Load_M) {
4521 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4523 } else if (is_ia32_xLoad(new_pred)) {
/* SSE load */
4526 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4528 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4529 case pn_Load_X_regular:
4530 return new_rd_Jmp(dbgi, block);
4531 case pn_Load_X_except:
4532 /* This Load might raise an exception. Mark it. */
4533 set_ia32_exc_label(new_pred, 1);
4534 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4538 } else if (is_ia32_vfld(new_pred)) {
/* x87 load */
4541 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4543 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4544 case pn_Load_X_regular:
4545 return new_rd_Jmp(dbgi, block);
4546 case pn_Load_X_except:
4547 /* This Load might raise an exception. Mark it. */
4548 set_ia32_exc_label(new_pred, 1);
4549 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4554 /* can happen for ProJMs when source address mode happened for the
4557 /* however it should not be the result proj, as that would mean the
4558 load had multiple users and should not have been used for
4560 if (proj != pn_Load_M) {
4561 panic("internal error: transformed node not a Load");
4563 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4566 panic("No idea how to transform proj");
4570 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map to a single ia32 Div/IDiv node that has both
 * the quotient and the remainder as outputs; only the Proj numbers differ. */
4572 static ir_node *gen_Proj_DivMod(ir_node *node)
4574 ir_node *block = be_transform_node(get_nodes_block(node));
4575 ir_node *pred = get_Proj_pred(node);
4576 ir_node *new_pred = be_transform_node(pred);
4577 dbg_info *dbgi = get_irn_dbg_info(node);
4578 long proj = get_Proj_proj(node);
4580 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4582 switch (get_irn_opcode(pred)) {
/* Div: quotient result */
4586 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4588 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4589 case pn_Div_X_regular:
4590 return new_rd_Jmp(dbgi, block);
4591 case pn_Div_X_except:
4592 set_ia32_exc_label(new_pred, 1);
4593 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
/* Mod: remainder result */
4601 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4603 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4604 case pn_Mod_X_except:
4605 set_ia32_exc_label(new_pred, 1);
4606 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
/* DivMod: both results available */
4614 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4615 case pn_DivMod_res_div:
4616 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4617 case pn_DivMod_res_mod:
4618 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4619 case pn_DivMod_X_regular:
4620 return new_rd_Jmp(dbgi, block);
4621 case pn_DivMod_X_except:
4622 set_ia32_exc_label(new_pred, 1);
4623 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4632 panic("No idea how to transform proj->DivMod");
4636 * Transform and renumber the Projs from a CopyB.
4638 static ir_node *gen_Proj_CopyB(ir_node *node)
4640 ir_node *pred = get_Proj_pred(node);
4641 ir_node *new_pred = be_transform_node(pred);
4642 dbg_info *dbgi = get_irn_dbg_info(node);
4643 long proj = get_Proj_proj(node);
4646 case pn_CopyB_M_regular:
/* the CopyB was transformed into either CopyB_i (known size) or CopyB */
4647 if (is_ia32_CopyB_i(new_pred)) {
4648 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4649 } else if (is_ia32_CopyB(new_pred)) {
4650 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4657 panic("No idea how to transform proj->CopyB");
4661 * Transform and renumber the Projs from a Quot.
/* the Quot became either an SSE xDiv or an x87 vfdiv */
4663 static ir_node *gen_Proj_Quot(ir_node *node)
4665 ir_node *pred = get_Proj_pred(node);
4666 ir_node *new_pred = be_transform_node(pred);
4667 dbg_info *dbgi = get_irn_dbg_info(node);
4668 long proj = get_Proj_proj(node);
4672 if (is_ia32_xDiv(new_pred)) {
4673 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4674 } else if (is_ia32_vfdiv(new_pred)) {
4675 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4679 if (is_ia32_xDiv(new_pred)) {
4680 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4681 } else if (is_ia32_vfdiv(new_pred)) {
4682 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4685 case pn_Quot_X_regular:
4686 case pn_Quot_X_except:
4691 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32 Call: the call target may be matched as
 * address mode or immediate, and register parameters are routed to the
 * fixed eax/ecx/edx inputs. */
4694 static ir_node *gen_be_Call(ir_node *node)
4696 dbg_info *const dbgi = get_irn_dbg_info(node);
4697 ir_node *const src_block = get_nodes_block(node);
4698 ir_node *const block = be_transform_node(src_block);
4699 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4700 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4701 ir_node *const sp = be_transform_node(src_sp);
4702 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4703 ia32_address_mode_t am;
4704 ia32_address_t *const addr = &am.addr;
4709 ir_node * eax = noreg_GP;
4710 ir_node * ecx = noreg_GP;
4711 ir_node * edx = noreg_GP;
4712 unsigned const pop = be_Call_get_pop(node);
4713 ir_type *const call_tp = be_Call_get_type(node);
4714 int old_no_pic_adjust;
4716 /* Run the x87 simulator if the call returns a float value */
4717 if (get_method_n_ress(call_tp) > 0) {
4718 ir_type *const res_type = get_method_res_type(call_tp, 0);
4719 ir_mode *const res_mode = get_type_mode(res_type);
4721 if (res_mode != NULL && mode_is_float(res_mode)) {
4722 env_cg->do_x87_sim = 1;
4726 /* We do not want be_Call direct calls */
4727 assert(be_Call_get_entity(node) == NULL);
4729 /* special case for PIC trampoline calls */
4730 old_no_pic_adjust = no_pic_adjust;
4731 no_pic_adjust = env_cg->birg->main_env->options->pic;
4733 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4734 match_am | match_immediate);
4736 no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; the remaining register parameters are
 * dispatched to their limited registers */
4738 i = get_irn_arity(node) - 1;
4739 fpcw = be_transform_node(get_irn_n(node, i--));
4740 for (; i >= be_pos_Call_first_arg; --i) {
4741 arch_register_req_t const *const req = arch_get_register_req(node, i);
4742 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4744 assert(req->type == arch_register_req_type_limited);
4745 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4747 switch (*req->limited) {
4748 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4749 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4750 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4751 default: panic("Invalid GP register for register parameter");
4755 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4756 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4757 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4758 set_am_attributes(call, &am);
4759 call = fix_mem_proj(call, &am);
4761 if (get_irn_pinned(node) == op_pin_state_pinned)
4762 set_irn_pinned(call, op_pin_state_pinned);
4764 SET_IA32_ORIG_NODE(call, node);
4766 if (ia32_cg_config.use_sse2) {
4767 /* remember this call for post-processing */
4768 ARR_APP1(ir_node *, call_list, call);
4769 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4776 * Transform Builtin trap
/* maps to the ud2 instruction */
4778 static ir_node *gen_trap(ir_node *node)
4780 dbg_info *dbgi = get_irn_dbg_info(node);
4781 ir_node *block = be_transform_node(get_nodes_block(node));
4782 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4784 return new_bd_ia32_UD2(dbgi, block, mem);
4788 * Transform Builtin debugbreak
/* maps to the breakpoint instruction */
4790 static ir_node *gen_debugbreak(ir_node *node)
4792 dbg_info *dbgi = get_irn_dbg_info(node);
4793 ir_node *block = be_transform_node(get_nodes_block(node));
4794 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4796 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4800 * Transform Builtin return_address
/* param 0 must be a Const frame level; non-zero levels walk the frame
 * chain with ClimbFrame first, then the return address is loaded from the
 * resulting frame. */
4802 static ir_node *gen_return_address(ir_node *node)
4804 ir_node *param = get_Builtin_param(node, 0);
4805 ir_node *frame = get_Builtin_param(node, 1);
4806 dbg_info *dbgi = get_irn_dbg_info(node);
4807 tarval *tv = get_Const_tarval(param);
4808 unsigned long value = get_tarval_long(tv);
4810 ir_node *block = be_transform_node(get_nodes_block(node));
4811 ir_node *ptr = be_transform_node(frame);
/* walk "value" frames up the frame chain */
4815 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4816 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4817 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4820 /* load the return address from this frame */
4821 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4823 set_irn_pinned(load, get_irn_pinned(node));
4824 set_ia32_op_type(load, ia32_AddrModeS);
4825 set_ia32_ls_mode(load, mode_Iu);
4827 set_ia32_am_offs_int(load, 0);
4828 set_ia32_use_frame(load);
4829 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4831 if (get_irn_pinned(node) == op_pin_state_floats) {
4832 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4833 && pn_ia32_vfld_res == pn_ia32_Load_res
4834 && pn_ia32_Load_res == pn_ia32_res);
4835 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4838 SET_IA32_ORIG_NODE(load, node);
4839 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4843 * Transform Builtin frame_address
/* analogous to gen_return_address, but loads the saved frame pointer */
4845 static ir_node *gen_frame_address(ir_node *node)
4847 ir_node *param = get_Builtin_param(node, 0);
4848 ir_node *frame = get_Builtin_param(node, 1);
4849 dbg_info *dbgi = get_irn_dbg_info(node);
4850 tarval *tv = get_Const_tarval(param);
4851 unsigned long value = get_tarval_long(tv);
4853 ir_node *block = be_transform_node(get_nodes_block(node));
4854 ir_node *ptr = be_transform_node(frame);
/* walk "value" frames up the frame chain */
4859 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4860 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4861 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4864 /* load the frame address from this frame */
4865 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4867 set_irn_pinned(load, get_irn_pinned(node));
4868 set_ia32_op_type(load, ia32_AddrModeS);
4869 set_ia32_ls_mode(load, mode_Iu);
4871 ent = ia32_get_frame_address_entity();
4873 set_ia32_am_offs_int(load, 0);
4874 set_ia32_use_frame(load);
4875 set_ia32_frame_ent(load, ent);
4877 /* will fail anyway, but gcc does this: */
4878 set_ia32_am_offs_int(load, 0);
4881 if (get_irn_pinned(node) == op_pin_state_floats) {
4882 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4883 && pn_ia32_vfld_res == pn_ia32_Load_res
4884 && pn_ia32_Load_res == pn_ia32_res);
4885 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4888 SET_IA32_ORIG_NODE(load, node);
4889 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4893 * Transform Builtin frame_address
/* NOTE(review): the doc line above looks like a copy-paste from
 * gen_frame_address; this function transforms the prefetch builtin. */
4895 static ir_node *gen_prefetch(ir_node *node)
4898 ir_node *ptr, *block, *mem, *base, *index;
4899 ir_node *param, *new_node;
4902 ia32_address_t addr;
4904 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4905 /* no prefetch at all, route memory */
4906 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag */
4909 param = get_Builtin_param(node, 1);
4910 tv = get_Const_tarval(param);
4911 rw = get_tarval_long(tv);
4913 /* construct load address */
4914 memset(&addr, 0, sizeof(addr));
4915 ptr = get_Builtin_param(node, 0);
4916 ia32_create_address_mode(&addr, ptr, 0);
4923 base = be_transform_node(base);
4926 if (index == NULL) {
4929 index = be_transform_node(index);
4932 dbgi = get_irn_dbg_info(node);
4933 block = be_transform_node(get_nodes_block(node));
4934 mem = be_transform_node(get_Builtin_mem(node));
4936 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4937 /* we have 3DNow!, this was already checked above */
4938 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4939 } else if (ia32_cg_config.use_sse_prefetch) {
4940 /* note: rw == 1 is IGNORED in that case */
4941 param = get_Builtin_param(node, 2);
4942 tv = get_Const_tarval(param);
4943 locality = get_tarval_long(tv);
4945 /* SSE style prefetch */
/* pick the prefetch hint level (nta/t2/t1/t0) from the locality value */
4948 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4951 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4954 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4957 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4961 assert(ia32_cg_config.use_3dnow_prefetch);
4962 /* 3DNow! style prefetch */
4963 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4966 set_irn_pinned(new_node, get_irn_pinned(node));
4967 set_ia32_op_type(new_node, ia32_AddrModeS);
4968 set_ia32_ls_mode(new_node, mode_Bu);
4969 set_address(new_node, &addr);
4971 SET_IA32_ORIG_NODE(new_node, node);
4973 be_dep_on_frame(new_node);
4974 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4978 * Transform bsf like node
4980 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
/* Transform a one-operand builtin (bsf/bsr/popcnt style) into the ia32 node
 * built by @p func, trying to fold the operand as a source address mode
 * (memory) operand.  Returns the (possibly mem-Proj-fixed) result node. */
4982 ir_node *param = get_Builtin_param(node, 0);
4983 dbg_info *dbgi = get_irn_dbg_info(node);
4985 ir_node *block = get_nodes_block(node);
4986 ir_node *new_block = be_transform_node(block);
4988 ia32_address_mode_t am;
4989 ia32_address_t *addr = &am.addr;
/* the single operand may be matched as an address mode operand */
4992 match_arguments(&am, block, NULL, param, NULL, match_am);
4994 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4995 set_am_attributes(cnt, &am);
4996 set_ia32_ls_mode(cnt, get_irn_mode(param));
4998 SET_IA32_ORIG_NODE(cnt, node);
/* if a memory operand was folded, reroute the memory Proj accordingly */
4999 return fix_mem_proj(cnt, &am);
5003 * Transform builtin ffs.
5005 static ir_node *gen_ffs(ir_node *node)
/* Transform builtin ffs: ffs(x) = index of lowest set bit + 1, or 0 for x==0.
 * Computed as (bsf(x) | -(x == 0)) + 1: BSF sets ZF for a zero input, the
 * sete/conv/neg sequence turns that into an all-ones mask, so the final add
 * yields 0 for x == 0 and bsf(x)+1 otherwise. */
5007 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5008 ir_node *real = skip_Proj(bsf);
5009 dbg_info *dbgi = get_irn_dbg_info(real);
5010 ir_node *block = get_nodes_block(real);
5011 ir_node *flag, *set, *conv, *neg, *or;
/* we need the flags output too, so force the Bsf into mode_T */
5014 if (get_irn_mode(real) != mode_T) {
5015 set_irn_mode(real, mode_T);
5016 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5019 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* sete: 1 iff the bsf input was zero */
5022 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5023 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the 8bit sete result to 32 bit */
5026 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5027 SET_IA32_ORIG_NODE(conv, node);
/* negate: 0 -> 0, 1 -> 0xFFFFFFFF */
5030 neg = new_bd_ia32_Neg(dbgi, block, conv);
5033 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5034 set_ia32_commutative(or);
/* +1 maps the all-ones mask to 0 and a valid bsf index to index+1 */
5037 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5041 * Transform builtin clz.
5043 static ir_node *gen_clz(ir_node *node)
/* Transform builtin clz: for x != 0, clz(x) == 31 - bsr(x) == bsr(x) ^ 31.
 * The result is undefined for x == 0 (BSR leaves its destination undefined). */
5045 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5046 ir_node *real = skip_Proj(bsr);
5047 dbg_info *dbgi = get_irn_dbg_info(real);
5048 ir_node *block = get_nodes_block(real);
5049 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5051 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5055 * Transform builtin ctz.
5057 static ir_node *gen_ctz(ir_node *node)
/* Transform builtin ctz: ctz(x) == bsf(x) for x != 0.
 * The result is undefined for x == 0 (BSF leaves its destination undefined). */
5059 return gen_unop_AM(node, new_bd_ia32_Bsf);
5063 * Transform builtin parity.
5065 static ir_node *gen_parity(ir_node *node)
/* Transform builtin parity: compare the operand with 0 (the compare result
 * equals the operand) and read the parity flag via setp, then zero-extend
 * the 8bit setcc result.
 * NOTE(review): the x86 parity flag only reflects the least significant BYTE
 * of the result, but ls_mode is set to mode_Iu (32 bit) here — verify that
 * the operand is reduced to 8 bit before this point, otherwise this computes
 * the parity of the low byte only. */
5067 ir_node *param = get_Builtin_param(node, 0);
5068 dbg_info *dbgi = get_irn_dbg_info(node);
5070 ir_node *block = get_nodes_block(node);
5072 ir_node *new_block = be_transform_node(block);
5073 ir_node *imm, *cmp, *new_node;
5075 ia32_address_mode_t am;
5076 ia32_address_t *addr = &am.addr;
/* cmp param, 0: sets the flags according to param itself */
5080 match_arguments(&am, block, NULL, param, NULL, match_am);
5081 imm = ia32_create_Immediate(NULL, 0, 0);
5082 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5083 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5084 set_am_attributes(cmp, &am);
5085 set_ia32_ls_mode(cmp, mode_Iu);
5087 SET_IA32_ORIG_NODE(cmp, node);
5089 cmp = fix_mem_proj(cmp, &am);
/* setp: materialize the parity flag */
5092 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5093 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8bit setcc result to 32 bit */
5096 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5097 nomem, new_node, mode_Bu);
5098 SET_IA32_ORIG_NODE(new_node, node);
5103 * Transform builtin popcount
5105 static ir_node *gen_popcount(ir_node *node)
5107 ir_node *param = get_Builtin_param(node, 0);
5108 dbg_info *dbgi = get_irn_dbg_info(node);
5110 ir_node *block = get_nodes_block(node);
5111 ir_node *new_block = be_transform_node(block);
5114 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5116 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5117 if (ia32_cg_config.use_popcnt) {
5118 ia32_address_mode_t am;
5119 ia32_address_t *addr = &am.addr;
5122 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5124 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5125 set_am_attributes(cnt, &am);
5126 set_ia32_ls_mode(cnt, get_irn_mode(param));
5128 SET_IA32_ORIG_NODE(cnt, node);
5129 return fix_mem_proj(cnt, &am);
5132 new_param = be_transform_node(param);
5134 /* do the standard popcount algo */
5136 /* m1 = x & 0x55555555 */
5137 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5138 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5141 simm = ia32_create_Immediate(NULL, 0, 1);
5142 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5144 /* m2 = s1 & 0x55555555 */
5145 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5148 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5150 /* m4 = m3 & 0x33333333 */
5151 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5152 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5155 simm = ia32_create_Immediate(NULL, 0, 2);
5156 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5158 /* m5 = s2 & 0x33333333 */
5159 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5162 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5164 /* m7 = m6 & 0x0F0F0F0F */
5165 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5166 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5169 simm = ia32_create_Immediate(NULL, 0, 4);
5170 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5172 /* m8 = s3 & 0x0F0F0F0F */
5173 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5176 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5178 /* m10 = m9 & 0x00FF00FF */
5179 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5180 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5183 simm = ia32_create_Immediate(NULL, 0, 8);
5184 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5186 /* m11 = s4 & 0x00FF00FF */
5187 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5189 /* m12 = m10 + m11 */
5190 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5192 /* m13 = m12 & 0x0000FFFF */
5193 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5194 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5196 /* s5 = m12 >> 16 */
5197 simm = ia32_create_Immediate(NULL, 0, 16);
5198 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5200 /* res = m13 + s5 */
5201 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5205 * Transform builtin byte swap.
5207 static ir_node *gen_bswap(ir_node *node)
5209 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5210 dbg_info *dbgi = get_irn_dbg_info(node);
5212 ir_node *block = get_nodes_block(node);
5213 ir_node *new_block = be_transform_node(block);
5214 ir_mode *mode = get_irn_mode(param);
5215 unsigned size = get_mode_size_bits(mode);
5216 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5220 if (ia32_cg_config.use_i486) {
5221 /* swap available */
5222 return new_bd_ia32_Bswap(dbgi, new_block, param);
5224 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5225 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5227 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5228 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5230 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5232 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5233 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5235 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5236 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5239 /* swap16 always available */
5240 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5243 panic("Invalid bswap size (%d)", size);
5248 * Transform builtin outport.
5250 static ir_node *gen_outport(ir_node *node)
/* Transform builtin outport into an ia32 Outport node (out instruction).
 * param 0 is the port number (immediate if possible), param 1 the value;
 * the value's mode determines the access width. */
5252 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5253 ir_node *oldv = get_Builtin_param(node, 1);
5254 ir_mode *mode = get_irn_mode(oldv);
5255 ir_node *value = be_transform_node(oldv);
5256 ir_node *block = be_transform_node(get_nodes_block(node));
5257 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5258 dbg_info *dbgi = get_irn_dbg_info(node);
5260 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5261 set_ia32_ls_mode(res, mode);
5266 * Transform builtin inport.
5268 static ir_node *gen_inport(ir_node *node)
/* Transform builtin inport into an ia32 Inport node (in instruction).
 * The access width is taken from the builtin's method result type;
 * param 0 is the port number (immediate if possible). */
5270 ir_type *tp = get_Builtin_type(node);
5271 ir_type *rstp = get_method_res_type(tp, 0);
5272 ir_mode *mode = get_type_mode(rstp);
5273 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5274 ir_node *block = be_transform_node(get_nodes_block(node));
5275 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5276 dbg_info *dbgi = get_irn_dbg_info(node);
5278 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5279 set_ia32_ls_mode(res, mode);
5281 /* check for missing Result Proj */
5286 * Transform a builtin inner trampoline
5288 static ir_node *gen_inner_trampoline(ir_node *node)
/* Transform a builtin inner trampoline: emit stores that write a 10-byte
 * code stub at *ptr:
 *   B9 <env:4>   mov ecx, <env>
 *   E9 <rel:4>   jmp rel32 to <callee>
 * The jmp displacement is callee - (trampoline + 10), built as a Const/Lea
 * with offset -10 minus the trampoline address. */
5290 ir_node *ptr = get_Builtin_param(node, 0);
5291 ir_node *callee = get_Builtin_param(node, 1);
5292 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5293 ir_node *mem = get_Builtin_mem(node);
5294 ir_node *block = get_nodes_block(node);
5295 ir_node *new_block = be_transform_node(block);
5299 ir_node *trampoline;
5301 dbg_info *dbgi = get_irn_dbg_info(node);
5302 ia32_address_t addr;
5304 /* construct store address */
5305 memset(&addr, 0, sizeof(addr));
5306 ia32_create_address_mode(&addr, ptr, 0);
5308 if (addr.base == NULL) {
5309 addr.base = noreg_GP;
5311 addr.base = be_transform_node(addr.base);
5314 if (addr.index == NULL) {
5315 addr.index = noreg_GP;
5317 addr.index = be_transform_node(addr.index);
5319 addr.mem = be_transform_node(mem);
5321 /* mov ecx, <env> */
/* 0xB9 is the opcode byte of "mov ecx, imm32" */
5322 val = ia32_create_Immediate(NULL, 0, 0xB9);
5323 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5324 addr.index, addr.mem, val);
5325 set_irn_pinned(store, get_irn_pinned(node));
5326 set_ia32_op_type(store, ia32_AddrModeD);
5327 set_ia32_ls_mode(store, mode_Bu);
5328 set_address(store, &addr);
/* store the 32bit environment pointer as the mov's immediate
 * (presumably addr.offset is advanced between the stores — elided here) */
5332 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5333 addr.index, addr.mem, env);
5334 set_irn_pinned(store, get_irn_pinned(node));
5335 set_ia32_op_type(store, ia32_AddrModeD);
5336 set_ia32_ls_mode(store, mode_Iu);
5337 set_address(store, &addr);
5341 /* jmp rel <callee> */
/* 0xE9 is the opcode byte of "jmp rel32" */
5342 val = ia32_create_Immediate(NULL, 0, 0xE9);
5343 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5344 addr.index, addr.mem, val);
5345 set_irn_pinned(store, get_irn_pinned(node));
5346 set_ia32_op_type(store, ia32_AddrModeD);
5347 set_ia32_ls_mode(store, mode_Bu);
5348 set_address(store, &addr);
5352 trampoline = be_transform_node(ptr);
5354 /* the callee is typically an immediate */
/* rel32 = callee - 10 - trampoline (the stub is 10 bytes long) */
5355 if (is_SymConst(callee)) {
5356 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5358 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5360 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5362 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5363 addr.index, addr.mem, rel);
5364 set_irn_pinned(store, get_irn_pinned(node));
5365 set_ia32_op_type(store, ia32_AddrModeD);
5366 set_ia32_ls_mode(store, mode_Iu);
5367 set_address(store, &addr);
/* result tuple: memory and the trampoline address (in[] setup elided) */
5372 return new_r_Tuple(new_block, 2, in);
5376 * Transform Builtin node.
5378 static ir_node *gen_Builtin(ir_node *node)
/* Dispatch a Builtin node to the matching gen_* transformer according to
 * its builtin kind; panics for kinds not implemented on ia32. */
5380 ir_builtin_kind kind = get_Builtin_kind(node);
5384 return gen_trap(node);
5385 case ir_bk_debugbreak:
5386 return gen_debugbreak(node);
5387 case ir_bk_return_address:
5388 return gen_return_address(node);
5389 case ir_bk_frame_address:
5390 return gen_frame_address(node);
5391 case ir_bk_prefetch:
5392 return gen_prefetch(node);
5394 return gen_ffs(node);
5396 return gen_clz(node);
5398 return gen_ctz(node);
5400 return gen_parity(node);
5401 case ir_bk_popcount:
5402 return gen_popcount(node);
5404 return gen_bswap(node);
5406 return gen_outport(node);
5408 return gen_inport(node);
5409 case ir_bk_inner_trampoline:
5410 return gen_inner_trampoline(node);
5412 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5416 * Transform Proj(Builtin) node.
5418 static ir_node *gen_Proj_Builtin(ir_node *proj)
/* Transform a Proj of a Builtin: map the generic pn_Builtin_* numbers to
 * the Proj layout of the ia32 node (or Tuple) the builtin was turned into. */
5420 ir_node *node = get_Proj_pred(proj);
5421 ir_node *new_node = be_transform_node(node);
5422 ir_builtin_kind kind = get_Builtin_kind(node);
5425 case ir_bk_return_address:
5426 case ir_bk_frame_address:
/* these transformers already return the single data result */
5431 case ir_bk_popcount:
5433 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5436 case ir_bk_debugbreak:
5437 case ir_bk_prefetch:
/* these builtins only produce a memory result */
5439 assert(get_Proj_proj(proj) == pn_Builtin_M);
5442 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5443 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5445 assert(get_Proj_proj(proj) == pn_Builtin_M);
5446 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5448 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: pred 0 = memory, pred 1 = address */
5449 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5450 return get_Tuple_pred(new_node, 1);
5452 assert(get_Proj_proj(proj) == pn_Builtin_M);
5453 return get_Tuple_pred(new_node, 0);
5456 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5459 static ir_node *gen_be_IncSP(ir_node *node)
/* Transform be_IncSP: duplicate it, but mark it as clobbering the flags
 * since the stack pointer adjustment is emitted as an add/sub. */
5461 ir_node *res = be_duplicate_node(node);
5462 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5468 * Transform the Projs from a be_Call.
5470 static ir_node *gen_Proj_be_Call(ir_node *node)
/* Transform a Proj of a be_Call: map be_Call proj numbers to ia32_Call
 * proj numbers, find the matching output slot for register-constrained
 * results, and pin the well-known registers (esp, fpcw) on the new Proj. */
5472 ir_node *call = get_Proj_pred(node);
5473 ir_node *new_call = be_transform_node(call);
5474 dbg_info *dbgi = get_irn_dbg_info(node);
5475 long proj = get_Proj_proj(node);
5476 ir_mode *mode = get_irn_mode(node);
5479 if (proj == pn_be_Call_M_regular) {
5480 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5482 /* transform call modes */
5483 if (mode_is_data(mode)) {
/* data results are rewritten to the register class' mode */
5484 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5488 /* Map from be_Call to ia32_Call proj number */
5489 if (proj == pn_be_Call_sp) {
5490 proj = pn_ia32_Call_stack;
/* NOTE(review): pn_be_Call_M_regular was already handled by the early
 * return above, so this branch looks unreachable — verify */
5491 } else if (proj == pn_be_Call_M_regular) {
5492 proj = pn_ia32_Call_M;
/* a result: search the new call's outputs for the slot whose register
 * constraint matches the one requested on the old Proj */
5494 arch_register_req_t const *const req = arch_get_register_req_out(node);
5495 int const n_outs = arch_irn_get_n_outs(new_call);
5498 assert(proj >= pn_be_Call_first_res);
5499 assert(req->type & arch_register_req_type_limited);
5501 for (i = 0; i < n_outs; ++i) {
5502 arch_register_req_t const *const new_req
5503 = arch_get_out_register_req(new_call, i);
5505 if (!(new_req->type & arch_register_req_type_limited) ||
5506 new_req->cls != req->cls ||
5507 *new_req->limited != *req->limited)
5516 res = new_rd_Proj(dbgi, new_call, mode, proj);
5518 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5520 case pn_ia32_Call_stack:
5521 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5524 case pn_ia32_Call_fpcw:
5525 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5533 * Transform the Projs from a Cmp.
5535 static ir_node *gen_Proj_Cmp(ir_node *node)
/* Projs of a Cmp must not reach the backend: mode_b values are expected to
 * have been lowered before code selection, so seeing one is a hard error. */
5537 /* this probably means not all mode_b nodes were lowered... */
5538 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5543 * Transform the Projs from a Bound.
5545 static ir_node *gen_Proj_Bound(ir_node *node)
/* Transform a Proj of a Bound node: the Bound becomes a conditional jump
 * (Jcc), so map X_regular/X_except to its true/false Projs; the memory and
 * result Projs route straight through to the Bound's own operands. */
5548 ir_node *pred = get_Proj_pred(node);
5550 switch (get_Proj_proj(node)) {
5552 return be_transform_node(get_Bound_mem(pred));
5553 case pn_Bound_X_regular:
5554 new_node = be_transform_node(pred);
5555 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5556 case pn_Bound_X_except:
5557 new_node = be_transform_node(pred);
5558 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
/* the checked index is returned unchanged as the Bound's result */
5560 return be_transform_node(get_Bound_index(pred));
5562 panic("unsupported Proj from Bound");
5566 static ir_node *gen_Proj_ASM(ir_node *node)
/* Transform a Proj of an ASM node: the memory output is always the last
 * output of the transformed ASM node; data outputs keep their position. */
5568 ir_mode *mode = get_irn_mode(node);
5569 ir_node *pred = get_Proj_pred(node);
5570 ir_node *new_pred = be_transform_node(pred);
5571 long pos = get_Proj_proj(node);
5573 if (mode == mode_M) {
/* the memory Proj is mapped to the ASM node's last output */
5574 pos = arch_irn_get_n_outs(new_pred)-1;
5575 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5577 } else if (mode_is_float(mode)) {
5580 panic("unexpected proj mode at ASM");
5583 return new_r_Proj(new_pred, mode, pos);
5587 * Transform and potentially renumber Proj nodes.
5589 static ir_node *gen_Proj(ir_node *node)
/* Transform (and potentially renumber) a Proj node by dispatching on the
 * opcode of its predecessor; falls back to duplicating the Proj. */
5591 ir_node *pred = get_Proj_pred(node);
5594 switch (get_irn_opcode(pred)) {
/* a Store has only a memory result: the transformed Store IS that result */
5596 proj = get_Proj_proj(node);
5597 if (proj == pn_Store_M) {
5598 return be_transform_node(pred);
5600 panic("No idea how to transform proj->Store");
5603 return gen_Proj_Load(node);
5605 return gen_Proj_ASM(node);
5607 return gen_Proj_Builtin(node);
5611 return gen_Proj_DivMod(node);
5613 return gen_Proj_CopyB(node);
5615 return gen_Proj_Quot(node);
5617 return gen_Proj_be_SubSP(node);
5619 return gen_Proj_be_AddSP(node);
5621 return gen_Proj_be_Call(node);
5623 return gen_Proj_Cmp(node);
5625 return gen_Proj_Bound(node);
5627 proj = get_Proj_proj(node);
5629 case pn_Start_X_initial_exec: {
5630 ir_node *block = get_nodes_block(pred);
5631 ir_node *new_block = be_transform_node(block);
5632 dbg_info *dbgi = get_irn_dbg_info(node);
5633 /* we exchange the ProjX with a jump */
5634 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5639 case pn_Start_P_tls:
5640 return gen_Proj_tls(node);
5645 if (is_ia32_l_FloattoLL(pred)) {
5646 return gen_Proj_l_FloattoLL(node);
5648 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* GP-register results must carry mode_Iu after transformation */
5652 ir_mode *mode = get_irn_mode(node);
5653 if (ia32_mode_needs_gp_reg(mode)) {
5654 ir_node *new_pred = be_transform_node(pred);
5655 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5656 get_Proj_proj(node));
/* keep the old node number for debugging purposes */
5657 new_proj->node_nr = node->node_nr;
5662 return be_duplicate_node(node);
5666 * Enters all transform functions into the generic pointer
5668 static void register_transformers(void)
/* Install the gen_* transform callback for every ir_op into its generic
 * function pointer; ops that must not appear here get bad_transform. */
5670 /* first clear the generic function pointer for all ops */
5671 clear_irp_opcodes_generic_func();
5673 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5674 #define BAD(a) { op_##a->ops.generic = (op_func)bad_transform; }
5714 /* transform ops from intrinsic lowering */
5726 GEN(ia32_l_LLtoFloat)
5727 GEN(ia32_l_FloattoLL)
5733 /* we should never see these nodes */
5748 /* handle builtins */
5751 /* handle generic backend nodes */
5765 * Pre-transform all unknown and noreg nodes.
5767 static void ia32_pretransform_node(void)
/* Pre-transform the per-graph NoReg nodes and cache the NoMem/noreg_GP
 * nodes used throughout the transformers. */
5769 ia32_code_gen_t *cg = env_cg;
5771 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5772 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5773 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5775 nomem = get_irg_no_mem(current_ir_graph);
5776 noreg_GP = ia32_new_NoReg_gp(cg);
5782 * Walker, checks if all ia32 nodes producing more than one result have their
5783 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
5785 static void add_missing_keep_walker(ir_node *node, void *data)
/* Walker: for every multi-output ia32 node, record which outputs already
 * have a Proj (bitmask) and create Proj+Keep for the missing ones, so the
 * register allocator sees all clobbered outputs. */
5788 unsigned found_projs = 0;
5789 const ir_edge_t *edge;
5790 ir_mode *mode = get_irn_mode(node);
5795 if (!is_ia32_irn(node))
5798 n_outs = arch_irn_get_n_outs(node);
5801 if (is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the output count must fit into it */
5804 assert(n_outs < (int) sizeof(unsigned) * 8);
5805 foreach_out_edge(node, edge) {
5806 ir_node *proj = get_edge_src_irn(edge);
5809 /* The node could be kept */
5813 if (get_irn_mode(proj) == mode_M)
5816 pn = get_Proj_proj(proj);
5817 assert(pn < n_outs);
5818 found_projs |= 1 << pn;
5822 /* are keeps missing? */
5824 for (i = 0; i < n_outs; ++i) {
5827 const arch_register_req_t *req;
5828 const arch_register_class_t *cls;
5830 if (found_projs & (1 << i)) {
5834 req = arch_get_out_register_req(node, i);
/* flag outputs need no keep */
5839 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5843 block = get_nodes_block(node);
5844 in[0] = new_r_Proj(node, arch_register_class_mode(cls), i);
/* reuse one Keep per node; create it on first demand */
5845 if (last_keep != NULL) {
5846 be_Keep_add_node(last_keep, cls, in[0]);
5848 last_keep = be_new_Keep(block, 1, in);
5849 if (sched_is_scheduled(node)) {
5850 sched_add_after(node, last_keep);
5857 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5860 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
/* Run add_missing_keep_walker over the whole graph. */
5862 ir_graph *irg = be_get_birg_irg(cg->birg);
5863 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5867 * Post-process all calls if we are in SSE mode.
5868 * The ABI requires that the results are in st0, copy them
5869 * to a xmm register.
5871 static void postprocess_fp_call_results(void)
/* In SSE2 mode float call results still arrive in st(0) (x87) per ABI.
 * For every recorded call, patch each float result's users: turn xStores
 * directly into vfst, otherwise spill st(0) to the frame with vfst and
 * reload it into an xmm register with xLoad. */
5875 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5876 ir_node *call = call_list[i];
5877 ir_type *mtp = call_types[i];
5880 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5881 ir_type *res_tp = get_method_res_type(mtp, j);
5882 ir_node *res, *new_res;
5883 const ir_edge_t *edge, *next;
5886 if (! is_atomic_type(res_tp)) {
5887 /* no floating point return */
5890 mode = get_type_mode(res_tp);
5891 if (! mode_is_float(mode)) {
5892 /* no floating point return */
/* the float result lives in the call's vf0+j (x87) output */
5896 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5899 /* now patch the users */
5900 foreach_out_edge_safe(res, edge, next) {
5901 ir_node *succ = get_edge_src_irn(edge);
5904 if (be_is_Keep(succ))
5907 if (is_ia32_xStore(succ)) {
5908 /* an xStore can be patched into an vfst */
5909 dbg_info *db = get_irn_dbg_info(succ);
5910 ir_node *block = get_nodes_block(succ);
5911 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5912 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5913 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5914 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5915 ir_mode *mode = get_ia32_ls_mode(succ);
5917 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5918 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5919 if (is_ia32_use_frame(succ))
5920 set_ia32_use_frame(st);
5921 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5922 set_irn_pinned(st, get_irn_pinned(succ));
5923 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st(0)->xmm copy once per result */
5927 if (new_res == NULL) {
5928 dbg_info *db = get_irn_dbg_info(call);
5929 ir_node *block = get_nodes_block(call);
5930 ir_node *frame = get_irg_frame(current_ir_graph);
5931 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5932 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5933 ir_node *vfst, *xld, *new_mem;
5935 /* store st(0) on stack */
5936 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5937 set_ia32_op_type(vfst, ia32_AddrModeD);
5938 set_ia32_use_frame(vfst);
5940 /* load into SSE register */
5941 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5942 set_ia32_op_type(xld, ia32_AddrModeS);
5943 set_ia32_use_frame(xld);
5945 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5946 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* make all former memory users depend on the reload's memory */
5948 if (old_mem != NULL) {
5949 edges_reroute(old_mem, new_mem, current_ir_graph);
5953 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5960 /* do the transformation */
5961 void ia32_transform_graph(ia32_code_gen_t *cg)
/* Main entry of the ia32 code selection: register the transformers, build
 * helper analyses (heights, non-address-mode info), run the transform with
 * CSE disabled, and in SSE2 mode post-process float call results. */
5965 register_transformers();
5967 initial_fpcw = NULL;
5970 be_timer_push(T_HEIGHTS);
5971 heights = heights_new(cg->irg);
5972 be_timer_pop(T_HEIGHTS);
5973 ia32_calculate_non_address_mode_nodes(cg->birg);
5975 /* the transform phase is not safe for CSE (yet) because several nodes get
5976 * attributes set after their creation */
5977 cse_last = get_opt_cse();
/* collect float-returning calls for postprocess_fp_call_results() */
5980 call_list = NEW_ARR_F(ir_node *, 0);
5981 call_types = NEW_ARR_F(ir_type *, 0);
5982 be_transform_graph(cg->birg, ia32_pretransform_node);
5984 if (ia32_cg_config.use_sse2)
5985 postprocess_fp_call_results();
5986 DEL_ARR_F(call_types);
5987 DEL_ARR_F(call_list);
/* restore the caller's CSE setting and free helper analyses */
5989 set_opt_cse(cse_last);
5991 ia32_free_non_address_mode_nodes();
5992 heights_free(heights);
5996 void ia32_init_transform(void)
5998 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");