2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* hex bit patterns used to build the sign/abs masks for float negation and
 * absolute value (see ia32_gen_fp_known_const below) */
#define SFP_SIGN "0x80000000"
#define DFP_SIGN "0x8000000000000000"
#define SFP_ABS "0x7FFFFFFF"
#define DFP_ABS "0x7FFFFFFFFFFFFFFF"
/* decimal constants used for unsigned long long <-> float conversions */
#define DFP_INTMAX "9223372036854775807"
#define ULL_BIAS "18446744073709551616"

/* local-label names of the entities created for the constants above */
#define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
#define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
#define ENT_SFP_ABS ".LC_ia32_sfp_abs"
#define ENT_DFP_ABS ".LC_ia32_dfp_abs"
#define ENT_ULL_BIAS ".LC_ia32_ull_bias"

/* shorthands for the modes of the vfp (x87) and xmm (SSE) register classes */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/** debug module handle (only present in debug builds) */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/** cached, already-transformed node of the initial fpu control word */
static ir_node *initial_fpcw = NULL;

/* Constructor-function types used by the generic binop/unop transformation
 * helpers below.
 * NOTE(review): several typedefs are visibly truncated in this extract
 * (trailing parameter/closing-parenthesis lines missing) -- compare against
 * the full file. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
        ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
        ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
        ir_node *op1, ir_node *op2);

typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
        ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
        ir_node *base, ir_node *index, ir_node *mem);

typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
        ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

/* forward declarations of helpers defined later in this file */
static ir_node *create_immediate_or_transform(ir_node *node,
        char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
                                dbg_info *dbgi, ir_node *block,
                                ir_node *op, ir_node *orig_node);

/* its enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
/** the call types matching call_list entries */
static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be created with a simple SSE instruction
 * (instead of loading it from memory).
 *
 * NOTE(review): this extract is missing lines -- the continuation of the
 * tarval_is_null() condition, the return statements and closing braces.
 * Compare against the full file before relying on the control flow shown.
 */
static bool is_simple_sse_Const(ir_node *node)
	tarval  *tv   = get_Const_tarval(node);
	ir_mode *mode = get_tarval_mode(tv);

	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bit of the double little-endian byte-wise */
		unsigned val = get_tarval_sub_bits(tv, 0) |
		               (get_tarval_sub_bits(tv, 1) << 8) |
		               (get_tarval_sub_bits(tv, 2) << 16) |
		               (get_tarval_sub_bits(tv, 3) << 24);

		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
199 * return NoREG or pic_base in case of PIC.
200 * This is necessary as base address for newly created symbols
202 static ir_node *get_symconst_base(void)
204 if (env_cg->birg->main_env->options->pic) {
205 return arch_code_generator_get_pic_base(env_cg);
/**
 * Transforms a Const.
 *
 * Float constants are materialized cheaply where possible (SSE xZero for
 * 0.0, x87 fldz/fld1 for 0.0/1.0, optionally constructed SSE sequences when
 * CONSTRUCT_SSE_CONST is defined) and otherwise loaded from a constant-pool
 * entity. Integer constants become an ia32 Const node.
 *
 * NOTE(review): this extract is missing several lines (declarations of
 * load/res/floatent/base/ls_mode/cnst/val, some else-branches, returns and
 * closing braces) -- compare against the full file.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node  *old_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(old_block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_mode  *mode      = get_irn_mode(node);

	assert(is_Const(node));

	if (mode_is_float(mode)) {
		/* SSE2 path: try cheap materializations before loading from memory */
		if (ia32_cg_config.use_sse2) {
			tarval *tv = get_Const_tarval(node);
			if (tarval_is_null(tv)) {
				/* 0.0 needs no load at all */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* construct 1.0: all-ones shifted left then right to leave
				 * exactly the exponent pattern of 1.0 */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
				               (get_tarval_sub_bits(tv, 1) << 8) |
				               (get_tarval_sub_bits(tv, 2) << 16) |
				               (get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			if (mode == mode_D) {
				unsigned val = get_tarval_sub_bits(tv, 0) |
				               (get_tarval_sub_bits(tv, 1) << 8) |
				               (get_tarval_sub_bits(tv, 2) << 16) |
				               (get_tarval_sub_bits(tv, 3) << 24);
				ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
				ir_node *cnst, *psllq;

				/* fine, lower 32bit are zero, produce 32bit value */
				val = get_tarval_sub_bits(tv, 4) |
				      (get_tarval_sub_bits(tv, 5) << 8) |
				      (get_tarval_sub_bits(tv, 6) << 16) |
				      (get_tarval_sub_bits(tv, 7) << 24);
				cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
				/* shift the 32bit pattern into the upper half of the double */
				psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
				set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* fallback: load the value from a constant-pool entity */
			floatent = create_float_const_entity(node);

			base = get_symconst_base();
			load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
			/* x87 path: fldz / fld1 for the trivial cases */
			if (is_Const_null(node)) {
				load = new_bd_ia32_vfldz(dbgi, block);
				set_ia32_ls_mode(load, mode);
			} else if (is_Const_one(node)) {
				load = new_bd_ia32_vfld1(dbgi, block);
				set_ia32_ls_mode(load, mode);
			floatent = create_float_const_entity(node);
			/* create_float_const_ent is smart and sometimes creates
			ls_mode = get_type_mode(get_entity_type(floatent));
			base = get_symconst_base();
			load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);

		be_dep_on_frame(load);
	} else { /* non-float mode */
		tarval *tv = get_Const_tarval(node);
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);

		be_dep_on_frame(cnst);
/**
 * Transforms a SymConst.
 *
 * Float SymConsts become a load (SSE xLoad or x87 vfld) of the symbol's
 * memory; everything else becomes an ia32 Const carrying the entity.
 * Only symconst_addr_ent kinds are supported.
 *
 * NOTE(review): lines missing in this extract (declarations of cnst/entity,
 * else branch between the two load constructors, final return).
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node  *old_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(old_block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_mode  *mode      = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
			cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
		if (get_SymConst_kind(node) != symconst_addr_ent) {
			panic("backend only support symconst_addr_ent (at %+F)", node);
		entity = get_SymConst_entity(node);
		cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);

	be_dep_on_frame(cnst);
/**
 * Create a float type for the given mode and cache it.
 *
 * @param mode  the mode for the float type (might be integer mode for SSE2 types)
 * @param align alignment in bytes
 *
 * NOTE(review): each cache array has 16 slots indexed directly by align --
 * presumably align < 16 always holds at the call sites; confirm.
 * Several lines (function opener, closing braces, final else) are missing
 * from this extract.
 */
static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
	if (mode == mode_Iu) {
		/* one cached primitive type per (mode, alignment) pair */
		static ir_type *int_Iu[16] = {NULL, };

		if (int_Iu[align] == NULL) {
			int_Iu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Iu[align];
	} else if (mode == mode_Lu) {
		static ir_type *int_Lu[16] = {NULL, };

		if (int_Lu[align] == NULL) {
			int_Lu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Lu[align];
	} else if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] == NULL) {
			float_F[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_F[align];
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] == NULL) {
			float_D[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_D[align];
		/* fallback case (extended precision) */
		static ir_type *float_E[16] = {NULL, };

		if (float_E[align] == NULL) {
			float_E[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_E[align];
/**
 * Create a float[2] array type for the given atomic type and cache it
 * per (element mode, alignment).
 *
 * @param tp the atomic type
 *
 * NOTE(review): the function opener and the final `return arr;` are missing
 * from this extract.
 */
static ir_type *ia32_create_float_array(ir_type *tp)
	ir_mode  *mode  = get_type_mode(tp);
	unsigned  align = get_type_alignment_bytes(tp);

	if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] != NULL)
			return float_F[align];
		arr = float_F[align] = new_type_array(1, tp);
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] != NULL)
			return float_D[align];
		arr = float_D[align] = new_type_array(1, tp);
		/* fallback case (extended precision) */
		static ir_type *float_E[16] = {NULL, };

		if (float_E[align] != NULL)
			return float_E[align];
		arr = float_E[align] = new_type_array(1, tp);

	/* two elements, fixed layout */
	set_type_alignment_bytes(arr, align);
	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
	set_type_state(arr, layout_fixed);
/* Generates an entity for a known FP const (used for FP Neg + Abs).
 * The entity is created lazily on first request and cached; ia32_ULLBIAS
 * gets a two-element array initializer {0, bias}, the others a single
 * tarval initializer.
 * NOTE(review): lines missing in this extract (struct members mode/align,
 * declarations of tv/tp/ent/mode, some closing braces, else keyword before
 * the single-tarval initializer). */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	static const struct {
		const char *ent_name;
		const char *cnst_str;
	} names [ia32_known_const_max] = {
		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
	/* one cached entity per known-const kind */
	static ir_entity *ent_cache[ia32_known_const_max];

	const char *ent_name, *cnst_str;

	ent_name = names[kct].ent_name;
	if (! ent_cache[kct]) {
		cnst_str = names[kct].cnst_str;

		/* mode tag: 0 = 32bit int, 1 = 64bit int, otherwise float */
		switch (names[kct].mode) {
		case 0: mode = mode_Iu; break;
		case 1: mode = mode_Lu; break;
		default: mode = mode_F; break;
		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
		tp = ia32_create_float_type(mode, names[kct].align);

		if (kct == ia32_ULLBIAS)
			tp = ia32_create_float_array(tp);
		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);

		set_entity_ld_ident(ent, get_entity_ident(ent));
		add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
		set_entity_visibility(ent, ir_visibility_local);

		if (kct == ia32_ULLBIAS) {
			/* array initializer {0, bias} */
			ir_initializer_t *initializer = create_initializer_compound(2);

			set_initializer_compound_value(initializer, 0,
				create_initializer_tarval(get_mode_null(mode)));
			set_initializer_compound_value(initializer, 1,
				create_initializer_tarval(tv));

			set_entity_initializer(ent, initializer);
			set_entity_initializer(ent, create_initializer_tarval(tv));

	/* cache the entry */
	ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * Return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 *
 * NOTE(review): the extract is missing the bodies of most if statements
 * (the return statements) and the declarations of load/pn.
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2, match_flags_t flags)
	/* float constants are always available */
	if (is_Const(node)) {
		ir_mode *mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			if (ia32_cg_config.use_sse2) {
				if (is_simple_sse_Const(node))
				if (is_simple_x87_Const(node))
			/* not worth folding a constant that is used elsewhere too */
			if (get_irn_n_edges(node) > 1)

	load = get_Proj_pred(node);
	pn = get_Proj_proj(node);
	if (!is_Load(load) || pn != pn_Load_res)
	if (get_nodes_block(load) != block)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && prevents_AM(block, load, other))
	if (other2 != NULL && prevents_AM(block, load, other2))
typedef struct ia32_address_mode_t ia32_address_mode_t;
/** Result of matching a node's operands into ia32 operand/address modes.
 * NOTE(review): several members (addr, ls_mode, pinned, mem_proj, new_op1,
 * new_op2) referenced elsewhere in this file are missing from this extract. */
struct ia32_address_mode_t {
	ia32_op_type_t op_type;        /**< normal operands or source address mode */
	unsigned commutative : 1;      /**< the matched operation is commutative */
	unsigned ins_permuted : 1;     /**< the two inputs were swapped while matching */
615 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
617 /* construct load address */
618 memset(addr, 0, sizeof(addr[0]));
619 ia32_create_address_mode(addr, ptr, 0);
621 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
622 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
623 addr->mem = be_transform_node(mem);
/**
 * Build an address mode for the given node: float Const nodes become loads
 * from a constant-pool entity, Proj(Load) nodes reuse the Load's address.
 *
 * NOTE(review): lines missing in this extract (declarations of
 * load/ptr/mem/new_mem, the early return after the Const case, closing
 * braces).
 */
static void build_address(ia32_address_mode_t *am, ir_node *node,
                          ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	/* floating point immediates */
	if (is_Const(node)) {
		ir_entity *entity = create_float_const_entity(node);
		addr->base = get_symconst_base();
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		/* load mode is determined by the entity the constant was placed in */
		am->ls_mode = get_type_mode(get_entity_type(entity));
		am->pinned = op_pin_state_floats;

	load = get_Proj_pred(node);
	ptr = get_Load_ptr(load);
	mem = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned = get_irn_pinned(load);
	am->ls_mode = get_Load_mode(load);
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/**
 * Copy the address-mode description @p addr into the ia32 attributes of
 * @p node (scale, symconst, offset, frame entity).
 *
 * NOTE(review): in upstream libFirm the set_ia32_use_frame() call is guarded
 * by "if (addr->use_frame)"; that guard line appears to be missing from this
 * extract -- confirm against the full file.
 */
static void set_address(ir_node *node, const ia32_address_t *addr)
	set_ia32_am_scale(node, addr->scale);
	set_ia32_am_sc(node, addr->symconst_ent);
	set_ia32_am_offs_int(node, addr->offset);
	if (addr->symconst_sign)
		set_ia32_am_sc_sign(node);
		set_ia32_use_frame(node);
	set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node.
 *
 * NOTE(review): upstream the trailing set_ia32_commutative() call is guarded
 * by "if (am->commutative)"; the guard line appears to be missing from this
 * extract -- confirm against the full file.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
	set_address(node, &am->addr);

	set_ia32_op_type(node, am->op_type);
	set_ia32_ls_mode(node, am->ls_mode);
	if (am->pinned == op_pin_state_pinned) {
		/* beware: some nodes are already pinned and did not allow to change the state */
		if (get_irn_pinned(node) != op_pin_state_pinned)
			set_irn_pinned(node, op_pin_state_pinned);
	set_ia32_commutative(node);
/**
 * Check, if a given node is a Down-Conv, ie. a integer Conv
 * from a mode with more bits to a mode with lesser bits.
 * Moreover, we return only true if the node has not more than 1 user.
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 *
 * NOTE(review): lines missing in this extract (is_Conv() early-out,
 * declarations of src_mode/dest_mode, the leading "return" of the final
 * expression).
 */
static int is_downconv(const ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (not optimal but for now...)
	 */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
		ia32_mode_needs_gp_reg(src_mode) &&
		ia32_mode_needs_gp_reg(dest_mode) &&
		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
725 /* Skip all Down-Conv's on a given node and return the resulting node. */
726 ir_node *ia32_skip_downconv(ir_node *node)
728 while (is_downconv(node))
729 node = get_Conv_op(node);
/**
 * Create a 32 bit up-conversion of @p node; the extension kind follows the
 * signedness of the node's mode.
 *
 * NOTE(review): the lines choosing tgt_mode (and the declarations of
 * block/dbgi/tgt_mode) are missing from this extract.
 */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_signed(mode)) {

	block = get_nodes_block(node);
	dbgi = get_irn_dbg_info(node);

	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 *
 * NOTE(review): several lines are missing from this extract (declarations of
 * use_am/use_immediate/noreg, various else branches, returns and closing
 * braces) -- compare against the full file before relying on the control
 * flow shown.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
                            ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr = &am->addr;
	ir_mode *mode = get_irn_mode(op2);
	int mode_bits = get_mode_size_bits(mode);
	ir_node *new_op1, *new_op2;
	unsigned commutative;
	int use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	commutative = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am = (flags & match_am) != 0;
	use_immediate = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* 8/16 bit address mode only when the flags explicitly allow it */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
	    (mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
		op1 = ia32_skip_downconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = try_create_Immediate(op2, 0);

	/* try source address mode for op2 first */
	if (new_op2 == NULL &&
	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		build_address(am, op2, 0);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		if (mode_is_float(mode)) {
			new_op2 = ia32_new_NoReg_vfp(env_cg);
		am->op_type = ia32_AddrModeS;
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
	           ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		/* commutative: fold op1 into the address mode instead */
		build_address(am, op1, 0);

		if (mode_is_float(mode)) {
			noreg = ia32_new_NoReg_vfp(env_cg);

		if (new_op2 != NULL) {
			new_op1 = be_transform_node(op2);
			/* the operands changed places */
			am->ins_permuted = 1;
		am->op_type = ia32_AddrModeS;
		am->op_type = ia32_Normal;
		if (flags & match_try_am) {

		mode = get_irn_mode(op2);
		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
			new_op2 = create_upconv(op2, NULL);
			am->ls_mode = mode_Iu;
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;

	/* fill in missing address parts with defaults */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1 = new_op1;
	am->new_op2 = new_op2;
	am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node the node
 * @param am   its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 *
 * NOTE(review): declarations of mode/load and the fallthrough return are
 * missing from this extract.
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	/* nothing to do when no memory was folded in */
	if (am->mem_proj == NULL)

	/* we have to create a mode_T so the old MemProj can attach to us */
	mode = get_irn_mode(node);
	load = get_Proj_pred(am->mem_proj);

	/* the old load is now represented by this node */
	be_set_transformed_node(load, node);

	if (mode != mode_T) {
		set_irn_mode(node, mode_T);
		return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node  The original node for which the binop is created
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): the declaration of dbgi and the final return are missing
 * from this extract.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
                          construct_binop_func *func, match_flags_t flags)
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op.
 *
 * NOTE(review): the enum opener/closer lines are missing from this extract.
 */
	n_ia32_l_binop_left, /**< ia32 left input */
	n_ia32_l_binop_right, /**< ia32 right input */
	n_ia32_l_binop_eflags /**< ia32 eflags input */

/* verify that the generic indices agree with the Adc and Sbb node layouts */
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node  The node to transform
 * @param func  The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 *
 * NOTE(review): the declaration of dbgi and the final return are missing
 * from this extract.
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
	ir_node *src_block = get_nodes_block(node);
	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
	ir_node *block, *new_node, *new_eflags;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	match_arguments(&am, src_block, op1, op2, eflags, flags);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_eflags = be_transform_node(eflags);
	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2, new_eflags);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
997 static ir_node *get_fpcw(void)
1000 if (initial_fpcw != NULL)
1001 return initial_fpcw;
1003 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
1004 &ia32_fp_cw_regs[REG_FPCW]);
1005 initial_fpcw = be_transform_node(fpcw);
1007 return initial_fpcw;
/**
 * Construct a standard x87 float binary operation, set AM and immediate if
 * required; the fpu control word is passed as an extra input.
 *
 * @param op1  The first operand
 * @param op2  The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): lines missing in this extract (declaration of dbgi, the
 * is_Div check before get_divop_resmod, the flags |= match_am line under
 * the <= 64 bit check, final return).
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
                                    construct_binop_float_func *func)
	ir_mode *mode = get_irn_mode(node);
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	ia32_x87_attr_t *attr;
	/* All operations are considered commutative, because there are reverse
	match_flags_t flags = match_commutative;

	/* happens for div nodes... */
		mode = get_divop_resmod(node);

	/* cannot use address mode with long double on x87 */
	if (get_mode_size_bits(mode) <= 64)

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2, get_fpcw());
	set_am_attributes(new_node, &am);

	/* propagate the operand permutation into the x87 attribute so the
	 * emitter can pick the reversed instruction form */
	attr = get_ia32_x87_attr(new_node);
	attr->attr.data.ins_permuted = am.ins_permuted;

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if required.
 *
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): lines missing in this extract (declaration of dbgi, the
 * op2 = op advance inside the Conv-skipping loop, final return).
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
                                construct_shift_func *func,
                                match_flags_t flags)
	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;

	assert(! mode_is_float(get_irn_mode(node)));
	assert(flags & match_immediate);
	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);

	if (flags & match_mode_neutral) {
		op1 = ia32_skip_downconv(op1);
		new_op1 = be_transform_node(op1);
	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
		new_op1 = create_upconv(op1, node);
		new_op1 = be_transform_node(op1);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
		ir_node *const op = get_Conv_op(op2);
		if (mode_is_float(get_irn_mode(op)))
	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);

	new_op2 = create_immediate_or_transform(op2, 0);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op1, new_op2);
	SET_IA32_ORIG_NODE(new_node, node);

	/* lowered shift instruction may have a dependency operand, handle it here */
	if (get_irn_arity(node) == 3) {
		/* we have a dependency */
		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
		add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op   The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): declaration of dbgi and the final return are missing from
 * this extract.
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
                         match_flags_t flags)
	ir_node *block, *new_block, *new_op, *new_node;

	assert(flags == 0 || flags == match_mode_neutral);
	if (flags & match_mode_neutral) {
		op = ia32_skip_downconv(op);

	new_op = be_transform_node(op);
	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op);

	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an ia32 Lea node from a matched address (base + index parts are
 * transformed, missing parts replaced by noreg, address attributes copied
 * onto the Lea).
 *
 * NOTE(review): lines missing in this extract (base = addr->base / NULL
 * check before be_transform_node, the noreg assignments in the NULL
 * branches, final return).
 */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
                                        ia32_address_t *addr)
	ir_node *base, *index, *res;

	base = be_transform_node(base);

	index = addr->index;
	if (index == NULL) {
		index = be_transform_node(index);

	res = new_bd_ia32_Lea(dbgi, block, base, index);
	set_address(res, addr);
1169 * Returns non-zero if a given address mode has a symbolic or
1170 * numerical offset != 0.
1172 static int am_has_immediates(const ia32_address_t *addr)
1174 return addr->offset != 0 || addr->symconst_ent != NULL
1175 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add.
 *
 * Strategy (see numbered list below): float Adds go to SSE/x87; integer
 * Adds become a Const (pure immediate tree), a Lea (add-with-immediate or
 * general case), or a real Add when source address mode pays off.
 *
 * @return the created ia32 Add node
 *
 * NOTE(review): lines missing in this extract (declaration of dbgi, the
 * mode_neutral check before ia32_mark_non_am, some returns and closing
 * braces).
 */
static ir_node *gen_Add(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *op1 = get_Add_left(node);
	ir_node *op2 = get_Add_right(node);
	ir_node *block, *new_block, *new_node, *add_immediate_op;
	ia32_address_t addr;
	ia32_address_mode_t am;

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
			                 match_commutative | match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);

	ia32_mark_non_am(node);

	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);

	/*
	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
	 * 1. Add with immediate -> Lea
	 * 2. Add with possible source address mode -> Add
	 * 3. Otherwise -> Lea
	 */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, node, ia32_create_am_force);
	add_immediate_op = NULL;

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);

	/* a constant? */
	if (addr.base == NULL && addr.index == NULL) {
		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
		                             addr.symconst_sign, 0, addr.offset);
		be_dep_on_frame(new_node);
		SET_IA32_ORIG_NODE(new_node, node);

	/* add with immediate? */
	if (addr.index == NULL) {
		add_immediate_op = addr.base;
	} else if (addr.base == NULL && addr.scale == 0) {
		add_immediate_op = addr.index;

	if (add_immediate_op != NULL) {
		if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
			return be_transform_node(add_immediate_op);

		new_node = create_lea_from_address(dbgi, new_block, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

	/* test if we can use source address mode */
	match_arguments(&am, block, op1, op2, NULL, match_commutative
			| match_mode_neutral | match_am | match_immediate | match_try_am);

	/* construct an Add with source address mode */
	if (am.op_type == ia32_AddrModeS) {
		ia32_address_t *am_addr = &am.addr;
		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
		                           am_addr->index, am_addr->mem, am.new_op1,
		set_am_attributes(new_node, &am);
		SET_IA32_ORIG_NODE(new_node, node);

		new_node = fix_mem_proj(new_node, &am);

	/* otherwise construct a lea */
	new_node = create_lea_from_address(dbgi, new_block, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
1275 * Creates an ia32 Mul.
/* Float: xMul (SSE2) or vfmul (x87); integer: IMul with full matching
 * (commutative, AM, mode-neutral, immediates). */
1277 * @return the created ia32 Mul node
1279 static ir_node *gen_Mul(ir_node *node)
1281 ir_node *op1 = get_Mul_left(node);
1282 ir_node *op2 = get_Mul_right(node);
1283 ir_mode *mode = get_irn_mode(node);
1285 if (mode_is_float(mode)) {
1286 if (ia32_cg_config.use_sse2)
1287 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1288 match_commutative | match_am);
1290 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1292 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1293 match_commutative | match_am | match_mode_neutral |
1294 match_immediate | match_am_and_immediates);
1298 * Creates an ia32 Mulh.
1299 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1300 * this result while Mul returns the lower 32 bit.
1302 * @return the created ia32 Mulh node
1304 static ir_node *gen_Mulh(ir_node *node)
1306 dbg_info *dbgi = get_irn_dbg_info(node);
1307 ir_node *op1 = get_Mulh_left(node);
1308 ir_node *op2 = get_Mulh_right(node);
1309 ir_mode *mode = get_irn_mode(node);
1311 ir_node *proj_res_high;
/* Only 32-bit Mulh is supported by this backend. */
1313 if (get_mode_size_bits(mode) != 32) {
1314 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* Signed uses IMul1OP, unsigned plain Mul; result is the high-word Proj. */
1317 if (mode_is_signed(mode)) {
1318 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1319 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1321 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1322 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1324 return proj_res_high;
1328 * Creates an ia32 And.
/* Recognizes And(x, 0xFF/0xFFFF) as a zero-extension and emits an I2I Conv
 * instead of an And; everything else becomes an ia32 And. */
1330 * @return The created ia32 And node
1332 static ir_node *gen_And(ir_node *node)
1334 ir_node *op1 = get_And_left(node);
1335 ir_node *op2 = get_And_right(node);
1336 assert(! mode_is_float(get_irn_mode(node)));
1338 /* is it a zero extension? */
1339 if (is_Const(op2)) {
1340 tarval *tv = get_Const_tarval(op2);
1341 long v = get_tarval_long(tv);
1343 if (v == 0xFF || v == 0xFFFF) {
1344 dbg_info *dbgi = get_irn_dbg_info(node);
1345 ir_node *block = get_nodes_block(node);
/* NOTE(review): src_mode selection for the 0xFF case is elided here. */
1352 assert(v == 0xFFFF);
1355 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1360 return gen_binop(node, op1, op2, new_bd_ia32_And,
1361 match_commutative | match_mode_neutral | match_am | match_immediate);
1367 * Creates an ia32 Or.
/* Integer only — floats are asserted away. */
1369 * @return The created ia32 Or node
1371 static ir_node *gen_Or(ir_node *node)
1373 ir_node *op1 = get_Or_left(node);
1374 ir_node *op2 = get_Or_right(node);
1376 assert (! mode_is_float(get_irn_mode(node)));
1377 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1378 | match_mode_neutral | match_am | match_immediate);
1384 * Creates an ia32 Eor.
/* Firm's Eor (exclusive or) maps to the ia32 Xor node. */
1386 * @return The created ia32 Eor node
1388 static ir_node *gen_Eor(ir_node *node)
1390 ir_node *op1 = get_Eor_left(node);
1391 ir_node *op2 = get_Eor_right(node);
1393 assert(! mode_is_float(get_irn_mode(node)));
1394 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1395 | match_mode_neutral | match_am | match_immediate);
1400 * Creates an ia32 Sub.
/* Float: xSub (SSE2) or vfsub (x87). A Sub with a constant right operand
 * should have been normalized to an Add by the middleend; warn if seen. */
1402 * @return The created ia32 Sub node
1404 static ir_node *gen_Sub(ir_node *node)
1406 ir_node *op1 = get_Sub_left(node);
1407 ir_node *op2 = get_Sub_right(node);
1408 ir_mode *mode = get_irn_mode(node);
1410 if (mode_is_float(mode)) {
1411 if (ia32_cg_config.use_sse2)
1412 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1414 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1417 if (is_Const(op2)) {
1418 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1422 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1423 | match_am | match_immediate);
/**
 * Combines the memory input of a node with the memory consumed by its
 * address mode, avoiding self-referential memory loops: if src_mem is (or
 * contains, via a Sync) the memory Proj of the load folded into the address
 * mode, that predecessor is dropped; otherwise a Sync of both is built.
 */
1426 static ir_node *transform_AM_mem(ir_node *const block,
1427 ir_node *const src_val,
1428 ir_node *const src_mem,
1429 ir_node *const am_mem)
1431 if (is_NoMem(am_mem)) {
1432 return be_transform_node(src_mem);
1433 } else if (is_Proj(src_val) &&
1435 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1436 /* avoid memory loop */
1438 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1439 ir_node *const ptr_pred = get_Proj_pred(src_val);
1440 int const arity = get_Sync_n_preds(src_mem);
1445 NEW_ARR_A(ir_node*, ins, arity + 1);
1447 /* NOTE: This sometimes produces dead-code because the old sync in
1448 * src_mem might not be used anymore, we should detect this case
1449 * and kill the sync... */
1450 for (i = arity - 1; i >= 0; --i) {
1451 ir_node *const pred = get_Sync_pred(src_mem, i);
1453 /* avoid memory loop */
1454 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1457 ins[n++] = be_transform_node(pred);
1462 return new_r_Sync(block, n, ins);
/* Fallback: Sync the transformed source memory with the AM memory. */
1466 ins[0] = be_transform_node(src_mem);
1468 return new_r_Sync(block, 2, ins);
1473 * Create a 32bit to 64bit signed extension.
/* Uses cdq (Cltd) when the config prefers the short eax form, otherwise a
 * Sar by 31 to replicate the sign bit. */
1475 * @param dbgi debug info
1476 * @param block the block where node nodes should be placed
1477 * @param val the value to extend
1478 * @param orig the original node
1480 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1481 ir_node *val, const ir_node *orig)
1486 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a fake eax input for cdq. */
1487 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1488 be_dep_on_frame(pval);
1489 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1491 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1492 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1494 SET_IA32_ORIG_NODE(res, orig);
1499 * Generates an ia32 DivMod with additional infrastructure for the
1500 * register allocator if needed.
/* Shared worker for Div/Mod/DivMod: extracts the operands per opcode,
 * matches source address mode on the divisor, and emits IDiv (signed, with
 * a 32→64 sign extension into edx) or Div (unsigned, edx zeroed). */
1502 static ir_node *create_Div(ir_node *node)
1504 dbg_info *dbgi = get_irn_dbg_info(node);
1505 ir_node *block = get_nodes_block(node);
1506 ir_node *new_block = be_transform_node(block);
1513 ir_node *sign_extension;
1514 ia32_address_mode_t am;
1515 ia32_address_t *addr = &am.addr;
1517 /* the upper bits have random contents for smaller modes */
1518 switch (get_irn_opcode(node)) {
1520 op1 = get_Div_left(node);
1521 op2 = get_Div_right(node);
1522 mem = get_Div_mem(node);
1523 mode = get_Div_resmode(node);
1526 op1 = get_Mod_left(node);
1527 op2 = get_Mod_right(node);
1528 mem = get_Mod_mem(node);
1529 mode = get_Mod_resmode(node);
1532 op1 = get_DivMod_left(node);
1533 op2 = get_DivMod_right(node);
1534 mem = get_DivMod_mem(node);
1535 mode = get_DivMod_resmode(node);
1538 panic("invalid divmod node %+F", node);
1541 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1543 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1544 is the memory of the consumed address. We can have only the second op as address
1545 in Div nodes, so check only op2. */
1546 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1548 if (mode_is_signed(mode)) {
1549 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1550 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1551 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* Unsigned: the high word input is a zero constant. */
1553 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1554 be_dep_on_frame(sign_extension);
1556 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1557 addr->index, new_mem, am.new_op2,
1558 am.new_op1, sign_extension);
1561 set_irn_pinned(new_node, get_irn_pinned(node));
1563 set_am_attributes(new_node, &am);
1564 SET_IA32_ORIG_NODE(new_node, node);
1566 new_node = fix_mem_proj(new_node, &am);
1572 * Generates an ia32 Mod.
/* Thin wrapper: the shared create_Div handles Div/Mod/DivMod alike. */
1574 static ir_node *gen_Mod(ir_node *node)
1576 return create_Div(node);
1580 * Generates an ia32 Div.
/* Thin wrapper: the shared create_Div handles Div/Mod/DivMod alike. */
1582 static ir_node *gen_Div(ir_node *node)
1584 return create_Div(node);
1588 * Generates an ia32 DivMod.
/* Thin wrapper: the shared create_Div handles Div/Mod/DivMod alike. */
1590 static ir_node *gen_DivMod(ir_node *node)
1592 return create_Div(node);
1598 * Creates an ia32 floating Div.
/* Quot is always floating point here: xDiv with SSE2, else x87 vfdiv. */
1600 * @return The created ia32 xDiv node
1602 static ir_node *gen_Quot(ir_node *node)
1604 ir_node *op1 = get_Quot_left(node);
1605 ir_node *op2 = get_Quot_right(node);
1607 if (ia32_cg_config.use_sse2) {
1608 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1610 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1616 * Creates an ia32 Shl.
/* Mode-neutral: upper bits of a shifted small mode are don't-care. */
1618 * @return The created ia32 Shl node
1620 static ir_node *gen_Shl(ir_node *node)
1622 ir_node *left = get_Shl_left(node);
1623 ir_node *right = get_Shl_right(node);
1625 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1626 match_mode_neutral | match_immediate);
1630 * Creates an ia32 Shr.
/* Logical right shift; not mode-neutral since upper bits matter. */
1632 * @return The created ia32 Shr node
1634 static ir_node *gen_Shr(ir_node *node)
1636 ir_node *left = get_Shr_left(node);
1637 ir_node *right = get_Shr_right(node);
1639 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1645 * Creates an ia32 Sar.
/* Recognizes two special patterns before emitting a plain Sar:
 *  - Shrs(x, 31): a pure sign extension, lowered via create_sex_32_64
 *    (NOTE(review): the value check for this branch is elided here);
 *  - Shrs(Shl(x, C), C) with C == 16 or 24: an 8/16-bit sign extension,
 *    lowered to an I2I Conv. */
1647 * @return The created ia32 Shrs node
1649 static ir_node *gen_Shrs(ir_node *node)
1651 ir_node *left = get_Shrs_left(node);
1652 ir_node *right = get_Shrs_right(node);
1654 if (is_Const(right)) {
1655 tarval *tv = get_Const_tarval(right);
1656 long val = get_tarval_long(tv);
1658 /* this is a sign extension */
1659 dbg_info *dbgi = get_irn_dbg_info(node);
1660 ir_node *block = be_transform_node(get_nodes_block(node));
1661 ir_node *new_op = be_transform_node(left);
1663 return create_sex_32_64(dbgi, block, new_op, node);
1667 /* 8 or 16 bit sign extension? */
1668 if (is_Const(right) && is_Shl(left)) {
1669 ir_node *shl_left = get_Shl_left(left);
1670 ir_node *shl_right = get_Shl_right(left);
1671 if (is_Const(shl_right)) {
1672 tarval *tv1 = get_Const_tarval(right);
1673 tarval *tv2 = get_Const_tarval(shl_right);
1674 if (tv1 == tv2 && tarval_is_long(tv1)) {
1675 long val = get_tarval_long(tv1);
1676 if (val == 16 || val == 24) {
1677 dbg_info *dbgi = get_irn_dbg_info(node);
1678 ir_node *block = get_nodes_block(node);
1688 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1697 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1703 * Creates an ia32 Rol.
/* Helper used by gen_Rotl; op1/op2 come from the caller's pattern match. */
1705 * @param op1 The first operator
1706 * @param op2 The second operator
1707 * @return The created ia32 RotL node
1709 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1711 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1717 * Creates an ia32 Ror.
1718 * NOTE: There is no RotR with immediate because this would always be a RotL
1719 * "imm-mode_size_bits" which can be pre-calculated.
1721 * @param op1 The first operator
1722 * @param op2 The second operator
1723 * @return The created ia32 RotR node
1725 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1727 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1733 * Creates an ia32 RotR or RotL (depending on the found pattern).
/* Rotl(x, bits - e) — visible here as Rotl(x, Add(Minus(e), bits)) — is
 * rewritten to Ror(x, e); anything else stays a Rol. */
1735 * @return The created ia32 RotL or RotR node
1737 static ir_node *gen_Rotl(ir_node *node)
1739 ir_node *rotate = NULL;
1740 ir_node *op1 = get_Rotl_left(node);
1741 ir_node *op2 = get_Rotl_right(node);
1743 /* Firm has only RotL, so we are looking for a right (op2)
1744 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1745 that means we can create a RotR instead of an Add and a RotL */
1749 ir_node *left = get_Add_left(add);
1750 ir_node *right = get_Add_right(add);
1751 if (is_Const(right)) {
1752 tarval *tv = get_Const_tarval(right);
1753 ir_mode *mode = get_irn_mode(node);
1754 long bits = get_mode_size_bits(mode);
1756 if (is_Minus(left) &&
1757 tarval_is_long(tv) &&
1758 get_tarval_long(tv) == bits &&
1761 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1762 rotate = gen_Ror(node, op1, get_Minus_op(left));
1767 if (rotate == NULL) {
1768 rotate = gen_Rol(node, op1, op2);
1777 * Transforms a Minus node.
/* Float negation: SSE2 xors the sign bit with a constant from memory
 * (ia32_SSIGN/ia32_DSIGN entity as AM source); x87 uses vfchs.
 * Integer negation uses the Neg node. */
1779 * @return The created ia32 Minus node
1781 static ir_node *gen_Minus(ir_node *node)
1783 ir_node *op = get_Minus_op(node);
1784 ir_node *block = be_transform_node(get_nodes_block(node));
1785 dbg_info *dbgi = get_irn_dbg_info(node);
1786 ir_mode *mode = get_irn_mode(node);
1791 if (mode_is_float(mode)) {
1792 ir_node *new_op = be_transform_node(op);
1793 if (ia32_cg_config.use_sse2) {
1794 /* TODO: non-optimal... if we have many xXors, then we should
1795 * rather create a load for the const and use that instead of
1796 * several AM nodes... */
1797 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1799 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1800 noreg_GP, nomem, new_op, noreg_xmm);
1802 size = get_mode_size_bits(mode);
1803 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1805 set_ia32_am_sc(new_node, ent);
1806 set_ia32_op_type(new_node, ia32_AddrModeS);
1807 set_ia32_ls_mode(new_node, mode);
1809 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1812 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1815 SET_IA32_ORIG_NODE(new_node, node);
1821 * Transforms a Not node.
/* Bitwise complement on integers only; mode_b Nots must be lowered earlier. */
1823 * @return The created ia32 Not node
1825 static ir_node *gen_Not(ir_node *node)
1827 ir_node *op = get_Not_op(node);
1829 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1830 assert (! mode_is_float(get_irn_mode(node)));
1832 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1838 * Transforms an Abs node.
/* Float abs: SSE2 masks the sign bit with an And against a constant
 * (ia32_SABS/ia32_DABS entity); x87 uses vfabs. Integer abs uses the
 * classic branchless sequence: s = x >> 31; result = (x ^ s) - s. */
1840 * @return The created ia32 Abs node
1842 static ir_node *gen_Abs(ir_node *node)
1844 ir_node *block = get_nodes_block(node);
1845 ir_node *new_block = be_transform_node(block);
1846 ir_node *op = get_Abs_op(node);
1847 dbg_info *dbgi = get_irn_dbg_info(node);
1848 ir_mode *mode = get_irn_mode(node);
1854 if (mode_is_float(mode)) {
1855 new_op = be_transform_node(op);
1857 if (ia32_cg_config.use_sse2) {
1858 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1859 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1860 noreg_GP, nomem, new_op, noreg_fp);
1862 size = get_mode_size_bits(mode);
1863 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1865 set_ia32_am_sc(new_node, ent);
1867 SET_IA32_ORIG_NODE(new_node, node);
1869 set_ia32_op_type(new_node, ia32_AddrModeS);
1870 set_ia32_ls_mode(new_node, mode);
1872 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1873 SET_IA32_ORIG_NODE(new_node, node);
1876 ir_node *xor, *sign_extension;
/* Smaller modes are widened to 32 bit first. */
1878 if (get_mode_size_bits(mode) == 32) {
1879 new_op = be_transform_node(op);
1881 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1884 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1886 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1887 nomem, new_op, sign_extension);
1888 SET_IA32_ORIG_NODE(xor, node);
1890 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1891 nomem, xor, sign_extension);
1892 SET_IA32_ORIG_NODE(new_node, node);
1899 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Callers then branch on the carry flag (Jc/Jnc) instead of testing. */
1901 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1903 dbg_info *dbgi = get_irn_dbg_info(cmp);
1904 ir_node *block = get_nodes_block(cmp);
1905 ir_node *new_block = be_transform_node(block);
1906 ir_node *op1 = be_transform_node(x);
1907 ir_node *op2 = be_transform_node(n);
1909 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1913 * Transform a node returning a "flag" result.
/* For a Proj of a Cmp: try the Bt pattern Cmp(And(1<<n, x), 0/==x) first
 * (rewriting the pnc to a carry-flag test), otherwise transform the Cmp and
 * tag the pnc with float/unsigned bits. Plain mode_b values are compared
 * against 0 via a Test node. */
1915 * @param node the node to transform
1916 * @param pnc_out the compare mode to use
1918 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1925 /* we have a Cmp as input */
1926 if (is_Proj(node)) {
1927 ir_node *pred = get_Proj_pred(node);
1929 pn_Cmp pnc = get_Proj_proj(node);
1930 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1931 ir_node *l = get_Cmp_left(pred);
1932 ir_node *r = get_Cmp_right(pred);
1934 ir_node *la = get_And_left(l);
1935 ir_node *ra = get_And_right(l);
/* Variant 1: the shifted one-bit is the left And operand. */
1937 ir_node *c = get_Shl_left(la);
1938 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1939 /* (1 << n) & ra) */
1940 ir_node *n = get_Shl_right(la);
1941 flags = gen_bt(pred, ra, n);
1942 /* we must generate a Jc/Jnc jump */
1943 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1946 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Variant 2: the shifted one-bit is the right And operand. */
1951 ir_node *c = get_Shl_left(ra);
1952 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1953 /* la & (1 << n)) */
1954 ir_node *n = get_Shl_right(ra);
1955 flags = gen_bt(pred, la, n);
1956 /* we must generate a Jc/Jnc jump */
1957 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1960 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1966 /* add ia32 compare flags */
1968 ir_node *l = get_Cmp_left(pred);
1969 ir_mode *mode = get_irn_mode(l);
1970 if (mode_is_float(mode))
1971 pnc |= ia32_pn_Cmp_float;
1972 else if (! mode_is_signed(mode))
1973 pnc |= ia32_pn_Cmp_unsigned;
1976 flags = be_transform_node(pred);
1981 /* a mode_b value, we have to compare it against 0 */
1982 dbgi = get_irn_dbg_info(node);
1983 new_block = be_transform_node(get_nodes_block(node));
1984 new_op = be_transform_node(node);
1985 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1986 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1987 *pnc_out = pn_Cmp_Lg;
1992 * Transforms a Load.
/* Builds an address mode for the pointer, then picks the load flavour:
 * xLoad (SSE2 float), vfld (x87 float), Conv_I2I with AM (integer < 32 bit,
 * widening at load time), or a plain Load. Float-pinned loads are marked
 * rematerializable. */
1994 * @return the created ia32 Load node
1996 static ir_node *gen_Load(ir_node *node)
1998 ir_node *old_block = get_nodes_block(node);
1999 ir_node *block = be_transform_node(old_block);
2000 ir_node *ptr = get_Load_ptr(node);
2001 ir_node *mem = get_Load_mem(node);
2002 ir_node *new_mem = be_transform_node(mem);
2005 dbg_info *dbgi = get_irn_dbg_info(node);
2006 ir_mode *mode = get_Load_mode(node);
2009 ia32_address_t addr;
2011 /* construct load address */
2012 memset(&addr, 0, sizeof(addr));
2013 ia32_create_address_mode(&addr, ptr, 0);
2020 base = be_transform_node(base);
2023 if (index == NULL) {
2026 index = be_transform_node(index);
2029 if (mode_is_float(mode)) {
2030 if (ia32_cg_config.use_sse2) {
2031 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2033 res_mode = mode_xmm;
2035 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2037 res_mode = mode_vfp;
2040 assert(mode != mode_b);
2042 /* create a conv node with address mode for smaller modes */
2043 if (get_mode_size_bits(mode) < 32) {
2044 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2045 new_mem, noreg_GP, mode);
2047 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2052 set_irn_pinned(new_node, get_irn_pinned(node));
2053 set_ia32_op_type(new_node, ia32_AddrModeS);
2054 set_ia32_ls_mode(new_node, mode);
2055 set_address(new_node, &addr);
2057 if (get_irn_pinned(node) == op_pin_state_floats) {
/* All load variants must agree on the result Proj number for remat. */
2058 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2059 && pn_ia32_vfld_res == pn_ia32_Load_res
2060 && pn_ia32_Load_res == pn_ia32_res);
2061 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2064 SET_IA32_ORIG_NODE(new_node, node);
2066 be_dep_on_frame(new_node);
/**
 * Decides whether a Load Proj (node) can be folded into a destination
 * address mode store at ptr: the load must be single-user, in the same
 * block, use the same pointer, and no other input may depend on it
 * (which would create a cycle).
 */
2070 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2071 ir_node *ptr, ir_node *other)
2078 /* we only use address mode if we're the only user of the load */
2079 if (get_irn_n_edges(node) > 1)
2082 load = get_Proj_pred(node);
2085 if (get_nodes_block(load) != block)
2088 /* store should have the same pointer as the load */
2089 if (get_Load_ptr(load) != ptr)
2092 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2093 if (other != NULL &&
2094 get_nodes_block(other) == block &&
2095 heights_reachable_in_block(heights, other, load)) {
2099 if (prevents_AM(block, load, mem))
2101 /* Store should be attached to the load via mem */
2102 assert(heights_reachable_in_block(heights, mem, load));
/**
 * Builds a destination-address-mode binop (e.g. AddMem): the memory operand
 * is one of op1/op2 (the second if the op is commutative), the other becomes
 * an immediate or a register. Returns NULL via the elided fallthrough when
 * neither operand qualifies (NOTE(review): confirm against full source).
 */
2107 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2108 ir_node *mem, ir_node *ptr, ir_mode *mode,
2109 construct_binop_dest_func *func,
2110 construct_binop_dest_func *func8bit,
2111 match_flags_t flags)
2113 ir_node *src_block = get_nodes_block(node);
2121 ia32_address_mode_t am;
2122 ia32_address_t *addr = &am.addr;
2123 memset(&am, 0, sizeof(am));
2125 assert(flags & match_immediate); /* there is no destam node without... */
2126 commutative = (flags & match_commutative) != 0;
2128 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2129 build_address(&am, op1, ia32_create_am_double_use);
2130 new_op = create_immediate_or_transform(op2, 0);
2131 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2132 build_address(&am, op2, ia32_create_am_double_use);
2133 new_op = create_immediate_or_transform(op1, 0);
2138 if (addr->base == NULL)
2139 addr->base = noreg_GP;
2140 if (addr->index == NULL)
2141 addr->index = noreg_GP;
2142 if (addr->mem == NULL)
2145 dbgi = get_irn_dbg_info(node);
2146 block = be_transform_node(src_block);
2147 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit operations need the dedicated 8-bit node constructor. */
2149 if (get_mode_size_bits(mode) == 8) {
2150 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2152 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2154 set_address(new_node, addr);
2155 set_ia32_op_type(new_node, ia32_AddrModeD);
2156 set_ia32_ls_mode(new_node, mode);
2157 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load's memory Proj to the new destam node. */
2159 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2160 mem_proj = be_transform_node(am.mem_proj);
2161 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/**
 * Builds a destination-address-mode unop (e.g. NotMem, NegMem): the single
 * operand must be a foldable load at ptr, otherwise NULL is returned
 * (the early return body is elided here).
 */
2166 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2167 ir_node *ptr, ir_mode *mode,
2168 construct_unop_dest_func *func)
2170 ir_node *src_block = get_nodes_block(node);
2176 ia32_address_mode_t am;
2177 ia32_address_t *addr = &am.addr;
2179 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2182 memset(&am, 0, sizeof(am));
2183 build_address(&am, op, ia32_create_am_double_use);
2185 dbgi = get_irn_dbg_info(node);
2186 block = be_transform_node(src_block);
2187 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2188 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2189 set_address(new_node, addr);
2190 set_ia32_op_type(new_node, ia32_AddrModeD);
2191 set_ia32_ls_mode(new_node, mode);
2192 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load's memory Proj to the new destam node. */
2194 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2195 mem_proj = be_transform_node(am.mem_proj);
2196 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negates a projection number, honouring the ia32 float-compare flag
 * (float compares negate differently because of unordered results). */
2201 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2203 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2204 return get_negated_pnc(pnc, mode);
/**
 * Tries to turn a Store of an 8-bit Mux(cond, 1, 0) / Mux(cond, 0, 1) into a
 * SetccMem (setCC directly to memory). Returns NULL (elided early returns)
 * when the mode is not 8 bit, the Mux values are not 0/1, or the condition
 * needs float-compare special cases.
 */
2207 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2209 ir_mode *mode = get_irn_mode(node);
2210 ir_node *mux_true = get_Mux_true(node);
2211 ir_node *mux_false = get_Mux_false(node);
2221 ia32_address_t addr;
2223 if (get_mode_size_bits(mode) != 8)
2226 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2228 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
/* Mux(c, 0, 1): materialize the inverted condition below. */
2234 cond = get_Mux_sel(node);
2235 flags = get_flags_node(cond, &pnc);
2236 /* we can't handle the float special cases with SetM */
2237 if (pnc & ia32_pn_Cmp_float)
2240 pnc = ia32_get_negated_pnc(pnc);
2242 build_address_ptr(&addr, ptr, mem);
2244 dbgi = get_irn_dbg_info(node);
2245 block = get_nodes_block(node);
2246 new_block = be_transform_node(block);
2247 new_mem = be_transform_node(mem);
2248 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2249 addr.index, addr.mem, flags, pnc);
2250 set_address(new_node, &addr);
2251 set_ia32_op_type(new_node, ia32_AddrModeD);
2252 set_ia32_ls_mode(new_node, mode);
2253 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Tries to transform Store(op(Load(ptr), x)) into a single read-modify-write
 * instruction (destination address mode). Dispatches on the stored value's
 * opcode; returns NULL when no destam form applies. Pinned-state of the
 * original Store is propagated to the new node.
 */
2258 static ir_node *try_create_dest_am(ir_node *node)
2260 ir_node *val = get_Store_value(node);
2261 ir_node *mem = get_Store_mem(node);
2262 ir_node *ptr = get_Store_ptr(node);
2263 ir_mode *mode = get_irn_mode(val);
2264 unsigned bits = get_mode_size_bits(mode);
2269 /* handle only GP modes for now... */
2270 if (!ia32_mode_needs_gp_reg(mode))
2274 /* store must be the only user of the val node */
2275 if (get_irn_n_edges(val) > 1)
2277 /* skip pointless convs */
2279 ir_node *conv_op = get_Conv_op(val);
2280 ir_mode *pred_mode = get_irn_mode(conv_op);
2281 if (!ia32_mode_needs_gp_reg(pred_mode))
2283 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2291 /* value must be in the same block */
2292 if (get_nodes_block(node) != get_nodes_block(val))
2295 switch (get_irn_opcode(val)) {
/* Add(x, ±1) becomes IncMem/DecMem when inc/dec is preferred. */
2297 op1 = get_Add_left(val);
2298 op2 = get_Add_right(val);
2299 if (ia32_cg_config.use_incdec) {
2300 if (is_Const_1(op2)) {
2301 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2303 } else if (is_Const_Minus_1(op2)) {
2304 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2308 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2309 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2310 match_commutative | match_immediate);
2313 op1 = get_Sub_left(val);
2314 op2 = get_Sub_right(val);
2315 if (is_Const(op2)) {
2316 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2318 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2319 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2323 op1 = get_And_left(val);
2324 op2 = get_And_right(val);
2325 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2326 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2327 match_commutative | match_immediate);
2330 op1 = get_Or_left(val);
2331 op2 = get_Or_right(val);
2332 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2333 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2334 match_commutative | match_immediate);
2337 op1 = get_Eor_left(val);
2338 op2 = get_Eor_right(val);
2339 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2340 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2341 match_commutative | match_immediate);
/* Shifts/rotates have no 8-bit variant: the same constructor is passed twice. */
2344 op1 = get_Shl_left(val);
2345 op2 = get_Shl_right(val);
2346 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2347 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2351 op1 = get_Shr_left(val);
2352 op2 = get_Shr_right(val);
2353 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2354 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2358 op1 = get_Shrs_left(val);
2359 op2 = get_Shrs_right(val);
2360 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2361 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2365 op1 = get_Rotl_left(val);
2366 op2 = get_Rotl_right(val);
2367 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2368 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2371 /* TODO: match ROR patterns... */
2373 new_node = try_create_SetMem(val, ptr, mem);
2377 op1 = get_Minus_op(val);
2378 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2381 /* should be lowered already */
2382 assert(mode != mode_b);
2383 op1 = get_Not_op(val);
2384 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2390 if (new_node != NULL) {
2391 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2392 get_irn_pinned(node) == op_pin_state_pinned) {
2393 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if an integer mode could be the target of an x87 fist store:
 * signed and 16 or 32 bits wide. */
2400 static bool possible_int_mode_for_fp(ir_mode *mode)
2404 if (!mode_is_signed(mode))
2406 size = get_mode_size_bits(mode);
2407 if (size != 16 && size != 32)
/* Non-zero if node is a Conv from a float mode into an integer mode that a
 * fist instruction can store directly (see possible_int_mode_for_fp). */
2412 static int is_float_to_int_conv(const ir_node *node)
2414 ir_mode *mode = get_irn_mode(node);
2418 if (!possible_int_mode_for_fp(mode))
2423 conv_op = get_Conv_op(node);
2424 conv_mode = get_irn_mode(conv_op);
2426 if (!mode_is_float(conv_mode))
2433 * Transform a Store(floatConst) into a sequence of
/* ...integer immediate stores: the constant's bytes are emitted as one
 * 32-bit store per word, all Synced together at the end. */
2436 * @return the created ia32 Store node
2438 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2440 ir_mode *mode = get_irn_mode(cns);
2441 unsigned size = get_mode_size_bytes(mode);
2442 tarval *tv = get_Const_tarval(cns);
2443 ir_node *block = get_nodes_block(node);
2444 ir_node *new_block = be_transform_node(block);
2445 ir_node *ptr = get_Store_ptr(node);
2446 ir_node *mem = get_Store_mem(node);
2447 dbg_info *dbgi = get_irn_dbg_info(node);
2451 ia32_address_t addr;
2453 assert(size % 4 == 0);
2456 build_address_ptr(&addr, ptr, mem);
/* Assemble one little-endian 32-bit word of the constant per iteration. */
2460 get_tarval_sub_bits(tv, ofs) |
2461 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2462 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2463 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2464 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2466 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2467 addr.index, addr.mem, imm);
2469 set_irn_pinned(new_node, get_irn_pinned(node));
2470 set_ia32_op_type(new_node, ia32_AddrModeD);
2471 set_ia32_ls_mode(new_node, mode_Iu);
2472 set_address(new_node, &addr);
2473 SET_IA32_ORIG_NODE(new_node, node);
2476 ins[i++] = new_node;
2481 } while (size != 0);
2484 return new_rd_Sync(dbgi, new_block, i, ins);
2491 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without fiddling the FPU control word but always
 * pops the x87 stack, so the result value is kept alive via a Keep; plain
 * vfist instead receives an explicit truncate control-word node. */
2493 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2494 ir_node *mem, ir_node *val, ir_node **fist)
2498 if (ia32_cg_config.use_fisttp) {
2499 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2500 if other users exists */
2501 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2502 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
2503 be_new_Keep(block, 1, &value);
2505 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
2508 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2511 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2517 * Transforms a general (no special case) Store.
/* Tries destination address mode first; otherwise builds a store address
 * and picks: xStore/vfst for floats, vfist for float→int Convs (x87 only),
 * Store8Bit/Store for integers. Redundant Convs before the stored value
 * are skipped. */
2519 * @return the created ia32 Store node
2521 static ir_node *gen_general_Store(ir_node *node)
2523 ir_node *val = get_Store_value(node);
2524 ir_mode *mode = get_irn_mode(val);
2525 ir_node *block = get_nodes_block(node);
2526 ir_node *new_block = be_transform_node(block);
2527 ir_node *ptr = get_Store_ptr(node);
2528 ir_node *mem = get_Store_mem(node);
2529 dbg_info *dbgi = get_irn_dbg_info(node);
2530 ir_node *new_val, *new_node, *store;
2531 ia32_address_t addr;
2533 /* check for destination address mode */
2534 new_node = try_create_dest_am(node);
2535 if (new_node != NULL)
2538 /* construct store address */
2539 memset(&addr, 0, sizeof(addr));
2540 ia32_create_address_mode(&addr, ptr, 0);
2542 if (addr.base == NULL) {
2543 addr.base = noreg_GP;
2545 addr.base = be_transform_node(addr.base);
2548 if (addr.index == NULL) {
2549 addr.index = noreg_GP;
2551 addr.index = be_transform_node(addr.index);
2553 addr.mem = be_transform_node(mem);
2555 if (mode_is_float(mode)) {
2556 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2558 while (is_Conv(val) && mode == get_irn_mode(val)) {
2559 ir_node *op = get_Conv_op(val);
2560 if (!mode_is_float(get_irn_mode(op)))
2564 new_val = be_transform_node(val);
2565 if (ia32_cg_config.use_sse2) {
2566 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2567 addr.index, addr.mem, new_val);
2569 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2570 addr.index, addr.mem, new_val, mode);
2573 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
/* Store the float directly via fist instead of converting first. */
2574 val = get_Conv_op(val);
2576 /* TODO: is this optimisation still necessary at all (middleend)? */
2577 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2578 while (is_Conv(val)) {
2579 ir_node *op = get_Conv_op(val);
2580 if (!mode_is_float(get_irn_mode(op)))
2582 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2586 new_val = be_transform_node(val);
2587 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2589 new_val = create_immediate_or_transform(val, 0);
2590 assert(mode != mode_b);
2592 if (get_mode_size_bits(mode) == 8) {
2593 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2594 addr.index, addr.mem, new_val);
2596 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2597 addr.index, addr.mem, new_val);
2602 set_irn_pinned(store, get_irn_pinned(node));
2603 set_ia32_op_type(store, ia32_AddrModeD);
2604 set_ia32_ls_mode(store, mode);
2606 set_address(store, &addr);
2607 SET_IA32_ORIG_NODE(store, node);
2613 * Transforms a Store.
2615 * @return the created ia32 Store node
2617 static ir_node *gen_Store(ir_node *node)
2619 ir_node *val = get_Store_value(node);
2620 ir_mode *mode = get_irn_mode(val);
2622 if (mode_is_float(mode) && is_Const(val)) {
2623 /* We can transform every floating const store
2624 into a sequence of integer stores.
2625 If the constant is already in a register,
2626 it would be better to use it, but we don't
2627 have this information here. */
2628 return gen_float_const_Store(node, val);
2630 return gen_general_Store(node);
/**
 * Transforms a Switch (a Cond with a non-boolean selector).
 *
 * Scans all case Projs to find the case-value range; if the smallest
 * case value is not zero, the selector is biased with a Lea so the
 * jump table starts at zero.
 *
 * @return the created ia32 SwitchJmp node
 */
static ir_node *create_Switch(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *sel = get_Cond_selector(node);
	ir_node *new_sel = be_transform_node(sel);
	long switch_min = LONG_MAX; /* running minimum over all case values */
	long switch_max = LONG_MIN; /* running maximum over all case values */
	long default_pn = get_Cond_default_proj(node);
	const ir_edge_t *edge;

	assert(get_mode_size_bits(get_irn_mode(sel)) == 32);

	/* determine the smallest switch case value */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		long pn = get_Proj_proj(proj);
		if (pn == default_pn)
		/* the default proj does not contribute to the range */
		if (pn < switch_min)
		if (pn > switch_max)

	/* refuse to build absurdly large jump tables */
	if ((unsigned long) (switch_max - switch_min) > 128000) {
		panic("Size of switch %+F bigger than 128000", node);

	if (switch_min != 0) {
		/* if smallest switch case is not 0 we need an additional sub */
		new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
		add_ia32_am_offs_int(new_sel, -switch_min);
		set_ia32_op_type(new_sel, ia32_AddrModeS);

		SET_IA32_ORIG_NODE(new_sel, node);

	new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
	SET_IA32_ORIG_NODE(new_node, node);
2685 * Transform a Cond node.
2687 static ir_node *gen_Cond(ir_node *node)
2689 ir_node *block = get_nodes_block(node);
2690 ir_node *new_block = be_transform_node(block);
2691 dbg_info *dbgi = get_irn_dbg_info(node);
2692 ir_node *sel = get_Cond_selector(node);
2693 ir_mode *sel_mode = get_irn_mode(sel);
2694 ir_node *flags = NULL;
2698 if (sel_mode != mode_b) {
2699 return create_Switch(node);
2702 /* we get flags from a Cmp */
2703 flags = get_flags_node(sel, &pnc);
2705 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2706 SET_IA32_ORIG_NODE(new_node, node);
2712 * Transform a be_Copy.
2714 static ir_node *gen_be_Copy(ir_node *node)
2716 ir_node *new_node = be_duplicate_node(node);
2717 ir_mode *mode = get_irn_mode(new_node);
2719 if (ia32_mode_needs_gp_reg(mode)) {
2720 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node.
 *
 * With fucomi support (P6+) a vFucomi that writes eflags directly is
 * emitted; otherwise the FPU status word is fetched via fnstsw
 * (vFtstFnstsw for compares against 0.0 when ftst is enabled) and
 * copied into the CPU flags with Sahf.
 */
static ir_node *create_Fucom(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *left = get_Cmp_left(node);
	ir_node *new_left = be_transform_node(left);
	ir_node *right = get_Cmp_right(node);

	if (ia32_cg_config.use_fucomi) {
		/* fucomi sets eflags itself, no status-word transfer needed */
		new_right = be_transform_node(right);
		new_node  = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
		set_ia32_commutative(new_node);
		SET_IA32_ORIG_NODE(new_node, node);
		if (ia32_cg_config.use_ftst && is_Const_0(right)) {
			/* compare against 0.0: ftst needs no second operand */
			new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
			new_right = be_transform_node(right);
			new_node  = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);

		set_ia32_commutative(new_node);

		SET_IA32_ORIG_NODE(new_node, node);

		/* transfer the fnstsw result (in AH) into the eflags register */
		new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
		SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE2 Ucomi compare for a Cmp node.
 *
 * One operand may be folded into a source address mode; commutative
 * matching is allowed (a possible swap is recorded in am.ins_permuted).
 */
static ir_node *create_Ucomi(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *src_block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(src_block);
	ir_node *left = get_Cmp_left(node);
	ir_node *right = get_Cmp_right(node);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	match_arguments(&am, src_block, left, right, NULL,
	                match_commutative | match_am);

	new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
	                             addr->mem, am.new_op1, am.new_op2,
	set_am_attributes(new_node, &am);

	SET_IA32_ORIG_NODE(new_node, node);

	/* if a Load was folded in, reroute its memory Proj to the new node */
	new_node = fix_mem_proj(new_node, &am);
2789 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2790 * to fold an and into a test node
2792 static bool can_fold_test_and(ir_node *node)
2794 const ir_edge_t *edge;
2796 /** we can only have eq and lg projs */
2797 foreach_out_edge(node, edge) {
2798 ir_node *proj = get_edge_src_irn(edge);
2799 pn_Cmp pnc = get_Proj_proj(proj);
2800 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/**
 * Returns true if it is assured, that the upper bits of a node are "clean"
 * which means for a 16 or 8 bit value, that the upper bits in the register
 * are 0 for unsigned and a copy of the sign bit for signed modes.
 */
static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
	assert(ia32_mode_needs_gp_reg(mode));
	/* 32-bit (or wider) values fill the whole register */
	if (get_mode_size_bits(mode) >= 32)

	/* look through Projs at the producing node */
	if (is_Proj(transformed_node))
		return upper_bits_clean(get_Proj_pred(transformed_node), mode);

	switch (get_ia32_irn_opcode(transformed_node)) {
	case iro_ia32_Conv_I2I:
	case iro_ia32_Conv_I2I8Bit: {
		/* a conversion is clean iff it extends with the same signedness
		 * from a mode that is at most as wide */
		ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
		if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
		if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))

	if (mode_is_signed(mode)) {
		return false; /* TODO handle signed modes */
		ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
		if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
			/* shifting right by at least (32 - mode bits) zeroes the
			 * upper bits */
			const ia32_immediate_attr_t *attr
				= get_ia32_immediate_attr_const(right);
			if (attr->symconst == 0 &&
			    (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
		return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);

		/* TODO too conservative if shift amount is constant */
		return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);

	/* And: for unsigned modes one clean operand already suffices */
	if (!mode_is_signed(mode)) {
		upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
		upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
	/* TODO if one is known to be zero extended, then || is sufficient */
	/* other binary ops: both operands must be clean */
		upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
		upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);

	case iro_ia32_Const:
	case iro_ia32_Immediate: {
		const ia32_immediate_attr_t *attr =
			get_ia32_immediate_attr_const(transformed_node);
		if (mode_is_signed(mode)) {
			/* all bits above the sign bit must equal the sign bit */
			long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
			return shifted == 0 || shifted == -1;
			/* unsigned: all bits above the mode width must be zero */
			unsigned long shifted = (unsigned long)attr->offset;
			shifted >>= get_mode_size_bits(mode);
			return shifted == 0;
/**
 * Generate code for a Cmp.
 *
 * Float compares go to Ucomi (SSE2) or the x87 Fucom variants; integer
 * compares become Test nodes (for the "(x & y) ==/!= 0" pattern) or
 * Cmp nodes, with operands possibly folded into an address mode.
 */
static ir_node *gen_Cmp(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *left = get_Cmp_left(node);
	ir_node *right = get_Cmp_right(node);
	ir_mode *cmp_mode = get_irn_mode(left);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	if (mode_is_float(cmp_mode)) {
		if (ia32_cg_config.use_sse2) {
			return create_Ucomi(node);
		return create_Fucom(node);

	assert(ia32_mode_needs_gp_reg(cmp_mode));

	/* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
	cmp_unsigned = !mode_is_signed(cmp_mode);
	if (is_Const_0(right) &&
	    get_irn_n_edges(left) == 1 &&
	    can_fold_test_and(node)) {
		/* Test(and_left, and_right) */
		ir_node *and_left = get_And_left(left);
		ir_node *and_right = get_And_right(left);

		/* matze: code here used mode instead of cmp_mode, I think it is always
		 * the same as cmp_mode, but I leave this assert here to see if this
		 * is really true */
		assert(get_irn_mode(and_left) == cmp_mode);

		match_arguments(&am, block, and_left, and_right, NULL,
		                match_am | match_8bit_am | match_16bit_am |
		                match_am_and_immediates | match_immediate);

		/* use 32bit compare mode if possible since the opcode is smaller */
		if (upper_bits_clean(am.new_op1, cmp_mode) &&
		    upper_bits_clean(am.new_op2, cmp_mode)) {
			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;

		if (get_mode_size_bits(cmp_mode) == 8) {
			new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
			        addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
			new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
			        addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
		/* Cmp(left, right) */
		match_arguments(&am, block, left, right, NULL,
		                match_commutative | match_am | match_8bit_am |
		                match_16bit_am | match_am_and_immediates |
		/* use 32bit compare mode if possible since the opcode is smaller */
		if (upper_bits_clean(am.new_op1, cmp_mode) &&
		    upper_bits_clean(am.new_op2, cmp_mode)) {
			cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;

		if (get_mode_size_bits(cmp_mode) == 8) {
			new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
			                               addr->index, addr->mem, am.new_op1,
			                               am.new_op2, am.ins_permuted,
			new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
			        addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);

	set_am_attributes(new_node, &am);
	set_ia32_ls_mode(new_node, cmp_mode);

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMovcc that selects between the Mux's true and false values
 * based on the given flags and condition code; one operand may be
 * folded into a source address mode.
 */
static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *val_true = get_Mux_true(node);
	ir_node *val_false = get_Mux_false(node);
	ia32_address_mode_t am;
	ia32_address_t *addr;

	assert(ia32_cg_config.use_cmov);
	assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));

	match_arguments(&am, block, val_false, val_true, flags,
	                match_commutative | match_am | match_16bit_am | match_mode_neutral);

	/* the matcher may have swapped the operands; negate the condition */
	if (am.ins_permuted)
		pnc = ia32_get_negated_pnc(pnc);

	new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
	                              addr->mem, am.new_op1, am.new_op2, new_flags,
	set_am_attributes(new_node, &am);

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a ia32 Setcc instruction.
 *
 * The 8-bit setcc result is widened with a Conv_I2I8Bit (zero-extending
 * via mode_Bu) when the original node's mode is wider than 8 bits.
 */
static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
                                 ir_node *flags, pn_Cmp pnc,
	ir_mode *mode = get_irn_mode(orig_node);

	new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
	SET_IA32_ORIG_NODE(new_node, orig_node);

	/* we might need to conv the result up */
	if (get_mode_size_bits(mode) > 8) {
		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
		                                    nomem, new_node, mode_Bu);
		SET_IA32_ORIG_NODE(new_node, orig_node);
/**
 * Create instruction for an unsigned Difference or Zero:
 * doz(a, b) = (a - b) if a >= b (unsigned), else 0.
 * Built branch-free as (a - b) AND NOT(sbb0(borrow)), i.e. the Sub
 * result masked with the inverted borrow mask.
 */
static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
	ir_mode *mode = get_irn_mode(psi);

	/* a - b; the result is used twice (value and flags) */
	new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
	                     match_mode_neutral | match_am | match_immediate | match_two_users);

	block = get_nodes_block(new_node);

	if (is_Proj(new_node)) {
		sub = get_Proj_pred(new_node);
		assert(is_ia32_Sub(sub));
		/* the Sub gains a flags output, so it must produce a tuple */
		set_irn_mode(sub, mode_T);
		new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
	eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);

	dbgi = get_irn_dbg_info(psi);
	/* sbb0 yields 0 or -1 depending on the borrow; Not inverts the mask */
	sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
	not = new_bd_ia32_Not(dbgi, block, sbb);

	new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
	set_ia32_commutative(new_node);
/**
 * Create a const array of two float consts (a local, constant,
 * 2-element float array entity in the global type).
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT for the mode of the constants, if NULL
 *                  smallest possible mode will be used
 */
static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
	ir_mode *mode = *new_mode;
	ir_initializer_t *initializer;
	tarval *tv0 = get_Const_tarval(c0);
	tarval *tv1 = get_Const_tarval(c1);

	/* detect the best mode for the constants */
	mode = get_tarval_mode(tv0);

	if (mode != mode_F) {
		/* try to shrink to single precision if both values convert
		 * losslessly */
		if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
		    tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
			tv0 = tarval_convert_to(tv0, mode);
			tv1 = tarval_convert_to(tv1, mode);
		} else if (mode != mode_D) {
			/* otherwise try double precision */
			if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
			    tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
				tv0 = tarval_convert_to(tv0, mode);
				tv1 = tarval_convert_to(tv1, mode);

	tp = ia32_create_float_type(mode, 4);
	tp = ia32_create_float_array(tp);

	/* unique, linker-local constant entity (.LC<n>) */
	ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);

	set_entity_ld_ident(ent, get_entity_ident(ent));
	set_entity_visibility(ent, ir_visibility_local);
	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);

	initializer = create_initializer_compound(2);

	set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
	set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));

	set_entity_initializer(ent, initializer);
/**
 * Possible transformations for creating a Setcc.
 */
enum setcc_transform_insn {
/* Transformation plan computed by find_const_transform(): a sequence
 * of steps turning the 0/1 setcc result into the requested true/false
 * constants. */
typedef struct setcc_transform {
	unsigned permutate_cmp_ins; /* if set, the Cmp inputs must be swapped */
	enum setcc_transform_insn transform; /* kind of this transformation step */
} setcc_transform_t;
/**
 * Setcc can only handle 0 and 1 result values directly.
 * Find a transformation (sequence of steps, recorded in *res) that
 * creates the requested true/false constants t and f from the 0/1
 * produced by a Setcc.
 */
static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
                                 setcc_transform_t *res)
	res->permutate_cmp_ins = 0;

	/* normalize: make t the non-null (and the bigger) value, negating
	 * the condition where the values are swapped */
	if (tarval_is_null(t)) {
		pnc = ia32_get_negated_pnc(pnc);
	} else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
		// now, t is the bigger one
		pnc = ia32_get_negated_pnc(pnc);

	if (! tarval_is_null(f)) {
		/* non-zero false value: work with (t - f) and add f back as the
		 * final step */
		tarval *t_sub = tarval_sub(t, f, NULL);

		res->steps[step].transform = SETCC_TR_ADD;

		if (t == tarval_bad)
			panic("constant subtract failed");
		if (! tarval_is_long(f))
			panic("tarval is not long");

		res->steps[step].val = get_tarval_long(f);

		f = tarval_sub(f, f, NULL);
		assert(tarval_is_null(f));

	if (tarval_is_one(t)) {
		/* 0/1 is exactly what Setcc produces */
		res->steps[step].transform = SETCC_TR_SET;
		res->num_steps = ++step;

	if (tarval_is_minus_one(t)) {
		/* 0/-1: negate the 0/1 setcc result */
		res->steps[step].transform = SETCC_TR_NEG;
		res->steps[step].transform = SETCC_TR_SET;
		res->num_steps = ++step;

	if (tarval_is_long(t)) {
		long v = get_tarval_long(t);

		res->steps[step].val = 0;
		/* scale by 9: lea (a << 3) + a */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = SETCC_TR_LEAxx;
		res->steps[step].scale = 3; /* (a << 3) + a */
		/* scale by 8: shl, or lea when an offset must be added */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
		res->steps[step].scale = 3; /* (a << 3) */
		/* scale by 5: lea (a << 2) + a */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = SETCC_TR_LEAxx;
		res->steps[step].scale = 2; /* (a << 2) + a */
		/* scale by 4: shl, or lea when an offset must be added */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
		res->steps[step].scale = 2; /* (a << 2) */
		/* scale by 3: lea (a << 1) + a */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = SETCC_TR_LEAxx;
		res->steps[step].scale = 1; /* (a << 1) + a */
		/* scale by 2: shl, or lea when an offset must be added */
		if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
		res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
		res->steps[step].scale = 1; /* (a << 1) */

		res->num_steps = step;

		if (! tarval_is_single_bit(t)) {
			/* general constant: spread the bit with NEG, then mask */
			res->steps[step].transform = SETCC_TR_AND;
			res->steps[step].val = v;
			res->steps[step].transform = SETCC_TR_NEG;
			/* single-bit constant: shift the 0/1 into position */
			int v = get_tarval_lowest_bit(t);

			res->steps[step].transform = SETCC_TR_SHL;
			res->steps[step].scale = v;

		res->steps[step].transform = SETCC_TR_SET;
		res->num_steps = ++step;

	panic("tarval is not long");
/**
 * Transforms a Mux node into some code sequence.
 *
 * Float Muxes become SSE min/max (when they match that pattern) or a
 * load from a two-element constant array indexed by the setcc result;
 * integer Muxes become an unsigned Doz, a setcc-plus-arithmetic
 * sequence (for constant values), or a CMov.
 *
 * @return The transformed node.
 */
static ir_node *gen_Mux(ir_node *node)
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *mux_true = get_Mux_true(node);
	ir_node *mux_false = get_Mux_false(node);
	ir_node *cond = get_Mux_sel(node);
	ir_mode *mode = get_irn_mode(node);

	assert(get_irn_mode(cond) == mode_b);

	/* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
	if (mode_is_float(mode)) {
		ir_node *cmp = get_Proj_pred(cond);
		ir_node *cmp_left = get_Cmp_left(cmp);
		ir_node *cmp_right = get_Cmp_right(cmp);
		pn_Cmp pnc = get_Proj_proj(cond);

		if (ia32_cg_config.use_sse2) {
			if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
				if (cmp_left == mux_true && cmp_right == mux_false) {
					/* Mux(a <= b, a, b) => MIN */
					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
					                 match_commutative | match_am | match_two_users);
				} else if (cmp_left == mux_false && cmp_right == mux_true) {
					/* Mux(a <= b, b, a) => MAX */
					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
					                 match_commutative | match_am | match_two_users);
			} else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
				if (cmp_left == mux_true && cmp_right == mux_false) {
					/* Mux(a >= b, a, b) => MAX */
					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
					                 match_commutative | match_am | match_two_users);
				} else if (cmp_left == mux_false && cmp_right == mux_true) {
					/* Mux(a >= b, b, a) => MIN */
					return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
					                 match_commutative | match_am | match_two_users);

		if (is_Const(mux_true) && is_Const(mux_false)) {
			ia32_address_mode_t am;
			/* both values constant: build a two-element constant array
			 * and load the chosen element, indexed by the setcc result */
			flags = get_flags_node(cond, &pnc);
			new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);

			if (ia32_cg_config.use_sse2) {
				/* cannot load from different mode on SSE */
				/* x87 can load any mode */

			am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);

			/* scale the 0/1 index by the element size */
			switch (get_mode_size_bytes(new_mode)) {
				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
				set_ia32_am_scale(new_node, 2);
				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
				set_ia32_am_scale(new_node, 1);
				/* arg, shift 16 NOT supported */
				new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
				panic("Unsupported constant size");

			am.ls_mode = new_mode;
			am.addr.base = get_symconst_base();
			am.addr.index = new_node;
			am.addr.mem = nomem;
			am.addr.scale = scale;
			am.addr.use_frame = 0;
			am.addr.frame_entity = NULL;
			am.addr.symconst_sign = 0;
			am.mem_proj = am.addr.mem;
			am.op_type = ia32_AddrModeS;
			am.pinned = op_pin_state_floats;
			am.ins_permuted = 0;

			if (ia32_cg_config.use_sse2)
				load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
				load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
			set_am_attributes(load, &am);

			return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
		panic("cannot transform floating point Mux");

	assert(ia32_mode_needs_gp_reg(mode));

	if (is_Proj(cond)) {
		ir_node *cmp = get_Proj_pred(cond);
		ir_node *cmp_left = get_Cmp_left(cmp);
		ir_node *cmp_right = get_Cmp_right(cmp);
		pn_Cmp pnc = get_Proj_proj(cond);

		/* check for unsigned Doz first */
		if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
		    is_Const_0(mux_false) && is_Sub(mux_true) &&
		    get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
			/* Mux(a >=u b, a - b, 0) unsigned Doz */
			return create_doz(node, cmp_left, cmp_right);
		} else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
		           is_Const_0(mux_true) && is_Sub(mux_false) &&
		           get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
			/* Mux(a <=u b, 0, a - b) unsigned Doz */
			return create_doz(node, cmp_left, cmp_right);

	flags = get_flags_node(cond, &pnc);

	if (is_Const(mux_true) && is_Const(mux_false)) {
		/* both are const, good */
		tarval *tv_true = get_Const_tarval(mux_true);
		tarval *tv_false = get_Const_tarval(mux_false);
		setcc_transform_t res;

		/* check if flags is a cmp node and we are the only user,
		   i.e no other user yet (freshly constructed nodes have no
		   out edges yet) */
		int permutate_allowed = 0;
		if (is_ia32_Cmp(flags) && get_irn_n_edges(flags) == 0) {
			/* yes, we can permutate its inputs */
			permutate_allowed = 1;
		find_const_transform(pnc, tv_true, tv_false, &res);

		if (res.permutate_cmp_ins) {
			ia32_attr_t *attr = get_ia32_attr(flags);
			attr->data.ins_permuted ^= 1;

		/* materialize the transformation plan, last step first */
		for (step = (int)res.num_steps - 1; step >= 0; --step) {
			switch (res.steps[step].transform) {
				imm = ia32_immediate_from_long(res.steps[step].val);
				new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
			case SETCC_TR_ADDxx:
				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
				new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
				set_ia32_am_scale(new_node, res.steps[step].scale);
				set_ia32_am_offs_int(new_node, res.steps[step].val);
			case SETCC_TR_LEAxx:
				new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
				set_ia32_am_scale(new_node, res.steps[step].scale);
				set_ia32_am_offs_int(new_node, res.steps[step].val);
				imm = ia32_immediate_from_long(res.steps[step].scale);
				new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
				new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
				new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
				imm = ia32_immediate_from_long(res.steps[step].val);
				new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
				new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
				new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
				panic("unknown setcc transform");
		new_node = create_CMov(node, cond, flags, pnc);
/**
 * Create a conversion from x87 floating point register to general
 * purpose: the value is spilled into the frame with fist(tp) and
 * reloaded as an integer.
 */
static ir_node *gen_x87_fp_to_gp(ir_node *node)
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *op = get_Conv_op(node);
	ir_node *new_op = be_transform_node(op);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	ir_node *fist, *load, *mem;

	mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
	set_irn_pinned(fist, op_pin_state_floats);
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);

	assert(get_mode_size_bits(mode) <= 32);
	/* exception we can only store signed 32 bit integers, so for unsigned
	   we store a 64bit (signed) integer and load the lower bits */
	if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
		set_ia32_ls_mode(fist, mode_Ls);
		set_ia32_ls_mode(fist, mode_Is);
	SET_IA32_ORIG_NODE(fist, node);

	/* reload the (lower 32 bits of the) spilled value as an integer */
	load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);

	set_irn_pinned(load, op_pin_state_floats);
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Is);
	if (get_ia32_ls_mode(fist) == mode_Ls) {
		/* the 64bit spill needs a double-word stack entity */
		ia32_attr_t *attr = get_ia32_attr(load);
		attr->data.need_64bit_stackent = 1;
		ia32_attr_t *attr = get_ia32_attr(load);
		attr->data.need_32bit_stackent = 1;
	SET_IA32_ORIG_NODE(load, node);

	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3559 * Creates a x87 strict Conv by placing a Store and a Load
3561 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3563 ir_node *block = get_nodes_block(node);
3564 ir_graph *irg = get_Block_irg(block);
3565 dbg_info *dbgi = get_irn_dbg_info(node);
3566 ir_node *frame = get_irg_frame(irg);
3567 ir_node *store, *load;
3570 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3571 set_ia32_use_frame(store);
3572 set_ia32_op_type(store, ia32_AddrModeD);
3573 SET_IA32_ORIG_NODE(store, node);
3575 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3576 set_ia32_use_frame(load);
3577 set_ia32_op_type(load, ia32_AddrModeS);
3578 SET_IA32_ORIG_NODE(load, node);
3580 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
3584 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3585 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3587 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3589 func = get_mode_size_bits(mode) == 8 ?
3590 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3591 return func(dbgi, block, base, index, mem, val, mode);
/**
 * Create a conversion from general purpose to x87 register: the
 * integer is spilled to the frame (or read directly via address mode)
 * and loaded into the FPU with fild. 32bit unsigned values need a
 * 64bit spill whose upper half is zeroed.
 */
static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = get_Block_irg(block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *op = get_Conv_op(node);
	ir_node *new_op = NULL;
	ir_mode *store_mode;

	/* fild can use source AM if the operand is a signed 16bit or 32bit integer */
	if (possible_int_mode_for_fp(src_mode)) {
		ia32_address_mode_t am;

		match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
		if (am.op_type == ia32_AddrModeS) {
			ia32_address_t *addr = &am.addr;

			/* load directly from memory, no spill needed */
			fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
			new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);

			set_am_attributes(fild, &am);
			SET_IA32_ORIG_NODE(fild, node);

			fix_mem_proj(fild, &am);

	if (new_op == NULL) {
		new_op = be_transform_node(op);

	mode = get_irn_mode(op);

	/* first convert to 32 bit signed if necessary */
	if (get_mode_size_bits(src_mode) < 32) {
		if (!upper_bits_clean(new_op, src_mode)) {
			new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
			SET_IA32_ORIG_NODE(new_op, node);

	assert(get_mode_size_bits(mode) == 32);

	/* spill the (now 32 bit) integer to the frame */
	store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);

	set_ia32_use_frame(store);
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);

	/* exception for 32bit unsigned, do a 64bit spill+load */
	if (!mode_is_signed(mode)) {
		/* write a zero into the upper 32 bits of the spill slot */
		ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);

		ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
		                                        noreg_GP, nomem, zero_const);

		set_ia32_use_frame(zero_store);
		set_ia32_op_type(zero_store, ia32_AddrModeD);
		add_ia32_am_offs_int(zero_store, 4);
		set_ia32_ls_mode(zero_store, mode_Iu);

		/* both stores must have happened before the fild reads the slot */
		store = new_rd_Sync(dbgi, block, 2, in);
		store_mode = mode_Ls;
		store_mode = mode_Is;

	/* load the spilled value into the FPU */
	fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);

	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, store_mode);

	new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Create a conversion from one integer mode into another one; the
 * actual conversion works on the smaller of the two modes. If the
 * operand's upper bits are already clean the Conv is superfluous.
 */
static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
                                dbg_info *dbgi, ir_node *block, ir_node *op,
	ir_node *new_block = be_transform_node(block);
	ir_mode *smaller_mode;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* only the smaller of the two widths needs to be preserved */
	if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
		smaller_mode = src_mode;
		smaller_mode = tgt_mode;

#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",

	match_arguments(&am, block, NULL, op, NULL,
	                match_am | match_8bit_am | match_16bit_am);

	if (upper_bits_clean(am.new_op2, smaller_mode)) {
		/* unnecessary conv. in theory it shouldn't have been AM */
		assert(is_ia32_NoReg_GP(addr->base));
		assert(is_ia32_NoReg_GP(addr->index));
		assert(is_NoMem(addr->mem));
		assert(am.addr.offset == 0);
		assert(am.addr.symconst_ent == NULL);

	new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
	                           addr->mem, am.new_op2, smaller_mode);
	set_am_attributes(new_node, &am);
	/* match_arguments assume that out-mode = in-mode, this isn't true here
	 * so fix it */
	set_ia32_ls_mode(new_node, smaller_mode);
	SET_IA32_ORIG_NODE(new_node, node);
	new_node = fix_mem_proj(new_node, &am);
3741 * Transforms a Conv node.
3743 * @return The created ia32 Conv node
/* Transforms a Conv node into ia32 code.
 * Dispatches on the (source mode, target mode) combination: float->float,
 * float->int, int->float and int->int, selecting SSE2 or x87 instructions
 * according to ia32_cg_config.use_sse2. Returns the transformed node. */
3745 static ir_node *gen_Conv(ir_node *node)
3747 ir_node *block = get_nodes_block(node);
3748 ir_node *new_block = be_transform_node(block);
3749 ir_node *op = get_Conv_op(node);
3750 ir_node *new_op = NULL;
3751 dbg_info *dbgi = get_irn_dbg_info(node);
3752 ir_mode *src_mode = get_irn_mode(op);
3753 ir_mode *tgt_mode = get_irn_mode(node);
3754 int src_bits = get_mode_size_bits(src_mode);
3755 int tgt_bits = get_mode_size_bits(tgt_mode);
3756 ir_node *res = NULL;
/* 64bit integer values must have been lowered before this phase */
3758 assert(!mode_is_int(src_mode) || src_bits <= 32);
3759 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3761 /* modeB -> X should already be lowered by the lower_mode_b pass */
3762 if (src_mode == mode_b) {
3763 panic("ConvB not lowered %+F", node);
/* same-mode Conv: normally a no-op, but strict x87 convs must be kept */
3766 if (src_mode == tgt_mode) {
3767 if (get_Conv_strict(node)) {
3768 if (ia32_cg_config.use_sse2) {
3769 /* when we are in SSE mode, we can kill all strict no-op conversion */
3770 return be_transform_node(op);
3773 /* this should be optimized already, but who knows... */
3774 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3775 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3776 return be_transform_node(op);
3780 if (mode_is_float(src_mode)) {
3781 new_op = be_transform_node(op);
3782 /* we convert from float ... */
3783 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE2 uses cvt, x87 needs a strict-conv roundtrip */
3785 if (ia32_cg_config.use_sse2) {
3786 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3787 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3789 set_ia32_ls_mode(res, tgt_mode);
3791 if (get_Conv_strict(node)) {
3792 /* if fp_no_float_fold is not set then we assume that we
3793 * don't have any float operations in a non
3794 * mode_float_arithmetic mode and can skip strict upconvs */
3795 if (src_bits < tgt_bits
3796 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3797 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3800 res = gen_x87_strict_conv(tgt_mode, new_op);
3801 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3805 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int */
3810 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3811 if (ia32_cg_config.use_sse2) {
3812 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
/* note: the load/store mode of the cvt is the *source* float mode */
3814 set_ia32_ls_mode(res, src_mode);
3816 return gen_x87_fp_to_gp(node);
3820 /* we convert from int ... */
3821 if (mode_is_float(tgt_mode)) {
/* int -> float */
3823 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3824 if (ia32_cg_config.use_sse2) {
3825 new_op = be_transform_node(op);
3826 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3828 set_ia32_ls_mode(res, tgt_mode);
3830 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3831 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3832 res = gen_x87_gp_to_fp(node, src_mode);
3834 /* we need a strict-Conv, if the int mode has more bits than the
/* float mantissa cannot represent every int value exactly in that case */
3836 if (float_mantissa < int_mantissa) {
3837 res = gen_x87_strict_conv(tgt_mode, res);
3838 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3842 } else if (tgt_mode == mode_b) {
3843 /* mode_b lowering already took care that we only have 0/1 values */
3844 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3845 src_mode, tgt_mode));
3846 return be_transform_node(op);
/* int -> int with equal bit width is a no-op */
3849 if (src_bits == tgt_bits) {
3850 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3851 src_mode, tgt_mode));
3852 return be_transform_node(op);
/* real int -> int conversion (sign/zero extension or truncation) */
3855 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode @p node as an ia32 Immediate (respecting the given
 * immediate constraint type); if that is not possible, falls back to the
 * normal be_transform_node transformation. */
3863 static ir_node *create_immediate_or_transform(ir_node *node,
3864 char immediate_constraint_type)
3866 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3867 if (new_node == NULL) {
3868 new_node = be_transform_node(node);
3874 * Transforms a FrameAddr into an ia32 Add.
/* Transforms a be_FrameAddr into an ia32 Lea that computes the address of
 * the node's frame entity relative to the (transformed) frame pointer. */
3876 static ir_node *gen_be_FrameAddr(ir_node *node)
3878 ir_node *block = be_transform_node(get_nodes_block(node));
3879 ir_node *op = be_get_FrameAddr_frame(node);
3880 ir_node *new_op = be_transform_node(op);
3881 dbg_info *dbgi = get_irn_dbg_info(node);
3884 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* the actual offset is filled in later, when frame entities get offsets */
3885 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3886 set_ia32_use_frame(new_node);
3888 SET_IA32_ORIG_NODE(new_node, node);
3894 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transforms a be_Return. With SSE2, a float return value lives in an XMM
 * register but the ABI returns floats on the x87 stack, so the value is
 * spilled to the frame (xStoreSimple) and re-loaded with an x87 vfld; the
 * Barrier in front of the Return is rebuilt with the new value/memory.
 * All other cases are simply duplicated. */
3896 static ir_node *gen_be_Return(ir_node *node)
3898 ir_graph *irg = current_ir_graph;
3899 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3900 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3901 ir_entity *ent = get_irg_entity(irg);
3902 ir_type *tp = get_entity_type(ent);
3907 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3908 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3910 int pn_ret_val, pn_ret_mem, arity, i;
3912 assert(ret_val != NULL);
/* the XMM->x87 fixup is only needed for SSE2 returns with a value */
3913 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3914 return be_duplicate_node(node);
3917 res_type = get_method_res_type(tp, 0);
3919 if (! is_Primitive_type(res_type)) {
3920 return be_duplicate_node(node);
3923 mode = get_type_mode(res_type);
3924 if (! mode_is_float(mode)) {
3925 return be_duplicate_node(node);
3928 assert(get_method_n_ress(tp) == 1);
3930 pn_ret_val = get_Proj_proj(ret_val);
3931 pn_ret_mem = get_Proj_proj(ret_mem);
3933 /* get the Barrier */
3934 barrier = get_Proj_pred(ret_val);
3936 /* get result input of the Barrier */
3937 ret_val = get_irn_n(barrier, pn_ret_val);
3938 new_ret_val = be_transform_node(ret_val);
3940 /* get memory input of the Barrier */
3941 ret_mem = get_irn_n(barrier, pn_ret_mem);
3942 new_ret_mem = be_transform_node(ret_mem);
3944 frame = get_irg_frame(irg);
3946 dbgi = get_irn_dbg_info(barrier);
3947 block = be_transform_node(get_nodes_block(barrier));
3949 /* store xmm0 onto stack */
3950 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3951 new_ret_mem, new_ret_val);
3952 set_ia32_ls_mode(sse_store, mode);
3953 set_ia32_op_type(sse_store, ia32_AddrModeD);
3954 set_ia32_use_frame(sse_store);
3956 /* load into x87 register */
3957 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3958 set_ia32_op_type(fld, ia32_AddrModeS);
3959 set_ia32_use_frame(fld);
3961 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3962 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3964 /* create a new barrier */
3965 arity = get_irn_arity(barrier);
3966 in = ALLOCAN(ir_node*, arity);
3967 for (i = 0; i < arity; ++i) {
/* replace the value/memory inputs by the fld result / fld memory proj,
 * all other inputs are transformed normally */
3970 if (i == pn_ret_val) {
3972 } else if (i == pn_ret_mem) {
3975 ir_node *in = get_irn_n(barrier, i);
3976 new_in = be_transform_node(in);
3981 new_barrier = new_ir_node(dbgi, irg, block,
3982 get_irn_op(barrier), get_irn_mode(barrier),
3984 copy_node_attr(irg, barrier, new_barrier);
3985 be_duplicate_deps(barrier, new_barrier);
/* register the rebuilt barrier so the Return below picks it up */
3986 be_set_transformed_node(barrier, new_barrier);
3988 /* transform normally */
3989 return be_duplicate_node(node);
3993 * Transform a be_AddSP into an ia32_SubSP.
/* Transforms a be_AddSP into an ia32_SubSP. Note the apparent inversion:
 * adding to the (logical) stack size means subtracting from ESP, since the
 * ia32 stack grows towards lower addresses. */
3995 static ir_node *gen_be_AddSP(ir_node *node)
3997 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3998 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4000 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4001 match_am | match_immediate);
4005 * Transform a be_SubSP into an ia32_AddSP
/* Transforms a be_SubSP into an ia32_AddSP (inverse of gen_be_AddSP:
 * shrinking the stack means adding to ESP on ia32). */
4007 static ir_node *gen_be_SubSP(ir_node *node)
4009 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4010 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4012 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4013 match_am | match_immediate);
4017 * Change some phi modes
/* Transforms a Phi: keeps it a Phi node but attaches the proper ia32
 * register requirement for its mode (gp, xmm or vfp). The Phi's arguments
 * are NOT transformed here — they are kept and fixed up later, because Phis
 * may be part of loops (see comment below). */
4019 static ir_node *gen_Phi(ir_node *node)
4021 const arch_register_req_t *req;
4022 ir_node *block = be_transform_node(get_nodes_block(node));
4023 ir_graph *irg = current_ir_graph;
4024 dbg_info *dbgi = get_irn_dbg_info(node);
4025 ir_mode *mode = get_irn_mode(node);
4028 if (ia32_mode_needs_gp_reg(mode)) {
4029 /* we shouldn't have any 64bit stuff around anymore */
4030 assert(get_mode_size_bits(mode) <= 32);
4031 /* all integer operations are on 32bit registers now */
4033 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4034 } else if (mode_is_float(mode)) {
4035 if (ia32_cg_config.use_sse2) {
4037 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4040 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
/* e.g. memory Phis need no register */
4043 req = arch_no_register_req;
4046 /* phi nodes allow loops, so we use the old arguments for now
4047 * and fix this later */
4048 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4049 get_irn_in(node) + 1);
4050 copy_node_attr(irg, node, phi);
4051 be_duplicate_deps(node, phi);
4053 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed eventually */
4055 be_enqueue_preds(node);
4060 static ir_node *gen_Jmp(ir_node *node)
4062 ir_node *block = get_nodes_block(node);
4063 ir_node *new_block = be_transform_node(block);
4064 dbg_info *dbgi = get_irn_dbg_info(node);
4067 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4068 SET_IA32_ORIG_NODE(new_node, node);
/* Transforms an indirect jump (IJmp) into an ia32 IJmp. The target may be
 * folded into an address mode or an immediate via match_arguments. */
4076 static ir_node *gen_IJmp(ir_node *node)
4078 ir_node *block = get_nodes_block(node);
4079 ir_node *new_block = be_transform_node(block);
4080 dbg_info *dbgi = get_irn_dbg_info(node);
4081 ir_node *op = get_IJmp_target(node);
4083 ia32_address_mode_t am;
4084 ia32_address_t *addr = &am.addr;
/* jump targets are always pointers */
4086 assert(get_irn_mode(op) == mode_P);
4088 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4090 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4091 addr->mem, am.new_op2);
4092 set_am_attributes(new_node, &am);
4093 SET_IA32_ORIG_NODE(new_node, node);
/* reroute a possible memory Proj if source address mode was used */
4095 new_node = fix_mem_proj(new_node, &am);
4101 * Transform a Bound node.
/* Transforms a Bound node. Only the common case of a constant-0 lower bound
 * (typical for Java array checks) is supported: it becomes an unsigned
 * compare "index < upper" via Sub flags + Jcc. Anything else panics. */
4103 static ir_node *gen_Bound(ir_node *node)
4106 ir_node *lower = get_Bound_lower(node);
4107 dbg_info *dbgi = get_irn_dbg_info(node);
4109 if (is_Const_0(lower)) {
4110 /* typical case for Java */
4111 ir_node *sub, *res, *flags, *block;
4113 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4114 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4116 block = get_nodes_block(res);
/* ensure we have a mode_T Sub so we can take flags and result Projs */
4117 if (! is_Proj(res)) {
4119 set_irn_mode(sub, mode_T);
4120 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4122 sub = get_Proj_pred(res);
4124 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
/* unsigned "lt" also catches negative indices interpreted as huge values */
4125 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4126 SET_IA32_ORIG_NODE(new_node, node);
4128 panic("generic Bound not supported in ia32 Backend");
/* Transforms an ia32_l_ShlDep (shift-left with an extra dependency input,
 * produced by 64bit lowering) into a real ia32 Shl. */
4134 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4136 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4137 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4139 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4140 match_immediate | match_mode_neutral);
/* Transforms an ia32_l_ShrDep into a real ia32 Shr (see gen_ia32_l_ShlDep). */
4143 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4145 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4146 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4147 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Transforms an ia32_l_SarDep into a real ia32 Sar (see gen_ia32_l_ShlDep). */
4151 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4153 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4154 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4155 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Transforms an ia32_l_Add (low word of a lowered 64bit add) into a real
 * ia32 Add. The Add is forced to mode_T because the following l_Adc needs
 * its carry flag as a Proj. */
4159 static ir_node *gen_ia32_l_Add(ir_node *node)
4161 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4162 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4163 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4164 match_commutative | match_am | match_immediate |
4165 match_mode_neutral);
/* gen_binop may already have produced a Proj; get at the Add itself */
4167 if (is_Proj(lowered)) {
4168 lowered = get_Proj_pred(lowered);
4170 assert(is_ia32_Add(lowered));
4171 set_irn_mode(lowered, mode_T);
/* Transforms an ia32_l_Adc (high word of a lowered 64bit add, consuming the
 * carry flag) into a real ia32 Adc. */
4177 static ir_node *gen_ia32_l_Adc(ir_node *node)
4179 return gen_binop_flags(node, new_bd_ia32_Adc,
4180 match_commutative | match_am | match_immediate |
4181 match_mode_neutral);
4185 * Transforms a l_MulS into a "real" MulS node.
4187 * @return the created ia32 Mul node
/* Transforms an ia32_l_Mul (unsigned widening multiply from 64bit lowering)
 * into a real ia32 Mul node. */
4189 static ir_node *gen_ia32_l_Mul(ir_node *node)
4191 ir_node *left = get_binop_left(node);
4192 ir_node *right = get_binop_right(node);
4194 return gen_binop(node, left, right, new_bd_ia32_Mul,
4195 match_commutative | match_am | match_mode_neutral);
4199 * Transforms a l_IMulS into a "real" IMul1OPS node.
4201 * @return the created ia32 IMul1OP node
/* Transforms an ia32_l_IMul (signed widening multiply from 64bit lowering)
 * into a real ia32 IMul1OP node. */
4203 static ir_node *gen_ia32_l_IMul(ir_node *node)
4205 ir_node *left = get_binop_left(node);
4206 ir_node *right = get_binop_right(node);
4208 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4209 match_commutative | match_am | match_mode_neutral);
/* Transforms an ia32_l_Sub (low word of a lowered 64bit sub) into a real
 * ia32 Sub, forced to mode_T so the following l_Sbb can use its borrow flag
 * (mirror of gen_ia32_l_Add). */
4212 static ir_node *gen_ia32_l_Sub(ir_node *node)
4214 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4215 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4216 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4217 match_am | match_immediate | match_mode_neutral);
4219 if (is_Proj(lowered)) {
4220 lowered = get_Proj_pred(lowered);
4222 assert(is_ia32_Sub(lowered));
4223 set_irn_mode(lowered, mode_T);
/* Transforms an ia32_l_Sbb (high word of a lowered 64bit sub, consuming the
 * borrow flag) into a real ia32 Sbb. */
4229 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4231 return gen_binop_flags(node, new_bd_ia32_Sbb,
4232 match_am | match_immediate | match_mode_neutral);
4236 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4237 * op1 - target to be shifted
4238 * op2 - contains bits to be shifted into target
4240 * Only op3 can be an immediate.
/* Common helper for ia32_l_ShlD/ia32_l_ShrD: builds the corresponding
 * ia32 ShlD/ShrD double-shift with @p high, @p low and @p count inputs.
 * Only the count may become an immediate. */
4242 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4243 ir_node *low, ir_node *count)
4245 ir_node *block = get_nodes_block(node);
4246 ir_node *new_block = be_transform_node(block);
4247 dbg_info *dbgi = get_irn_dbg_info(node);
4248 ir_node *new_high = be_transform_node(high);
4249 ir_node *new_low = be_transform_node(low);
4253 /* the shift amount can be any mode that is bigger than 5 bits, since all
4254 * other bits are ignored anyway */
4255 while (is_Conv(count) &&
4256 get_irn_n_edges(count) == 1 &&
4257 mode_is_int(get_irn_mode(count))) {
4258 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4259 count = get_Conv_op(count);
4261 new_count = create_immediate_or_transform(count, 0);
4263 if (is_ia32_l_ShlD(node)) {
4264 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4267 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4270 SET_IA32_ORIG_NODE(new_node, node);
4275 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4277 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4278 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4279 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4280 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transforms an ia32_l_ShrD (64bit shift-right helper) — see
 * gen_lowered_64bit_shifts. */
4283 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4285 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4286 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4287 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4288 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transforms an ia32_l_LLtoFloat (64bit int -> float conversion helper):
 * stores the low/high 32bit halves into a 64bit frame slot, loads the slot
 * with an x87 fild (which treats it as *signed* 64bit), and — if the source
 * was unsigned — corrects negative-looking values by conditionally adding
 * the 2^64 bias constant selected via the sign bit. x87 only; panics for
 * SSE2. */
4291 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4293 ir_node *src_block = get_nodes_block(node);
4294 ir_node *block = be_transform_node(src_block);
4295 ir_graph *irg = current_ir_graph;
4296 dbg_info *dbgi = get_irn_dbg_info(node);
4297 ir_node *frame = get_irg_frame(irg);
4298 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4299 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4300 ir_node *new_val_low = be_transform_node(val_low);
4301 ir_node *new_val_high = be_transform_node(val_high);
4303 ir_node *sync, *fild, *res;
4304 ir_node *store_low, *store_high;
4306 if (ia32_cg_config.use_sse2) {
4307 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill the two 32bit halves to a 64bit stack slot */
4311 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4313 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4315 SET_IA32_ORIG_NODE(store_low, node);
4316 SET_IA32_ORIG_NODE(store_high, node);
4318 set_ia32_use_frame(store_low);
4319 set_ia32_use_frame(store_high);
4320 set_ia32_op_type(store_low, ia32_AddrModeD);
4321 set_ia32_op_type(store_high, ia32_AddrModeD);
4322 set_ia32_ls_mode(store_low, mode_Iu);
4323 set_ia32_ls_mode(store_high, mode_Is);
/* the high word lives 4 bytes above the low word */
4324 add_ia32_am_offs_int(store_high, 4);
4328 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the slot as a signed 64bit integer onto the x87 stack */
4331 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4333 set_ia32_use_frame(fild);
4334 set_ia32_op_type(fild, ia32_AddrModeS);
4335 set_ia32_ls_mode(fild, mode_Ls);
4337 SET_IA32_ORIG_NODE(fild, node);
4339 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4341 if (! mode_is_signed(get_irn_mode(val_high))) {
4342 ia32_address_mode_t am;
/* use the sign bit (high >> 31) as index into a 2-entry constant table:
 * { 0.0, 2^64 }, so negative fild results get the unsigned bias added */
4344 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4347 am.addr.base = get_symconst_base();
4348 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4349 am.addr.mem = nomem;
4352 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4353 am.addr.use_frame = 0;
4354 am.addr.frame_entity = NULL;
4355 am.addr.symconst_sign = 0;
4356 am.ls_mode = mode_F;
4357 am.mem_proj = nomem;
4358 am.op_type = ia32_AddrModeS;
4360 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4361 am.pinned = op_pin_state_floats;
4363 am.ins_permuted = 0;
4365 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4366 am.new_op1, am.new_op2, get_fpcw());
4367 set_am_attributes(fadd, &am);
4369 set_irn_mode(fadd, mode_T);
4370 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
4375 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4377 ir_node *src_block = get_nodes_block(node);
4378 ir_node *block = be_transform_node(src_block);
4379 ir_graph *irg = get_Block_irg(block);
4380 dbg_info *dbgi = get_irn_dbg_info(node);
4381 ir_node *frame = get_irg_frame(irg);
4382 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4383 ir_node *new_val = be_transform_node(val);
4384 ir_node *fist, *mem;
4386 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4387 SET_IA32_ORIG_NODE(fist, node);
4388 set_ia32_use_frame(fist);
4389 set_ia32_op_type(fist, ia32_AddrModeD);
4390 set_ia32_ls_mode(fist, mode_Ls);
4396 * the BAD transformer.
/* Fallback transformer registered for node types that must never reach the
 * ia32 transformation phase; always panics. */
4398 static ir_node *bad_transform(ir_node *node)
4400 panic("No transform function for %+F available.", node);
/* Transforms a Proj of an ia32_l_FloattoLL: loads one 32bit half of the
 * 64bit value that gen_ia32_l_FloattoLL stored into the frame slot (offset
 * +4 for the high word). */
4403 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4405 ir_node *block = be_transform_node(get_nodes_block(node));
4406 ir_graph *irg = get_Block_irg(block);
4407 ir_node *pred = get_Proj_pred(node);
4408 ir_node *new_pred = be_transform_node(pred);
4409 ir_node *frame = get_irg_frame(irg);
4410 dbg_info *dbgi = get_irn_dbg_info(node);
4411 long pn = get_Proj_proj(node);
4416 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4417 SET_IA32_ORIG_NODE(load, node);
4418 set_ia32_use_frame(load);
4419 set_ia32_op_type(load, ia32_AddrModeS);
4420 set_ia32_ls_mode(load, mode_Iu);
4421 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4422 * 32 bit from it with this particular load */
4423 attr = get_ia32_attr(load);
4424 attr->data.need_64bit_stackent = 1;
4426 if (pn == pn_ia32_l_FloattoLL_res_high) {
4427 add_ia32_am_offs_int(load, 4);
4429 assert(pn == pn_ia32_l_FloattoLL_res_low);
4432 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4438 * Transform the Projs of an AddSP.
/* Transforms the Projs of a be_AddSP. Since gen_be_AddSP produced an ia32
 * SubSP, the Proj numbers are renumbered to the SubSP's outputs. */
4440 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4442 ir_node *pred = get_Proj_pred(node);
4443 ir_node *new_pred = be_transform_node(pred);
4444 dbg_info *dbgi = get_irn_dbg_info(node);
4445 long proj = get_Proj_proj(node);
4447 if (proj == pn_be_AddSP_sp) {
4448 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4449 pn_ia32_SubSP_stack);
/* the stack pointer output is pinned to ESP */
4450 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4452 } else if (proj == pn_be_AddSP_res) {
4453 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4454 pn_ia32_SubSP_addr);
4455 } else if (proj == pn_be_AddSP_M) {
4456 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4459 panic("No idea how to transform proj->AddSP");
4463 * Transform the Projs of a SubSP.
/* Transforms the Projs of a be_SubSP. Since gen_be_SubSP produced an ia32
 * AddSP, the Proj numbers are renumbered to the AddSP's outputs. */
4465 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4467 ir_node *pred = get_Proj_pred(node);
4468 ir_node *new_pred = be_transform_node(pred);
4469 dbg_info *dbgi = get_irn_dbg_info(node);
4470 long proj = get_Proj_proj(node);
4472 if (proj == pn_be_SubSP_sp) {
4473 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4474 pn_ia32_AddSP_stack);
/* the stack pointer output is pinned to ESP */
4475 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4477 } else if (proj == pn_be_SubSP_M) {
4478 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4481 panic("No idea how to transform proj->SubSP");
4485 * Transform and renumber the Projs from a Load.
/* Transforms and renumbers the Projs of a Load. Handles the various node
 * types a Load may have been transformed into (ia32 Load, Conv_I2I, xLoad,
 * vfld) and the special case of memory Projs whose Load may still be folded
 * into another node's address mode. */
4487 static ir_node *gen_Proj_Load(ir_node *node)
4490 ir_node *block = be_transform_node(get_nodes_block(node));
4491 ir_node *pred = get_Proj_pred(node);
4492 dbg_info *dbgi = get_irn_dbg_info(node);
4493 long proj = get_Proj_proj(node);
4495 /* loads might be part of source address mode matches, so we don't
4496 * transform the ProjMs yet (with the exception of loads whose result is
/* a multi-user Load cannot be AM-folded, so its ProjM can be deferred */
4499 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4502 /* this is needed, because sometimes we have loops that are only
4503 reachable through the ProjM */
4504 be_enqueue_preds(node);
4505 /* do it in 2 steps, to silence firm verifier */
4506 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4507 set_Proj_proj(res, pn_ia32_mem);
4511 /* renumber the proj */
4512 new_pred = be_transform_node(pred);
4513 if (is_ia32_Load(new_pred)) {
4516 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4518 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4519 case pn_Load_X_regular:
4520 return new_rd_Jmp(dbgi, block);
4521 case pn_Load_X_except:
4522 /* This Load might raise an exception. Mark it. */
4523 set_ia32_exc_label(new_pred, 1);
4524 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load may have been merged into a Conv during transformation */
4528 } else if (is_ia32_Conv_I2I(new_pred) ||
4529 is_ia32_Conv_I2I8Bit(new_pred)) {
4530 set_irn_mode(new_pred, mode_T);
4531 if (proj == pn_Load_res) {
4532 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4533 } else if (proj == pn_Load_M) {
4534 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
/* SSE float load */
4536 } else if (is_ia32_xLoad(new_pred)) {
4539 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4541 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4542 case pn_Load_X_regular:
4543 return new_rd_Jmp(dbgi, block);
4544 case pn_Load_X_except:
4545 /* This Load might raise an exception. Mark it. */
4546 set_ia32_exc_label(new_pred, 1);
4547 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
/* x87 float load */
4551 } else if (is_ia32_vfld(new_pred)) {
4554 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4556 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4557 case pn_Load_X_regular:
4558 return new_rd_Jmp(dbgi, block);
4559 case pn_Load_X_except:
4560 /* This Load might raise an exception. Mark it. */
4561 set_ia32_exc_label(new_pred, 1);
4562 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4567 /* can happen for ProJMs when source address mode happened for the
4570 /* however it should not be the result proj, as that would mean the
4571 load had multiple users and should not have been used for
4573 if (proj != pn_Load_M) {
4574 panic("internal error: transformed node not a Load");
4576 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4579 panic("No idea how to transform proj");
4583 * Transform and renumber the Projs from a DivMod like instruction.
/* Transforms and renumbers the Projs of a Div/Mod/DivMod: all three map to
 * a single ia32 Div (or IDiv), which provides both quotient (div_res) and
 * remainder (mod_res) outputs. */
4585 static ir_node *gen_Proj_DivMod(ir_node *node)
4587 ir_node *block = be_transform_node(get_nodes_block(node));
4588 ir_node *pred = get_Proj_pred(node);
4589 ir_node *new_pred = be_transform_node(pred);
4590 dbg_info *dbgi = get_irn_dbg_info(node);
4591 long proj = get_Proj_proj(node);
4593 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the *original* opcode, since Proj numbers differ per op */
4595 switch (get_irn_opcode(pred)) {
4599 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4601 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4602 case pn_Div_X_regular:
4603 return new_rd_Jmp(dbgi, block);
4604 case pn_Div_X_except:
4605 set_ia32_exc_label(new_pred, 1);
4606 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4614 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4616 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4617 case pn_Mod_X_except:
4618 set_ia32_exc_label(new_pred, 1);
4619 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4627 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4628 case pn_DivMod_res_div:
4629 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4630 case pn_DivMod_res_mod:
4631 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4632 case pn_DivMod_X_regular:
4633 return new_rd_Jmp(dbgi, block);
4634 case pn_DivMod_X_except:
4635 set_ia32_exc_label(new_pred, 1);
4636 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4645 panic("No idea how to transform proj->DivMod");
4649 * Transform and renumber the Projs from a CopyB.
/* Transforms and renumbers the Projs of a CopyB, which may have become
 * either an ia32 CopyB_i (fixed size) or a generic ia32 CopyB. */
4651 static ir_node *gen_Proj_CopyB(ir_node *node)
4653 ir_node *pred = get_Proj_pred(node);
4654 ir_node *new_pred = be_transform_node(pred);
4655 dbg_info *dbgi = get_irn_dbg_info(node);
4656 long proj = get_Proj_proj(node);
4659 case pn_CopyB_M_regular:
4660 if (is_ia32_CopyB_i(new_pred)) {
4661 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4662 } else if (is_ia32_CopyB(new_pred)) {
4663 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4670 panic("No idea how to transform proj->CopyB");
4674 * Transform and renumber the Projs from a Quot.
/* Transforms and renumbers the Projs of a Quot (float division), which may
 * have become an SSE xDiv or an x87 vfdiv. */
4676 static ir_node *gen_Proj_Quot(ir_node *node)
4678 ir_node *pred = get_Proj_pred(node);
4679 ir_node *new_pred = be_transform_node(pred);
4680 dbg_info *dbgi = get_irn_dbg_info(node);
4681 long proj = get_Proj_proj(node);
/* memory result */
4685 if (is_ia32_xDiv(new_pred)) {
4686 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4687 } else if (is_ia32_vfdiv(new_pred)) {
4688 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
/* data result: xmm for SSE, vfp for x87 */
4692 if (is_ia32_xDiv(new_pred)) {
4693 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4694 } else if (is_ia32_vfdiv(new_pred)) {
4695 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4698 case pn_Quot_X_regular:
4699 case pn_Quot_X_except:
4704 panic("No idea how to transform proj->Quot");
/* Transforms a be_Call (indirect call only — direct calls are asserted
 * away) into an ia32 Call. The call target may be folded into an address
 * mode or immediate; register parameters are routed to the fixed eax/ecx/
 * edx inputs; the fpcw input is the last argument. SSE2 calls are recorded
 * for post-processing (float results need the x87 simulator). */
4707 static ir_node *gen_be_Call(ir_node *node)
4709 dbg_info *const dbgi = get_irn_dbg_info(node);
4710 ir_node *const src_block = get_nodes_block(node);
4711 ir_node *const block = be_transform_node(src_block);
4712 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4713 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4714 ir_node *const sp = be_transform_node(src_sp);
4715 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4716 ia32_address_mode_t am;
4717 ia32_address_t *const addr = &am.addr;
4722 ir_node * eax = noreg_GP;
4723 ir_node * ecx = noreg_GP;
4724 ir_node * edx = noreg_GP;
4725 unsigned const pop = be_Call_get_pop(node);
4726 ir_type *const call_tp = be_Call_get_type(node);
4727 int old_no_pic_adjust;
4729 /* Run the x87 simulator if the call returns a float value */
4730 if (get_method_n_ress(call_tp) > 0) {
4731 ir_type *const res_type = get_method_res_type(call_tp, 0);
4732 ir_mode *const res_mode = get_type_mode(res_type);
4734 if (res_mode != NULL && mode_is_float(res_mode)) {
4735 env_cg->do_x87_sim = 1;
4739 /* We do not want be_Call direct calls */
4740 assert(be_Call_get_entity(node) == NULL);
4742 /* special case for PIC trampoline calls */
4743 old_no_pic_adjust = no_pic_adjust;
4744 no_pic_adjust = env_cg->birg->main_env->options->pic;
4746 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4747 match_am | match_immediate);
4749 no_pic_adjust = old_no_pic_adjust;
/* inputs from the back: last is fpcw, then the gp register parameters */
4751 i = get_irn_arity(node) - 1;
4752 fpcw = be_transform_node(get_irn_n(node, i--));
4753 for (; i >= be_pos_Call_first_arg; --i) {
4754 arch_register_req_t const *const req = arch_get_register_req(node, i);
4755 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4757 assert(req->type == arch_register_req_type_limited);
4758 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* route each register parameter to its fixed Call input */
4760 switch (*req->limited) {
4761 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4762 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4763 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4764 default: panic("Invalid GP register for register parameter");
4768 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4769 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4770 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4771 set_am_attributes(call, &am);
4772 call = fix_mem_proj(call, &am);
4774 if (get_irn_pinned(node) == op_pin_state_pinned)
4775 set_irn_pinned(call, op_pin_state_pinned);
4777 SET_IA32_ORIG_NODE(call, node);
4779 if (ia32_cg_config.use_sse2) {
4780 /* remember this call for post-processing */
4781 ARR_APP1(ir_node *, call_list, call);
4782 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4789 * Transform Builtin trap
/* Transforms the Builtin trap into an ia32 UD2 instruction. */
4791 static ir_node *gen_trap(ir_node *node)
4793 dbg_info *dbgi = get_irn_dbg_info(node);
4794 ir_node *block = be_transform_node(get_nodes_block(node));
4795 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4797 return new_bd_ia32_UD2(dbgi, block, mem);
4801 * Transform Builtin debugbreak
/* Transforms the Builtin debugbreak into an ia32 Breakpoint instruction. */
4803 static ir_node *gen_debugbreak(ir_node *node)
4805 dbg_info *dbgi = get_irn_dbg_info(node);
4806 ir_node *block = be_transform_node(get_nodes_block(node));
4807 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4809 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4813 * Transform Builtin return_address
/* Transforms the Builtin return_address: climbs @p value frames up via
 * ClimbFrame (for non-zero levels, presumably — the guard is not visible
 * here) and loads the return address through the frame-entity set by
 * ia32_get_return_address_entity(). The first parameter must be a Const. */
4815 static ir_node *gen_return_address(ir_node *node)
4817 ir_node *param = get_Builtin_param(node, 0);
4818 ir_node *frame = get_Builtin_param(node, 1);
4819 dbg_info *dbgi = get_irn_dbg_info(node);
4820 tarval *tv = get_Const_tarval(param);
4821 unsigned long value = get_tarval_long(tv);
4823 ir_node *block = be_transform_node(get_nodes_block(node));
4824 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' stack frames to find the wanted frame pointer */
4828 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4829 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4830 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4833 /* load the return address from this frame */
4834 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4836 set_irn_pinned(load, get_irn_pinned(node));
4837 set_ia32_op_type(load, ia32_AddrModeS);
4838 set_ia32_ls_mode(load, mode_Iu);
4840 set_ia32_am_offs_int(load, 0);
4841 set_ia32_use_frame(load);
4842 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4844 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants agree on the result proj number, so the node may be
 * rematerialized regardless of which load it becomes */
4845 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4846 && pn_ia32_vfld_res == pn_ia32_Load_res
4847 && pn_ia32_Load_res == pn_ia32_res);
4848 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4851 SET_IA32_ORIG_NODE(load, node);
4852 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4856 * Transform Builtin frame_address
/* Transforms the Builtin frame_address: like gen_return_address, but loads
 * through the frame-address entity. If no such entity exists the offset is
 * still set to 0, mimicking gcc behavior (see comment below). */
4858 static ir_node *gen_frame_address(ir_node *node)
4860 ir_node *param = get_Builtin_param(node, 0);
4861 ir_node *frame = get_Builtin_param(node, 1);
4862 dbg_info *dbgi = get_irn_dbg_info(node);
4863 tarval *tv = get_Const_tarval(param);
4864 unsigned long value = get_tarval_long(tv);
4866 ir_node *block = be_transform_node(get_nodes_block(node));
4867 ir_node *ptr = be_transform_node(frame);
/* walk up 'value' stack frames to find the wanted frame pointer */
4872 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4873 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4874 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4877 /* load the frame address from this frame */
4878 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4880 set_irn_pinned(load, get_irn_pinned(node));
4881 set_ia32_op_type(load, ia32_AddrModeS);
4882 set_ia32_ls_mode(load, mode_Iu);
4884 ent = ia32_get_frame_address_entity();
4886 set_ia32_am_offs_int(load, 0);
4887 set_ia32_use_frame(load);
4888 set_ia32_frame_ent(load, ent);
4890 /* will fail anyway, but gcc does this: */
4891 set_ia32_am_offs_int(load, 0);
4894 if (get_irn_pinned(node) == op_pin_state_floats) {
/* all load variants agree on the result proj number, so the node may be
 * rematerialized regardless of which load it becomes */
4895 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4896 && pn_ia32_vfld_res == pn_ia32_Load_res
4897 && pn_ia32_Load_res == pn_ia32_res);
4898 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4901 SET_IA32_ORIG_NODE(load, node);
4902 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4906 * Transform Builtin frame_address
/* Transforms the Builtin prefetch. Without SSE/3DNow! prefetch support the
 * memory is simply routed through. Otherwise: 3DNow! PrefetchW for writes,
 * else one of the SSE prefetch hints (NTA/T2/T1/T0) chosen by the locality
 * parameter, else the plain 3DNow! Prefetch. */
4908 static ir_node *gen_prefetch(ir_node *node)
4911 ir_node *ptr, *block, *mem, *base, *index;
4912 ir_node *param, *new_node;
4915 ia32_address_t addr;
4917 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4918 /* no prefetch at all, route memory */
4919 return be_transform_node(get_Builtin_mem(node));
/* second builtin parameter: 0 = read prefetch, 1 = write prefetch */
4922 param = get_Builtin_param(node, 1);
4923 tv = get_Const_tarval(param);
4924 rw = get_tarval_long(tv);
4926 /* construct load address */
4927 memset(&addr, 0, sizeof(addr));
4928 ptr = get_Builtin_param(node, 0);
4929 ia32_create_address_mode(&addr, ptr, 0);
4936 base = be_transform_node(base);
4939 if (index == NULL) {
4942 index = be_transform_node(index);
4945 dbgi = get_irn_dbg_info(node);
4946 block = be_transform_node(get_nodes_block(node));
4947 mem = be_transform_node(get_Builtin_mem(node));
4949 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4950 /* we have 3DNow!, this was already checked above */
4951 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4952 } else if (ia32_cg_config.use_sse_prefetch) {
4953 /* note: rw == 1 is IGNORED in that case */
4954 param = get_Builtin_param(node, 2);
4955 tv = get_Const_tarval(param);
4956 locality = get_tarval_long(tv);
4958 /* SSE style prefetch */
/* locality hint selects the cache level: 0=NTA, 1=T2, 2=T1, >=3=T0 */
4961 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4964 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4967 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4970 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4974 assert(ia32_cg_config.use_3dnow_prefetch);
4975 /* 3DNow! style prefetch */
4976 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4979 set_irn_pinned(new_node, get_irn_pinned(node));
4980 set_ia32_op_type(new_node, ia32_AddrModeS);
4981 set_ia32_ls_mode(new_node, mode_Bu);
4982 set_address(new_node, &addr);
4984 SET_IA32_ORIG_NODE(new_node, node);
4986 be_dep_on_frame(new_node);
4987 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4991 * Transform bsf like node
/*
 * Transform a one-operand builtin into a bsf/bsr-style ia32 node, trying
 * to fold the operand (or a load producing it) into the instruction's
 * address mode.
 *
 * @param node  the Builtin node
 * @param func  constructor of the concrete ia32 node (e.g. Bsf, Bsr)
 */
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* let match_arguments decide whether the operand becomes a register
	 * operand or is folded as a memory operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);

	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	/* if a load was folded, reroute its memory Proj to the new node */
	return fix_mem_proj(cnt, &am);
5016 * Transform builtin ffs.
/*
 * Transform builtin ffs (find first set): result is 1 + index of the
 * least significant set bit, or 0 when the input is 0.
 *
 * Trick: bsf yields the bit index and sets ZF for a zero input. The ZF
 * is turned into an all-ones mask which is OR-ed into the bsf result, so
 * the final +1 wraps the all-ones value to 0.
 */
static ir_node *gen_ffs(ir_node *node)
	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node *real = skip_Proj(bsf);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *flag, *set, *conv, *neg, *or;

	/* ensure the Bsf is in mode_T so both the value and the flags result
	 * can be projected out */
	if (get_irn_mode(real) != mode_T) {
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);

	flag = new_r_Proj(real, mode_b, pn_ia32_flags);

	/* set = 1 iff the input was zero (ZF after bsf) */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);

	/* zero-extend the 8 bit setcc result */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg = 0, or 0xFFFFFFFF when the input was zero */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	/* or = bsf result, or all-ones for a zero input */
	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);

	/* +1: bit index + 1, or all-ones + 1 == 0 */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5054 * Transform builtin clz.
5056 static ir_node *gen_clz(ir_node *node)
5058 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5059 ir_node *real = skip_Proj(bsr);
5060 dbg_info *dbgi = get_irn_dbg_info(real);
5061 ir_node *block = get_nodes_block(real);
5062 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5064 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5068 * Transform builtin ctz.
/*
 * Transform builtin ctz (count trailing zeros): bsf directly delivers the
 * index of the lowest set bit; the result for a zero input is undefined,
 * which matches the builtin's contract.
 */
static ir_node *gen_ctz(ir_node *node)
	return gen_unop_AM(node, new_bd_ia32_Bsf);
5076 * Transform builtin parity.
/*
 * Transform builtin parity: Cmp(value, 0), then materialize the parity
 * flag with Setcc and zero-extend the 8 bit result.
 *
 * NOTE(review): on x86 the parity flag only reflects the least
 * significant byte of the result (Intel SDM, EFLAGS.PF), so this appears
 * to compute the parity of the low byte only, not of the whole 32 bit
 * value -- verify whether the operand needs to be folded first
 * (x ^= x >> 16; x ^= x >> 8) or whether callers guarantee byte operands.
 */
static ir_node *gen_parity(ir_node *node)
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *block = get_nodes_block(node);

	ir_node *new_block = be_transform_node(block);
	ir_node *imm, *cmp, *new_node;

	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* fold the operand (or its load) into the Cmp's address mode */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);

	SET_IA32_ORIG_NODE(cmp, node);

	cmp = fix_mem_proj(cmp, &am);

	/* setp: turn the parity flag into a 0/1 byte */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	/* zero-extend the setcc byte to 32 bit */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
5116 * Transform builtin popcount
5118 static ir_node *gen_popcount(ir_node *node)
5120 ir_node *param = get_Builtin_param(node, 0);
5121 dbg_info *dbgi = get_irn_dbg_info(node);
5123 ir_node *block = get_nodes_block(node);
5124 ir_node *new_block = be_transform_node(block);
5127 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5129 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5130 if (ia32_cg_config.use_popcnt) {
5131 ia32_address_mode_t am;
5132 ia32_address_t *addr = &am.addr;
5135 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5137 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5138 set_am_attributes(cnt, &am);
5139 set_ia32_ls_mode(cnt, get_irn_mode(param));
5141 SET_IA32_ORIG_NODE(cnt, node);
5142 return fix_mem_proj(cnt, &am);
5145 new_param = be_transform_node(param);
5147 /* do the standard popcount algo */
5149 /* m1 = x & 0x55555555 */
5150 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5151 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5154 simm = ia32_create_Immediate(NULL, 0, 1);
5155 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5157 /* m2 = s1 & 0x55555555 */
5158 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5161 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5163 /* m4 = m3 & 0x33333333 */
5164 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5165 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5168 simm = ia32_create_Immediate(NULL, 0, 2);
5169 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5171 /* m5 = s2 & 0x33333333 */
5172 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5175 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5177 /* m7 = m6 & 0x0F0F0F0F */
5178 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5179 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5182 simm = ia32_create_Immediate(NULL, 0, 4);
5183 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5185 /* m8 = s3 & 0x0F0F0F0F */
5186 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5189 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5191 /* m10 = m9 & 0x00FF00FF */
5192 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5193 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5196 simm = ia32_create_Immediate(NULL, 0, 8);
5197 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5199 /* m11 = s4 & 0x00FF00FF */
5200 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5202 /* m12 = m10 + m11 */
5203 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5205 /* m13 = m12 & 0x0000FFFF */
5206 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5207 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5209 /* s5 = m12 >> 16 */
5210 simm = ia32_create_Immediate(NULL, 0, 16);
5211 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5213 /* res = m13 + s5 */
5214 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5218 * Transform builtin byte swap.
5220 static ir_node *gen_bswap(ir_node *node)
5222 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5223 dbg_info *dbgi = get_irn_dbg_info(node);
5225 ir_node *block = get_nodes_block(node);
5226 ir_node *new_block = be_transform_node(block);
5227 ir_mode *mode = get_irn_mode(param);
5228 unsigned size = get_mode_size_bits(mode);
5229 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5233 if (ia32_cg_config.use_i486) {
5234 /* swap available */
5235 return new_bd_ia32_Bswap(dbgi, new_block, param);
5237 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5238 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5240 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5241 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5243 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5245 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5246 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5248 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5249 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5252 /* swap16 always available */
5253 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5256 panic("Invalid bswap size (%d)", size);
5261 * Transform builtin outport.
/*
 * Transform builtin outport (write a value to an x86 I/O port).
 *
 * Parameter 0 is the port number (turned into an immediate when
 * possible), parameter 1 the value; the value's mode selects the operand
 * size of the out instruction.
 */
static ir_node *gen_outport(ir_node *node)
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *oldv = get_Builtin_param(node, 1);
	ir_mode *mode = get_irn_mode(oldv);
	ir_node *value = be_transform_node(oldv);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	/* the access size is taken from the written value's mode */
	set_ia32_ls_mode(res, mode);
5279 * Transform builtin inport.
/*
 * Transform builtin inport (read a value from an x86 I/O port).
 *
 * The result mode comes from the builtin's method type; parameter 0 is
 * the port number (turned into an immediate when possible).
 */
static ir_node *gen_inport(ir_node *node)
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	/* the access size is taken from the declared result mode */
	set_ia32_ls_mode(res, mode);

	/* check for missing Result Proj */
5299 * Transform a builtin inner trampoline
/*
 * Transform a builtin inner trampoline: write a small code stub to *ptr
 * that loads the static chain pointer and jumps to the real callee:
 *
 *     B9 <env>     mov ecx, <env>
 *     E9 <disp>    jmp <callee>
 *
 * The jmp displacement is relative to the end of the 10 byte stub, hence
 * the -10 bias applied to the callee address below.
 */
static ir_node *gen_inner_trampoline(ir_node *node)
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = get_Builtin_param(node, 1);
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);

	ir_node *trampoline;

	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);

	/* transform the address components; missing ones use NoReg */
	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);

	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	/* 0xB9 is the opcode byte of "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	/* store the environment pointer as the mov's 32 bit immediate */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* jmp rel <callee> */
	/* 0xE9 is the opcode byte of "jmp rel32" */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	trampoline = be_transform_node(ptr);

	/* the callee is typically an immediate */
	if (is_SymConst(callee)) {
		/* callee - 10: pre-bias by the stub length for the rel32 encoding */
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	/* displacement = (callee - 10) - trampoline */
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

	/* store the displacement as the jmp's rel32 operand */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* results: (memory, trampoline address) */
	return new_r_Tuple(new_block, 2, in);
5389 * Transform Builtin node.
/*
 * Transform a Builtin node: dispatch on its kind to the specific gen_*
 * handler above; kinds without a handler abort.
 */
static ir_node *gen_Builtin(ir_node *node)
	ir_builtin_kind kind = get_Builtin_kind(node);

		return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
		return gen_ffs(node);
		return gen_clz(node);
		return gen_ctz(node);
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
		return gen_bswap(node);
		return gen_outport(node);
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5429 * Transform Proj(Builtin) node.
/*
 * Transform Proj(Builtin): map the frontend's Proj numbers onto the
 * outputs of the already transformed ia32 node.
 */
static ir_node *gen_Proj_Builtin(ir_node *proj)
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);

	/* single-result builtins: the transformed node IS the result */
	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	/* memory-only builtins */
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		/* inport: one value result and one memory result */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
	case ir_bk_inner_trampoline:
		/* gen_inner_trampoline returned a Tuple (memory, address) */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5472 static ir_node *gen_be_IncSP(ir_node *node)
5474 ir_node *res = be_duplicate_node(node);
5475 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5481 * Transform the Projs from a be_Call.
/*
 * Transform the Projs from a be_Call: renumber them onto the ia32_Call's
 * output layout and pin the registers of fixed outputs (esp, fpcw).
 */
static ir_node *gen_Proj_be_Call(ir_node *node)
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);

	if (proj == pn_be_Call_M_regular) {
		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);

	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
		/* a call result: search the ia32_Call output whose (limited)
		 * register requirement matches this Proj's requirement */
		arch_register_req_t const *const req = arch_get_register_req_out(node);
		int const n_outs = arch_irn_get_n_outs(new_call);

		assert(proj >= pn_be_Call_first_res);
		assert(req->type & arch_register_req_type_limited);

		for (i = 0; i < n_outs; ++i) {
			arch_register_req_t const *const new_req
				= arch_get_out_register_req(new_call, i);

			/* skip outputs whose register constraint does not match */
			if (!(new_req->type & arch_register_req_type_limited) ||
			    new_req->cls != req->cls ||
			    *new_req->limited != *req->limited)

	res = new_rd_Proj(dbgi, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);

	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5546 * Transform the Projs from a Cmp.
/*
 * Transform the Projs from a Cmp: must never be reached -- mode_b values
 * are expected to have been lowered before the backend transformation.
 */
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5556 * Transform the Projs from a Bound.
/*
 * Transform the Projs from a Bound: the bounds check becomes a
 * conditional jump, so the X Projs map onto the Jcc's true/false outputs;
 * memory and index pass through to their transformed predecessors.
 */
static ir_node *gen_Proj_Bound(ir_node *node)
	ir_node *new_node, *block;
	ir_node *pred = get_Proj_pred(node);

	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		/* in-bounds continuation: the Jcc's true output */
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		/* out-of-bounds exception: the Jcc's false output */
		new_node = be_transform_node(pred);
		block = get_nodes_block(new_node);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
/*
 * Transform Proj(ASM): the memory Proj is remapped onto the transformed
 * ASM node's last output; data Projs are checked for a supported mode.
 */
static ir_node *gen_Proj_ASM(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	long pos = get_Proj_proj(node);

	if (mode == mode_M) {
		/* the memory output is always the last output of the new ASM */
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");

	return new_r_Proj(new_pred, mode, pos);
5602 * Transform and potentially renumber Proj nodes.
/*
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specialized Proj transformers above.
 */
static ir_node *gen_Proj(ir_node *node)
	ir_node *pred = get_Proj_pred(node);

	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			/* a Store only produces memory: use the transformed Store itself */
			return be_transform_node(pred);
			panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_Builtin(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
		case pn_Start_X_initial_exec: {
			ir_node *block = get_nodes_block(pred);
			ir_node *new_block = be_transform_node(block);
			dbg_info *dbgi = get_irn_dbg_info(node);
			/* we exchange the ProjX with a jump */
			ir_node *jump = new_rd_Jmp(dbgi, new_block);
		case pn_Start_P_tls:
			return gen_Proj_tls(node);
		if (is_ia32_l_FloattoLL(pred)) {
			return gen_Proj_l_FloattoLL(node);
		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
			ir_mode *mode = get_irn_mode(node);
			if (ia32_mode_needs_gp_reg(mode)) {
				ir_node *new_pred = be_transform_node(pred);
				ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
				                               get_Proj_proj(node));
				/* keep the old node number for debugging purposes */
				new_proj->node_nr = node->node_nr;
	/* default: copy the node unchanged into the new graph */
	return be_duplicate_node(node);
5681 * Enters all transform functions into the generic pointer
/*
 * Enters all transform functions into the generic function pointer of the
 * corresponding firm ops (op->ops.generic), so the generic transformation
 * driver can dispatch by opcode. BAD() marks opcodes that must not occur
 * anymore at this stage.
 */
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();

#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);
	/* we should never see these nodes */
	/* handle builtins */
	/* handle generic backend nodes */
5780 * Pre-transform all unknown and noreg nodes.
/*
 * Pre-transform all unknown and noreg nodes so they exist in the new
 * graph before the main transformation runs; also caches the NoMem and
 * gp-NoReg nodes used throughout this file.
 */
static void ia32_pretransform_node(void)
	ia32_code_gen_t *cg = env_cg;

	cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);

	/* cache the frequently used NoMem/NoReg nodes */
	nomem = get_irg_no_mem(current_ir_graph);
	noreg_GP = ia32_new_NoReg_gp(cg);
5800 * Walker, checks if all ia32 nodes producing more than one result have their
5801 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/*
 * Walker: checks if all ia32 nodes producing more than one result have
 * their Projs, otherwise creates the missing Projs and keeps them alive
 * with a be_Keep node (so register allocation accounts for the clobbered
 * registers).
 */
static void add_missing_keep_walker(ir_node *node, void *data)
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);

	if (!is_ia32_irn(node))
	n_outs = arch_irn_get_n_outs(node);
	if (is_ia32_SwitchJmp(node))

	/* collect a bitmask of the Proj numbers that already exist */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);

		/* The node could be kept */
		if (get_irn_mode(proj) == mode_M)
		pn = get_Proj_proj(proj);
		assert(pn < n_outs);
		found_projs |= 1 << pn;

	/* are keeps missing? */
	for (i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *cls;

		if (found_projs & (1 << i)) {
		req = arch_get_out_register_req(node, i);
		/* flag outputs need no keep */
		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
		block = get_nodes_block(node);
		in[0] = new_r_Proj(node, arch_register_class_mode(cls), i);
		if (last_keep != NULL) {
			/* reuse one Keep per node for all missing outputs */
			be_Keep_add_node(last_keep, cls, in[0]);
		last_keep = be_new_Keep(block, 1, in);
		if (sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
5875 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5878 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5880 ir_graph *irg = be_get_birg_irg(cg->birg);
5881 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5885 * Post-process all calls if we are in SSE mode.
5886 * The ABI requires that the results are in st0, copy them
5887 * to a xmm register.
/*
 * Post-process all calls if we are in SSE mode. The ABI requires float
 * results in st0; copy them into xmm registers: an xStore user can be
 * patched directly into a vfst, all other users get the value via a
 * vfst-to-the-frame followed by an xLoad into an SSE register.
 */
static void postprocess_fp_call_results(void)
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];

		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */
			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */

			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);

				/* Keep nodes need no conversion */
				if (be_is_Keep(succ))

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);

					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					/* carry over the xStore's addressing attributes */
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);

				/* lazily create the st0 -> xmm copy (once per result) */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);

					/* route all former memory users through the new load */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);

				/* finally let the user consume the xmm value */
				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5978 /* do the transformation */
/*
 * Do the transformation: register all transformers, compute heights and
 * non-address-mode information, disable CSE for the duration (node
 * attributes are set after creation), run the generic transformation and
 * post-process SSE float call results afterwards.
 */
void ia32_transform_graph(ia32_code_gen_t *cg)
	register_transformers();

	initial_fpcw = NULL;

	be_timer_push(T_HEIGHTS);
	heights = heights_new(cg->irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(cg->birg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	/* remember calls so SSE float results can be fixed up afterwards */
	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->birg, ia32_pretransform_node);

	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	/* restore the caller's CSE setting */
	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(heights);
6014 void ia32_init_transform(void)
6016 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");