* Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* Bit patterns (as hex strings) used to build FP sign/abs mask constants:
 * the *_SIGN masks have only the sign bit set, the *_ABS masks have every
 * bit except the sign bit set. */
#define SFP_SIGN "0x80000000"
#define DFP_SIGN "0x8000000000000000"
#define SFP_ABS "0x7FFFFFFF"
#define DFP_ABS "0x7FFFFFFFFFFFFFFF"
/* INT64_MAX as a decimal string */
#define DFP_INTMAX "9223372036854775807"
/* 2^64 as a decimal string: bias added when converting unsigned 64bit
 * integers to floating point */
#define ULL_BIAS "18446744073709551616"

/* local-label names of the entities emitted for the constants above */
#define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
#define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
#define ENT_SFP_ABS ".LC_ia32_sfp_abs"
#define ENT_DFP_ABS ".LC_ia32_dfp_abs"
#define ENT_ULL_BIAS ".LC_ia32_ull_bias"

/* shortcuts for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register files */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* debug module handle (present in debug builds only) */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* transformed node delivering the initial x87 control word; set lazily by
 * get_fpcw() below */
static ir_node *initial_fpcw = NULL;

/* Constructor-callback types: they let the generic gen_binop/gen_unop/...
 * helpers below work with any of the generated ia32 node constructors.
 * NOTE(review): some parameter lists are truncated in this excerpt. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
ir_node *op1, ir_node *op2);
typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem);
typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

/* forward declarations of helpers defined later in this file */
static ir_node *create_immediate_or_transform(ir_node *node,
char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
dbg_info *dbgi, ir_node *block,
ir_node *op, ir_node *orig_node);

/* it's enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
/**
 * returns true if constant can be created with a simple float command
 * (x87 has dedicated instructions for loading 0.0 and 1.0).
 */
static bool is_simple_x87_Const(ir_node *node)
tarval *tv = get_Const_tarval(node);
/* 0.0 and 1.0 can be materialized directly (fldz/fld1) */
if (tarval_is_null(tv) || tarval_is_one(tv))
/* TODO: match all the other float constants */
/**
 * returns true if constant can be created with a simple float command
 * (SSE variant; without CONSTRUCT_SSE_CONST only 0.0 qualifies).
 */
static bool is_simple_sse_Const(ir_node *node)
tarval *tv = get_Const_tarval(node);
ir_mode *mode = get_tarval_mode(tv);
if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
if (mode == mode_D) {
/* assemble the low 32 bits of the double, byte by byte */
unsigned val = get_tarval_sub_bits(tv, 0) |
(get_tarval_sub_bits(tv, 1) << 8) |
(get_tarval_sub_bits(tv, 2) << 16) |
(get_tarval_sub_bits(tv, 3) << 24);
/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
/* TODO: match all the other float constants */
/**
 * return NoREG or pic_base in case of PIC.
 * This is necessary as base address for newly created symbols
 * (position-independent code addresses constants relative to the pic base).
 */
static ir_node *get_symconst_base(void)
if (env_cg->birg->main_env->options->pic) {
return arch_code_generator_get_pic_base(env_cg);
/**
 * Transforms a Const node.
 * Float constants become an SSE xZero/xMovd/xLoad or an x87
 * vfldz/vfld1/vfld (loading from a float-constant entity in the general
 * case); integer constants become an ia32_Const immediate.
 */
static ir_node *gen_Const(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
assert(is_Const(node));
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2) {
tarval *tv = get_Const_tarval(node);
if (tarval_is_null(tv)) {
/* +0.0 needs no memory access: xor the register with itself */
load = new_bd_ia32_xZero(dbgi, block);
set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
} else if (tarval_is_one(tv)) {
/* construct 1.0 from an all-ones pattern via shift left/right */
int cnst = mode == mode_F ? 26 : 55;
ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
ir_node *pslld, *psrld;
load = new_bd_ia32_xAllOnes(dbgi, block);
set_ia32_ls_mode(load, mode);
pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
set_ia32_ls_mode(pslld, mode);
psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
} else if (mode == mode_F) {
/* we can place any 32bit constant by using a movd gp, sse */
unsigned val = get_tarval_sub_bits(tv, 0) |
(get_tarval_sub_bits(tv, 1) << 8) |
(get_tarval_sub_bits(tv, 2) << 16) |
(get_tarval_sub_bits(tv, 3) << 24);
ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
load = new_bd_ia32_xMovd(dbgi, block, cnst);
set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
if (mode == mode_D) {
unsigned val = get_tarval_sub_bits(tv, 0) |
(get_tarval_sub_bits(tv, 1) << 8) |
(get_tarval_sub_bits(tv, 2) << 16) |
(get_tarval_sub_bits(tv, 3) << 24);
ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
ir_node *cnst, *psllq;
/* fine, lower 32bit are zero, produce 32bit value */
val = get_tarval_sub_bits(tv, 4) |
(get_tarval_sub_bits(tv, 5) << 8) |
(get_tarval_sub_bits(tv, 6) << 16) |
(get_tarval_sub_bits(tv, 7) << 24);
cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
load = new_bd_ia32_xMovd(dbgi, block, cnst);
set_ia32_ls_mode(load, mode);
/* shift the upper half into place */
psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a float-constant entity */
floatent = create_float_const_entity(node);
base = get_symconst_base();
load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_am_sc(load, floatent);
arch_irn_add_flags(load, arch_irn_flags_rematerializable);
res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: 0.0 and 1.0 have dedicated load instructions */
if (is_Const_null(node)) {
load = new_bd_ia32_vfldz(dbgi, block);
set_ia32_ls_mode(load, mode);
} else if (is_Const_one(node)) {
load = new_bd_ia32_vfld1(dbgi, block);
set_ia32_ls_mode(load, mode);
floatent = create_float_const_entity(node);
/* create_float_const_ent is smart and sometimes creates
 * smaller entities, so use the entity's mode for the load */
ls_mode = get_type_mode(get_entity_type(floatent));
base = get_symconst_base();
load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_am_sc(load, floatent);
arch_irn_add_flags(load, arch_irn_flags_rematerializable);
res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
SET_IA32_ORIG_NODE(load, node);
be_dep_on_frame(load);
} else { /* non-float mode */
tarval *tv = get_Const_tarval(node);
tv = tarval_convert_to(tv, mode_Iu);
if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
panic("couldn't convert constant tarval (%+F)", node);
val = get_tarval_long(tv);
cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
SET_IA32_ORIG_NODE(cnst, node);
be_dep_on_frame(cnst);
/**
 * Transforms a SymConst.
 * Float-mode SymConsts become a load (SSE xLoad or x87 vfld) from the
 * entity; address SymConsts become an ia32_Const with the entity attached.
 */
static ir_node *gen_SymConst(ir_node *node)
ir_node *old_block = get_nodes_block(node);
ir_node *block = be_transform_node(old_block);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2)
cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
set_ia32_am_sc(cnst, get_SymConst_entity(node));
set_ia32_use_frame(cnst);
/* only entity addresses are supported by the backend */
if (get_SymConst_kind(node) != symconst_addr_ent) {
panic("backend only support symconst_addr_ent (at %+F)", node);
entity = get_SymConst_entity(node);
cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
SET_IA32_ORIG_NODE(cnst, node);
be_dep_on_frame(cnst);
/**
 * Create a float type for the given mode and cache it.
 * One cache slot exists per (mode, alignment) pair; alignment indexes the
 * per-mode cache arrays, so it must be < 16.
 *
 * @param mode the mode for the float type (might be integer mode for SSE2 types)
 * @param align alignment
 */
static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
if (mode == mode_Iu) {
static ir_type *int_Iu[16] = {NULL, };
if (int_Iu[align] == NULL) {
int_Iu[align] = tp = new_type_primitive(mode);
/* set the specified alignment */
set_type_alignment_bytes(tp, align);
return int_Iu[align];
} else if (mode == mode_Lu) {
static ir_type *int_Lu[16] = {NULL, };
if (int_Lu[align] == NULL) {
int_Lu[align] = tp = new_type_primitive(mode);
/* set the specified alignment */
set_type_alignment_bytes(tp, align);
return int_Lu[align];
} else if (mode == mode_F) {
static ir_type *float_F[16] = {NULL, };
if (float_F[align] == NULL) {
float_F[align] = tp = new_type_primitive(mode);
/* set the specified alignment */
set_type_alignment_bytes(tp, align);
return float_F[align];
} else if (mode == mode_D) {
static ir_type *float_D[16] = {NULL, };
if (float_D[align] == NULL) {
float_D[align] = tp = new_type_primitive(mode);
/* set the specified alignment */
set_type_alignment_bytes(tp, align);
return float_D[align];
/* fallback: extended-precision (mode_E) cache */
static ir_type *float_E[16] = {NULL, };
if (float_E[align] == NULL) {
float_E[align] = tp = new_type_primitive(mode);
/* set the specified alignment */
set_type_alignment_bytes(tp, align);
return float_E[align];
/**
 * Create a float[2] array type for the given atomic type and cache it
 * (one cache slot per (element mode, alignment) pair).
 *
 * @param tp the atomic type
 */
static ir_type *ia32_create_float_array(ir_type *tp)
ir_mode *mode = get_type_mode(tp);
unsigned align = get_type_alignment_bytes(tp);
if (mode == mode_F) {
static ir_type *float_F[16] = {NULL, };
if (float_F[align] != NULL)
return float_F[align];
arr = float_F[align] = new_type_array(1, tp);
} else if (mode == mode_D) {
static ir_type *float_D[16] = {NULL, };
if (float_D[align] != NULL)
return float_D[align];
arr = float_D[align] = new_type_array(1, tp);
/* fallback cache for extended-precision elements */
static ir_type *float_E[16] = {NULL, };
if (float_E[align] != NULL)
return float_E[align];
arr = float_E[align] = new_type_array(1, tp);
/* two elements of the atomic type, fixed layout */
set_type_alignment_bytes(arr, align);
set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
set_type_state(arr, layout_fixed);
/* Generates an entity for a known FP const (used for FP Neg + Abs).
 * Entities are created on first request and cached; the ULL_BIAS constant
 * is emitted as a {0, bias} pair (see ia32_create_float_array). */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
static const struct {
const char *ent_name;
const char *cnst_str;
} names [ia32_known_const_max] = {
{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
static ir_entity *ent_cache[ia32_known_const_max];
const char *ent_name, *cnst_str;
ent_name = names[kct].ent_name;
if (! ent_cache[kct]) {
cnst_str = names[kct].cnst_str;
/* mode selector from the table: 0 = Iu, 1 = Lu, otherwise F */
switch (names[kct].mode) {
case 0: mode = mode_Iu; break;
case 1: mode = mode_Lu; break;
default: mode = mode_F; break;
tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
tp = ia32_create_float_type(mode, names[kct].align);
if (kct == ia32_ULLBIAS)
tp = ia32_create_float_array(tp);
ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
set_entity_ld_ident(ent, get_entity_ident(ent));
add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
set_entity_visibility(ent, ir_visibility_local);
if (kct == ia32_ULLBIAS) {
/* {0, bias} compound so the bias sits in the upper array slot */
ir_initializer_t *initializer = create_initializer_compound(2);
set_initializer_compound_value(initializer, 0,
create_initializer_tarval(get_mode_null(mode)));
set_initializer_compound_value(initializer, 1,
create_initializer_tarval(tv));
set_entity_initializer(ent, initializer);
set_entity_initializer(ent, create_initializer_tarval(tv));
/* cache the entry */
ent_cache[kct] = ent;
return ent_cache[kct];
/**
 * return true if the node is a Proj(Load) and could be used in source address
 * mode for another node. Will return only true if the @p other node is not
 * dependent on the memory of the Load (for binary operations use the other
 * input here, for unary operations use NULL).
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
ir_node *other, ir_node *other2, match_flags_t flags)
/* float constants are always available */
if (is_Const(node)) {
ir_mode *mode = get_irn_mode(node);
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2) {
if (is_simple_sse_Const(node))
if (is_simple_x87_Const(node))
if (get_irn_n_edges(node) > 1)
load = get_Proj_pred(node);
pn = get_Proj_proj(node);
/* must be the result Proj of a Load in the same block */
if (!is_Load(load) || pn != pn_Load_res)
if (get_nodes_block(load) != block)
/* we only use address mode if we're the only user of the load */
if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
/* in some edge cases with address mode we might reach the load normally
 * and through some AM sequence, if it is already materialized then we
 * can't create an AM node from it */
if (be_is_transformed(node))
/* don't do AM if other node inputs depend on the load (via mem-proj) */
if (other != NULL && prevents_AM(block, load, other))
if (other2 != NULL && prevents_AM(block, load, other2))
/* Aggregated result of operand matching: the address parts plus the matched
 * operands and operation properties (several fields are elided in this
 * excerpt). */
typedef struct ia32_address_mode_t ia32_address_mode_t;
struct ia32_address_mode_t {
/* ia32_Normal or ia32_AddrModeS, set by match_arguments() */
ia32_op_type_t op_type;
unsigned commutative : 1;
/* set when the operands were swapped during matching */
unsigned ins_permuted : 1;
615 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
617 /* construct load address */
618 memset(addr, 0, sizeof(addr[0]));
619 ia32_create_address_mode(addr, ptr, 0);
621 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
622 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
623 addr->mem = be_transform_node(mem);
/* Fill the address part of an address-mode struct for a node that is either
 * a float Const (loaded from a constant entity) or a Proj(Load). */
static void build_address(ia32_address_mode_t *am, ir_node *node,
ia32_create_am_flags_t flags)
ia32_address_t *addr = &am->addr;
/* floating point immediates */
if (is_Const(node)) {
ir_entity *entity = create_float_const_entity(node);
addr->base = get_symconst_base();
addr->index = noreg_GP;
addr->symconst_ent = entity;
/* load mode comes from the entity (may be smaller than the node mode) */
am->ls_mode = get_type_mode(get_entity_type(entity));
am->pinned = op_pin_state_floats;
/* Proj(Load) case: reuse the load's address operands */
load = get_Proj_pred(node);
ptr = get_Load_ptr(load);
mem = get_Load_mem(load);
new_mem = be_transform_node(mem);
am->pinned = get_irn_pinned(load);
am->ls_mode = get_Load_mode(load);
am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
/* construct load address */
ia32_create_address_mode(addr, ptr, flags);
addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy the address components (scale, symconst, offset, frame entity) from
 * an ia32_address_t onto an ia32 node's attributes. */
static void set_address(ir_node *node, const ia32_address_t *addr)
set_ia32_am_scale(node, addr->scale);
set_ia32_am_sc(node, addr->symconst_ent);
set_ia32_am_offs_int(node, addr->offset);
if (addr->symconst_sign)
set_ia32_am_sc_sign(node);
set_ia32_use_frame(node);
set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node:
 * address parts, operation type, load/store mode, pin state and
 * commutativity.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
set_address(node, &am->addr);
set_ia32_op_type(node, am->op_type);
set_ia32_ls_mode(node, am->ls_mode);
if (am->pinned == op_pin_state_pinned) {
/* beware: some nodes are already pinned and did not allow to change the state */
if (get_irn_pinned(node) != op_pin_state_pinned)
set_irn_pinned(node, op_pin_state_pinned);
set_ia32_commutative(node);
/**
 * Check if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer (or equally many) bits.
 * Moreover, we return only true if the node has not more than 1 user.
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 */
static int is_downconv(const ir_node *node)
/* we only want to skip the conv when we're the only user
 * (because this test is used in the context of address-mode selection
 * and we don't want to use address mode for multiple users) */
if (get_irn_n_edges(node) > 1)
src_mode = get_irn_mode(get_Conv_op(node));
dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must not be wider */
ia32_mode_needs_gp_reg(src_mode) &&
ia32_mode_needs_gp_reg(dest_mode) &&
get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
/** Skip all Down-Conv's on a given node and return the resulting node. */
ir_node *ia32_skip_downconv(ir_node *node)
while (is_downconv(node))
node = get_Conv_op(node);
/* Check whether the node is a Conv between two GP-register modes of the
 * same bit size (i.e. only a signedness change) with at most one user. */
static bool is_sameconv(ir_node *node)
/* we only want to skip the conv when we're the only user
 * (because this test is used in the context of address-mode selection
 * and we don't want to use address mode for multiple users) */
if (get_irn_n_edges(node) > 1)
src_mode = get_irn_mode(get_Conv_op(node));
dest_mode = get_irn_mode(node);
/* same width, both GP modes: the conversion is a no-op bit-wise */
ia32_mode_needs_gp_reg(src_mode) &&
ia32_mode_needs_gp_reg(dest_mode) &&
get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
/** Skip all signedness convs (same-width Convs, see is_sameconv). */
static ir_node *ia32_skip_sameconv(ir_node *node)
while (is_sameconv(node))
node = get_Conv_op(node);
/* Widen @p node to a 32bit GP value via an I2I conversion; the target mode
 * depends on the signedness of the source mode. */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
ir_mode *mode = get_irn_mode(node);
if (mode_is_signed(mode)) {
block = get_nodes_block(node);
dbgi = get_irn_dbg_info(node);
return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes,
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
ir_node *op1, ir_node *op2, ir_node *other_op,
ia32_address_t *addr = &am->addr;
ir_mode *mode = get_irn_mode(op2);
int mode_bits = get_mode_size_bits(mode);
ir_node *new_op1, *new_op2;
unsigned commutative;
int use_am_and_immediates;
memset(am, 0, sizeof(am[0]));
/* decode the capability flags once up front */
commutative = (flags & match_commutative) != 0;
use_am_and_immediates = (flags & match_am_and_immediates) != 0;
use_am = (flags & match_am) != 0;
use_immediate = (flags & match_immediate) != 0;
assert(!use_am_and_immediates || use_immediate);
assert(!commutative || op1 != NULL);
assert(use_am || !(flags & match_8bit_am));
assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit AM only when explicitly allowed */
if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
(mode_bits == 16 && !(flags & match_16bit_am))) {
/* we can simply skip downconvs for mode neutral nodes: the upper bits
 * can be random for these operations */
if (flags & match_mode_neutral) {
op2 = ia32_skip_downconv(op2);
op1 = ia32_skip_downconv(op1);
op2 = ia32_skip_sameconv(op2);
op1 = ia32_skip_sameconv(op1);
/* match immediates. firm nodes are normalized: constants are always on the
 * right side */
if (!(flags & match_try_am) && use_immediate) {
new_op2 = try_create_Immediate(op2, 0);
/* try source AM for op2 first */
if (new_op2 == NULL &&
use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
build_address(am, op2, 0);
new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
if (mode_is_float(mode)) {
new_op2 = ia32_new_NoReg_vfp(env_cg);
am->op_type = ia32_AddrModeS;
} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
/* commutative: try op1 as the memory operand instead */
build_address(am, op1, 0);
if (mode_is_float(mode)) {
noreg = ia32_new_NoReg_vfp(env_cg);
if (new_op2 != NULL) {
new_op1 = be_transform_node(op2);
/* operands were swapped relative to the original node */
am->ins_permuted = 1;
am->op_type = ia32_AddrModeS;
am->op_type = ia32_Normal;
if (flags & match_try_am) {
mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation requires it */
if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
new_op2 = create_upconv(op2, NULL);
am->ls_mode = mode_Iu;
new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
new_op2 = be_transform_node(op2);
am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* default any unset address parts */
if (addr->base == NULL)
addr->base = noreg_GP;
if (addr->index == NULL)
addr->index = noreg_GP;
if (addr->mem == NULL)
am->new_op1 = new_op1;
am->new_op2 = new_op2;
am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
if (am->mem_proj == NULL)
/* we have to create a mode_T so the old MemProj can attach to us */
mode = get_irn_mode(node);
load = get_Proj_pred(am->mem_proj);
/* redirect users of the consumed Load to the new node */
be_set_transformed_node(load, node);
if (mode != mode_T) {
set_irn_mode(node, mode_T);
return new_rd_Proj(NULL, node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node The original node for which the binop is created
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
construct_binop_func *func, match_flags_t flags)
ir_node *block, *new_block, *new_node;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
block = get_nodes_block(node);
match_arguments(&am, block, op1, op2, NULL, flags);
dbgi = get_irn_dbg_info(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
am.new_op1, am.new_op2);
set_am_attributes(new_node, &am);
/* we can't use source address mode anymore when using immediates */
if (!(flags & match_am_and_immediates) &&
(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
set_ia32_am_support(new_node, ia32_am_none);
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op.
 */
n_ia32_l_binop_left, /**< ia32 left input */
n_ia32_l_binop_right, /**< ia32 right input */
n_ia32_l_binop_eflags /**< ia32 eflags input */
/* keep the generic input numbers in sync with the Adc/Sbb constructors */
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node The node to transform
 * @param func The node constructor function
 * @param flags The match flags
 * @return The constructor ia32 node
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
match_flags_t flags)
ir_node *src_block = get_nodes_block(node);
ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
ir_node *block, *new_node, *new_eflags;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op so AM never swallows its producer */
match_arguments(&am, src_block, op1, op2, eflags, flags);
dbgi = get_irn_dbg_info(node);
block = be_transform_node(src_block);
new_eflags = be_transform_node(eflags);
new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
am.new_op1, am.new_op2, new_eflags);
set_am_attributes(new_node, &am);
/* we can't use source address mode anymore when using immediates */
if (!(flags & match_am_and_immediates) &&
(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
set_ia32_am_support(new_node, ia32_am_none);
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/* Return the (cached) transformed node representing the initial x87
 * floating-point control word. */
static ir_node *get_fpcw(void)
if (initial_fpcw != NULL)
return initial_fpcw;
fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
&ia32_fp_cw_regs[REG_FPCW]);
initial_fpcw = be_transform_node(fpcw);
return initial_fpcw;
/**
 * Construct a standard x87 binary operation, set AM and immediate if
 * required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
construct_binop_float_func *func)
ir_mode *mode = get_irn_mode(node);
ir_node *block, *new_block, *new_node;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
ia32_x87_attr_t *attr;
/* All operations are considered commutative, because there are reverse
 * variants */
match_flags_t flags = match_commutative;
/* happens for div nodes... */
mode = get_divop_resmod(node);
/* cannot use address mode with long double on x87 */
if (get_mode_size_bits(mode) <= 64)
block = get_nodes_block(node);
match_arguments(&am, block, op1, op2, NULL, flags);
dbgi = get_irn_dbg_info(node);
new_block = be_transform_node(block);
/* x87 operations additionally take the FP control word */
new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
am.new_op1, am.new_op2, get_fpcw());
set_am_attributes(new_node, &am);
/* remember a possible operand swap for the emitter (reverse variants) */
attr = get_ia32_x87_attr(new_node);
attr->attr.data.ins_permuted = am.ins_permuted;
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if
 * required.
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
construct_shift_func *func,
match_flags_t flags)
ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
assert(! mode_is_float(get_irn_mode(node)));
assert(flags & match_immediate);
assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
if (flags & match_mode_neutral) {
op1 = ia32_skip_downconv(op1);
new_op1 = be_transform_node(op1);
} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
/* sub-32bit shifts need the value widened first */
new_op1 = create_upconv(op1, node);
new_op1 = be_transform_node(op1);
/* the shift amount can be any mode that is bigger than 5 bits, since all
 * other bits are ignored anyway */
while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
ir_node *const op = get_Conv_op(op2);
if (mode_is_float(get_irn_mode(op)))
assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
new_op2 = create_immediate_or_transform(op2, 0);
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, new_op1, new_op2);
SET_IA32_ORIG_NODE(new_node, node);
/* lowered shift instruction may have a dependency operand, handle it here */
if (get_irn_arity(node) == 3) {
/* we have a dependency */
ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
match_flags_t flags)
ir_node *block, *new_block, *new_op, *new_node;
/* only mode-neutrality is supported for unops */
assert(flags == 0 || flags == match_mode_neutral);
if (flags & match_mode_neutral) {
op = ia32_skip_downconv(op);
new_op = be_transform_node(op);
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
new_node = func(dbgi, new_block, new_op);
SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a decomposed address; missing base/index
 * operands are substituted (lines elided in this excerpt) before the Lea
 * is constructed and the address attributes are applied. */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
ia32_address_t *addr)
ir_node *base, *index, *res;
base = be_transform_node(base);
index = addr->index;
if (index == NULL) {
index = be_transform_node(index);
res = new_bd_ia32_Lea(dbgi, block, base, index);
set_address(res, addr);
1205 * Returns non-zero if a given address mode has a symbolic or
1206 * numerical offset != 0.
1208 static int am_has_immediates(const ia32_address_t *addr)
1210 return addr->offset != 0 || addr->symconst_ent != NULL
1211 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add.
 * Float adds go through the SSE/x87 binop helpers; integer adds are
 * turned into an immediate Const, a Lea or an Add with source AM,
 * depending on what the address-mode matcher finds.
 *
 * @return the created ia32 Add node
 */
static ir_node *gen_Add(ir_node *node)
ir_mode *mode = get_irn_mode(node);
ir_node *op1 = get_Add_left(node);
ir_node *op2 = get_Add_right(node);
ir_node *block, *new_block, *new_node, *add_immediate_op;
ia32_address_t addr;
ia32_address_mode_t am;
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2)
return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
match_commutative | match_am);
return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
ia32_mark_non_am(node);
op2 = ia32_skip_downconv(op2);
op1 = ia32_skip_downconv(op1);
/* Selection strategy:
 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
 * 1. Add with immediate -> Lea
 * 2. Add with possible source address mode -> Add
 * 3. Otherwise -> Lea
 */
memset(&addr, 0, sizeof(addr));
ia32_create_address_mode(&addr, node, ia32_create_am_force);
add_immediate_op = NULL;
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
new_block = be_transform_node(block);
/* a constant? */
if (addr.base == NULL && addr.index == NULL) {
new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
addr.symconst_sign, 0, addr.offset);
be_dep_on_frame(new_node);
SET_IA32_ORIG_NODE(new_node, node);
/* add with immediate? */
if (addr.index == NULL) {
add_immediate_op = addr.base;
} else if (addr.base == NULL && addr.scale == 0) {
add_immediate_op = addr.index;
if (add_immediate_op != NULL) {
if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
return be_transform_node(add_immediate_op);
new_node = create_lea_from_address(dbgi, new_block, &addr);
SET_IA32_ORIG_NODE(new_node, node);
/* test if we can use source address mode */
match_arguments(&am, block, op1, op2, NULL, match_commutative
| match_mode_neutral | match_am | match_immediate | match_try_am);
/* construct an Add with source address mode */
if (am.op_type == ia32_AddrModeS) {
ia32_address_t *am_addr = &am.addr;
new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
am_addr->index, am_addr->mem, am.new_op1,
set_am_attributes(new_node, &am);
SET_IA32_ORIG_NODE(new_node, node);
new_node = fix_mem_proj(new_node, &am);
/* otherwise construct a lea */
new_node = create_lea_from_address(dbgi, new_block, &addr);
SET_IA32_ORIG_NODE(new_node, node);
1311 * Creates an ia32 Mul.
1313 * @return the created ia32 Mul node
1315 static ir_node *gen_Mul(ir_node *node)
1317 ir_node *op1 = get_Mul_left(node);
1318 ir_node *op2 = get_Mul_right(node);
1319 ir_mode *mode = get_irn_mode(node);
1321 if (mode_is_float(mode)) {
1322 if (ia32_cg_config.use_sse2)
1323 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1324 match_commutative | match_am);
1326 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul is usable for signed and unsigned since only
 * the low 32 result bits are needed (mode_neutral) */
1328 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1329 match_commutative | match_am | match_mode_neutral |
1330 match_immediate | match_am_and_immediates);
1334 * Creates an ia32 Mulh.
1335 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1336 * this result while Mul returns the lower 32 bit.
1338 * @return the created ia32 Mulh node
1340 static ir_node *gen_Mulh(ir_node *node)
1342 dbg_info *dbgi = get_irn_dbg_info(node);
1343 ir_node *op1 = get_Mulh_left(node);
1344 ir_node *op2 = get_Mulh_right(node);
1345 ir_mode *mode = get_irn_mode(node);
1347 ir_node *proj_res_high;
1349 if (get_mode_size_bits(mode) != 32) {
1350 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed → one-operand IMUL, unsigned → MUL; both produce edx:eax and
 * the high half is extracted via a Proj */
1353 if (mode_is_signed(mode)) {
1354 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1355 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1357 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1358 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1360 return proj_res_high;
1364 * Creates an ia32 And.
1366 * @return The created ia32 And node
1368 static ir_node *gen_And(ir_node *node)
1370 ir_node *op1 = get_And_left(node);
1371 ir_node *op2 = get_And_right(node);
1372 assert(! mode_is_float(get_irn_mode(node)));
1374 /* is it a zero extension? */
1375 if (is_Const(op2)) {
1376 tarval *tv = get_Const_tarval(op2);
1377 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit — emit a
 * widening Conv (movzx) instead of an and instruction */
1379 if (v == 0xFF || v == 0xFFFF) {
1380 dbg_info *dbgi = get_irn_dbg_info(node);
1381 ir_node *block = get_nodes_block(node);
1388 assert(v == 0xFFFF);
1391 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate matching */
1396 return gen_binop(node, op1, op2, new_bd_ia32_And,
1397 match_commutative | match_mode_neutral | match_am | match_immediate);
1403 * Creates an ia32 Or.
1405 * @return The created ia32 Or node
1407 static ir_node *gen_Or(ir_node *node)
1409 ir_node *op1 = get_Or_left(node);
1410 ir_node *op2 = get_Or_right(node);
/* float Or does not exist; lowered before we get here */
1412 assert (! mode_is_float(get_irn_mode(node)));
1413 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1414 | match_mode_neutral | match_am | match_immediate);
1420 * Creates an ia32 Eor.
1422 * @return The created ia32 Eor node
1424 static ir_node *gen_Eor(ir_node *node)
1426 ir_node *op1 = get_Eor_left(node);
1427 ir_node *op2 = get_Eor_right(node);
1429 assert(! mode_is_float(get_irn_mode(node)));
/* firm's Eor maps directly to the x86 xor instruction */
1430 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1431 | match_mode_neutral | match_am | match_immediate);
1436 * Creates an ia32 Sub.
1438 * @return The created ia32 Sub node
1440 static ir_node *gen_Sub(ir_node *node)
1442 ir_node *op1 = get_Sub_left(node);
1443 ir_node *op2 = get_Sub_right(node);
1444 ir_mode *mode = get_irn_mode(node);
1446 if (mode_is_float(mode)) {
1447 if (ia32_cg_config.use_sse2)
1448 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1450 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,Const should have been normalized to Add x,-Const by the
 * middleend — warn, but still generate correct code */
1453 if (is_Const(op2)) {
1454 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* note: Sub is not commutative, so no match_commutative here */
1458 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1459 | match_am | match_immediate);
/* Compute the memory input for a node that consumed a Load as address
 * mode: combine the node's original memory (src_mem) with the memory of
 * the folded Load (am_mem), taking care not to create a cycle when
 * src_mem already depends on that same Load. */
1462 static ir_node *transform_AM_mem(ir_node *const block,
1463 ir_node *const src_val,
1464 ir_node *const src_mem,
1465 ir_node *const am_mem)
1467 if (is_NoMem(am_mem)) {
1468 return be_transform_node(src_mem);
1469 } else if (is_Proj(src_val) &&
1471 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1472 /* avoid memory loop */
1474 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the predecessor that comes
 * from the folded Load, and add am_mem instead */
1475 ir_node *const ptr_pred = get_Proj_pred(src_val);
1476 int const arity = get_Sync_n_preds(src_mem);
1481 NEW_ARR_A(ir_node*, ins, arity + 1);
1483 /* NOTE: This sometimes produces dead-code because the old sync in
1484 * src_mem might not be used anymore, we should detect this case
1485 * and kill the sync... */
1486 for (i = arity - 1; i >= 0; --i) {
1487 ir_node *const pred = get_Sync_pred(src_mem, i);
1489 /* avoid memory loop */
1490 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1493 ins[n++] = be_transform_node(pred);
1498 return new_r_Sync(block, n, ins);
/* general case: join the two memories with a 2-input Sync */
1502 ins[0] = be_transform_node(src_mem);
1504 return new_r_Sync(block, 2, ins);
1509 * Create a 32bit to 64bit signed extension.
1511 * @param dbgi debug info
1512 * @param block the block where node nodes should be placed
1513 * @param val the value to extend
1514 * @param orig the original node
1516 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1517 ir_node *val, const ir_node *orig)
/* Two encodings: cltd (cdq) is shorter but ties val to eax; otherwise
 * use "sar val, 31" which works on any register */
1522 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a fresh value to overwrite */
1523 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1524 be_dep_on_frame(pval);
1525 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1527 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1528 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1530 SET_IA32_ORIG_NODE(res, orig);
1535 * Generates an ia32 DivMod with additional infrastructure for the
1536 * register allocator if needed.
1538 static ir_node *create_Div(ir_node *node)
1540 dbg_info *dbgi = get_irn_dbg_info(node);
1541 ir_node *block = get_nodes_block(node);
1542 ir_node *new_block = be_transform_node(block);
1549 ir_node *sign_extension;
1550 ia32_address_mode_t am;
1551 ia32_address_t *addr = &am.addr;
1553 /* the upper bits have random contents for smaller modes */
/* shared handler for Div, Mod and DivMod — extract the operands
 * according to the actual opcode */
1554 switch (get_irn_opcode(node)) {
1556 op1 = get_Div_left(node);
1557 op2 = get_Div_right(node);
1558 mem = get_Div_mem(node);
1559 mode = get_Div_resmode(node);
1562 op1 = get_Mod_left(node);
1563 op2 = get_Mod_right(node);
1564 mem = get_Mod_mem(node);
1565 mode = get_Mod_resmode(node);
1568 op1 = get_DivMod_left(node);
1569 op2 = get_DivMod_right(node);
1570 mem = get_DivMod_mem(node);
1571 mode = get_DivMod_resmode(node);
1574 panic("invalid divmod node %+F", node);
/* match_upconv_32: operands must be widened to full 32 bit since the
 * upper bits matter for division */
1577 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1579 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1580 is the memory of the consumed address. We can have only the second op as address
1581 in Div nodes, so check only op2. */
1582 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1584 if (mode_is_signed(mode)) {
/* idiv needs edx:eax — sign-extend the dividend into edx */
1585 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1586 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1587 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned div: the high half is simply zero */
1589 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1590 be_dep_on_frame(sign_extension);
1592 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1593 addr->index, new_mem, am.new_op2,
1594 am.new_op1, sign_extension);
1597 set_irn_pinned(new_node, get_irn_pinned(node));
1599 set_am_attributes(new_node, &am);
1600 SET_IA32_ORIG_NODE(new_node, node);
1602 new_node = fix_mem_proj(new_node, &am);
1608 * Generates an ia32 Mod.
1610 static ir_node *gen_Mod(ir_node *node)
1612 return create_Div(node);
1616 * Generates an ia32 Div.
1618 static ir_node *gen_Div(ir_node *node)
1620 return create_Div(node);
1624 * Generates an ia32 DivMod.
1626 static ir_node *gen_DivMod(ir_node *node)
1628 return create_Div(node);
1634 * Creates an ia32 floating Div.
1636 * @return The created ia32 xDiv node
1638 static ir_node *gen_Quot(ir_node *node)
1640 ir_node *op1 = get_Quot_left(node);
1641 ir_node *op2 = get_Quot_right(node);
1643 if (ia32_cg_config.use_sse2) {
1644 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
/* no SSE2: fall back to the x87 fdiv */
1646 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1652 * Creates an ia32 Shl.
1654 * @return The created ia32 Shl node
1656 static ir_node *gen_Shl(ir_node *node)
1658 ir_node *left = get_Shl_left(node);
1659 ir_node *right = get_Shl_right(node);
/* upper bits don't matter for a left shift → mode_neutral is safe */
1661 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1662 match_mode_neutral | match_immediate);
1666 * Creates an ia32 Shr.
1668 * @return The created ia32 Shr node
1670 static ir_node *gen_Shr(ir_node *node)
1672 ir_node *left = get_Shr_left(node);
1673 ir_node *right = get_Shr_right(node);
/* no mode_neutral: a logical right shift pulls zeros in from the top,
 * so the upper bits of the operand must be correct */
1675 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1681 * Creates an ia32 Sar.
1683 * @return The created ia32 Shrs node
1685 static ir_node *gen_Shrs(ir_node *node)
1687 ir_node *left = get_Shrs_left(node);
1688 ir_node *right = get_Shrs_right(node);
1690 if (is_Const(right)) {
1691 tarval *tv = get_Const_tarval(right);
1692 long val = get_tarval_long(tv);
/* Shrs x, 31 yields only the sign bit — use the cheaper
 * sign-extension helper (cltd or sar) */
1694 /* this is a sign extension */
1695 dbg_info *dbgi = get_irn_dbg_info(node);
1696 ir_node *block = be_transform_node(get_nodes_block(node));
1697 ir_node *new_op = be_transform_node(left);
1699 return create_sex_32_64(dbgi, block, new_op, node);
1703 /* 8 or 16 bit sign extension? */
1704 if (is_Const(right) && is_Shl(left)) {
1705 ir_node *shl_left = get_Shl_left(left);
1706 ir_node *shl_right = get_Shl_right(left);
1707 if (is_Const(shl_right)) {
1708 tarval *tv1 = get_Const_tarval(right);
1709 tarval *tv2 = get_Const_tarval(shl_right);
/* Shrs(Shl(x, c), c) with c == 16/24 is a sign-extension from
 * 16/8 bit — emit a widening Conv (movsx) instead */
1710 if (tv1 == tv2 && tarval_is_long(tv1)) {
1711 long val = get_tarval_long(tv1);
1712 if (val == 16 || val == 24) {
1713 dbg_info *dbgi = get_irn_dbg_info(node);
1714 ir_node *block = get_nodes_block(node);
1724 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain sar */
1733 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1739 * Creates an ia32 Rol.
1741 * @param op1 The first operator
1742 * @param op2 The second operator
1743 * @return The created ia32 RotL node
1745 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1747 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1753 * Creates an ia32 Ror.
1754 * NOTE: There is no RotR with immediate because this would always be a RotL
1755 * "imm-mode_size_bits" which can be pre-calculated.
1757 * @param op1 The first operator
1758 * @param op2 The second operator
1759 * @return The created ia32 RotR node
1761 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1763 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1769 * Creates an ia32 RotR or RotL (depending on the found pattern).
1771 * @return The created ia32 RotL or RotR node
1773 static ir_node *gen_Rotl(ir_node *node)
1775 ir_node *rotate = NULL;
1776 ir_node *op1 = get_Rotl_left(node);
1777 ir_node *op2 = get_Rotl_right(node);
1779 /* Firm has only RotL, so we are looking for a right (op2)
1780 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1781 that means we can create a RotR instead of an Add and a RotL */
1785 ir_node *left = get_Add_left(add);
1786 ir_node *right = get_Add_right(add);
1787 if (is_Const(right)) {
1788 tarval *tv = get_Const_tarval(right);
1789 ir_mode *mode = get_irn_mode(node);
1790 long bits = get_mode_size_bits(mode);
/* matched: RotL(x, bits - e) == RotR(x, e) — save the Add/Minus */
1792 if (is_Minus(left) &&
1793 tarval_is_long(tv) &&
1794 get_tarval_long(tv) == bits &&
1797 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1798 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern not found: emit a plain RotL */
1803 if (rotate == NULL) {
1804 rotate = gen_Rol(node, op1, op2);
1813 * Transforms a Minus node.
1815 * @return The created ia32 Minus node
1817 static ir_node *gen_Minus(ir_node *node)
1819 ir_node *op = get_Minus_op(node);
1820 ir_node *block = be_transform_node(get_nodes_block(node));
1821 dbg_info *dbgi = get_irn_dbg_info(node);
1822 ir_mode *mode = get_irn_mode(node);
1827 if (mode_is_float(mode)) {
1828 ir_node *new_op = be_transform_node(op);
1829 if (ia32_cg_config.use_sse2) {
1830 /* TODO: non-optimal... if we have many xXors, then we should
1831 * rather create a load for the const and use that instead of
1832 * several AM nodes... */
/* SSE negation = xor with the sign-bit mask loaded from a known
 * constant in memory (ia32_SSIGN / ia32_DSIGN) */
1833 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1835 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1836 noreg_GP, nomem, new_op, noreg_xmm);
1838 size = get_mode_size_bits(mode);
1839 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1841 set_ia32_am_sc(new_node, ent);
1842 set_ia32_op_type(new_node, ia32_AddrModeS);
1843 set_ia32_ls_mode(new_node, mode);
/* x87: fchs negates the top of stack directly */
1845 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1848 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1851 SET_IA32_ORIG_NODE(new_node, node);
1857 * Transforms a Not node.
1859 * @return The created ia32 Not node
1861 static ir_node *gen_Not(ir_node *node)
1863 ir_node *op = get_Not_op(node);
1865 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1866 assert (! mode_is_float(get_irn_mode(node)));
1868 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1874 * Transforms an Abs node.
1876 * @return The created ia32 Abs node
1878 static ir_node *gen_Abs(ir_node *node)
1880 ir_node *block = get_nodes_block(node);
1881 ir_node *new_block = be_transform_node(block);
1882 ir_node *op = get_Abs_op(node);
1883 dbgi = get_irn_dbg_info(node);
1884 ir_mode *mode = get_irn_mode(node);
1890 if (mode_is_float(mode)) {
1891 new_op = be_transform_node(op);
1893 if (ia32_cg_config.use_sse2) {
/* SSE abs = and with the all-bits-but-sign mask from a known
 * memory constant (ia32_SABS / ia32_DABS) */
1894 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1895 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1896 noreg_GP, nomem, new_op, noreg_fp);
1898 size = get_mode_size_bits(mode);
1899 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1901 set_ia32_am_sc(new_node, ent);
1903 SET_IA32_ORIG_NODE(new_node, node);
1905 set_ia32_op_type(new_node, ia32_AddrModeS);
1906 set_ia32_ls_mode(new_node, mode);
/* x87: fabs on the top of stack */
1908 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1909 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs via the branch-free idiom:
 *   s = x >> 31;  abs(x) = (x ^ s) - s */
1912 ir_node *xor, *sign_extension;
1914 if (get_mode_size_bits(mode) == 32) {
1915 new_op = be_transform_node(op);
1917 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1920 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1922 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1923 nomem, new_op, sign_extension);
1924 SET_IA32_ORIG_NODE(xor, node);
1926 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1927 nomem, xor, sign_extension);
1928 SET_IA32_ORIG_NODE(new_node, node);
1935 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1937 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1939 dbg_info *dbgi = get_irn_dbg_info(cmp);
1940 ir_node *block = get_nodes_block(cmp);
1941 ir_node *new_block = be_transform_node(block);
1942 ir_node *op1 = be_transform_node(x);
1943 ir_node *op2 = be_transform_node(n);
/* bt copies bit op2 of op1 into the carry flag */
1945 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1949 * Transform a node returning a "flag" result.
1951 * @param node the node to transform
1952 * @param pnc_out the compare mode to use
1954 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1961 /* we have a Cmp as input */
1962 if (is_Proj(node)) {
1963 ir_node *pred = get_Proj_pred(node);
1965 pn_Cmp pnc = get_Proj_proj(node);
/* try to recognize Cmp(x & (1 << n), 0) and use the bt instruction:
 * the tested bit ends up in the carry flag */
1966 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1967 ir_node *l = get_Cmp_left(pred);
1968 ir_node *r = get_Cmp_right(pred);
1970 ir_node *la = get_And_left(l);
1971 ir_node *ra = get_And_right(l);
1973 ir_node *c = get_Shl_left(la);
1974 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1975 /* (1 << n) & ra) */
1976 ir_node *n = get_Shl_right(la);
1977 flags = gen_bt(pred, ra, n);
1978 /* we must generate a Jc/Jnc jump */
/* bt sets carry, so Lg/Eq become unsigned Lt/Ge (carry tests) */
1979 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1982 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored pattern: la & (1 << n) */
1987 ir_node *c = get_Shl_left(ra);
1988 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1989 /* la & (1 << n)) */
1990 ir_node *n = get_Shl_right(ra);
1991 flags = gen_bt(pred, la, n);
1992 /* we must generate a Jc/Jnc jump */
1993 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1996 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
2002 /* add ia32 compare flags */
2004 ir_node *l = get_Cmp_left(pred);
2005 ir_mode *mode = get_irn_mode(l);
2006 if (mode_is_float(mode))
2007 pnc |= ia32_pn_Cmp_float;
2008 else if (! mode_is_signed(mode))
2009 pnc |= ia32_pn_Cmp_unsigned;
2012 flags = be_transform_node(pred);
2017 /* a mode_b value, we have to compare it against 0 */
2018 dbgi = get_irn_dbg_info(node);
2019 new_block = be_transform_node(get_nodes_block(node));
2020 new_op = be_transform_node(node);
/* test reg,reg sets ZF; "!= 0" is the condition to branch on */
2021 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2022 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2023 *pnc_out = pn_Cmp_Lg;
2028 * Transforms a Load.
2030 * @return the created ia32 Load node
2032 static ir_node *gen_Load(ir_node *node)
2034 ir_node *old_block = get_nodes_block(node);
2035 ir_node *block = be_transform_node(old_block);
2036 ir_node *ptr = get_Load_ptr(node);
2037 ir_node *mem = get_Load_mem(node);
2038 ir_node *new_mem = be_transform_node(mem);
2041 dbg_info *dbgi = get_irn_dbg_info(node);
2042 ir_mode *mode = get_Load_mode(node);
2044 ia32_address_t addr;
2046 /* construct load address */
2047 memset(&addr, 0, sizeof(addr));
2048 ia32_create_address_mode(&addr, ptr, 0);
2055 base = be_transform_node(base);
2058 if (index == NULL) {
2061 index = be_transform_node(index);
2064 if (mode_is_float(mode)) {
2065 if (ia32_cg_config.use_sse2) {
2066 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2069 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2073 assert(mode != mode_b);
2075 /* create a conv node with address mode for smaller modes */
/* sub-32-bit integer loads are widened directly via movzx/movsx */
2076 if (get_mode_size_bits(mode) < 32) {
2077 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2078 new_mem, noreg_GP, mode);
2080 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2084 set_irn_pinned(new_node, get_irn_pinned(node));
2085 set_ia32_op_type(new_node, ia32_AddrModeS);
2086 set_ia32_ls_mode(new_node, mode);
2087 set_address(new_node, &addr);
/* a floating (unpinned) Load may be re-executed freely — mark it
 * rematerializable for the spiller */
2089 if (get_irn_pinned(node) == op_pin_state_floats) {
2090 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2091 && pn_ia32_vfld_res == pn_ia32_Load_res
2092 && pn_ia32_Load_res == pn_ia32_res);
2093 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2096 SET_IA32_ORIG_NODE(new_node, node);
2098 be_dep_on_frame(new_node);
/* Decide whether a value (a Load Proj) feeding a Store can be folded
 * into destination address mode (e.g. "add [mem], x"). Requires the
 * Load and Store to use the same pointer, live in the same block, and
 * the Load to have no other users or dependency hazards. */
2102 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2103 ir_node *ptr, ir_node *other)
2110 /* we only use address mode if we're the only user of the load */
2111 if (get_irn_n_edges(node) > 1)
2114 load = get_Proj_pred(node);
2117 if (get_nodes_block(load) != block)
2120 /* store should have the same pointer as the load */
2121 if (get_Load_ptr(load) != ptr)
2124 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2125 if (other != NULL &&
2126 get_nodes_block(other) == block &&
2127 heights_reachable_in_block(heights, other, load)) {
2131 if (prevents_AM(block, load, mem))
2133 /* Store should be attached to the load via mem */
2134 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binop ("op [mem], val") for a
 * Store(Binop(Load(p), x), p) pattern, choosing the 8-bit variant for
 * byte modes. Returns the new node (NULL paths not visible here). */
2139 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2140 ir_node *mem, ir_node *ptr, ir_mode *mode,
2141 construct_binop_dest_func *func,
2142 construct_binop_dest_func *func8bit,
2143 match_flags_t flags)
2145 ir_node *src_block = get_nodes_block(node);
2153 ia32_address_mode_t am;
2154 ia32_address_t *addr = &am.addr;
2155 memset(&am, 0, sizeof(am));
2157 assert(flags & match_immediate); /* there is no destam node without... */
2158 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the Load from the store's address; for
 * commutative ops either side works */
2160 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2161 build_address(&am, op1, ia32_create_am_double_use);
2162 new_op = create_immediate_or_transform(op2, 0);
2163 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2164 build_address(&am, op2, ia32_create_am_double_use);
2165 new_op = create_immediate_or_transform(op1, 0);
2170 if (addr->base == NULL)
2171 addr->base = noreg_GP;
2172 if (addr->index == NULL)
2173 addr->index = noreg_GP;
2174 if (addr->mem == NULL)
2177 dbgi = get_irn_dbg_info(node);
2178 block = be_transform_node(src_block);
2179 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2181 if (get_mode_size_bits(mode) == 8) {
2182 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2184 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2186 set_address(new_node, addr);
2187 set_ia32_op_type(new_node, ia32_AddrModeD);
2188 set_ia32_ls_mode(new_node, mode);
2189 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed Load's memory Proj(s) to the new node */
2191 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2192 mem_proj = be_transform_node(am.mem_proj);
2193 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unop ("op [mem]") for a
 * Store(Unop(Load(p)), p) pattern; returns NULL if folding is not
 * possible (early-return path partially hidden). */
2198 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2199 ir_node *ptr, ir_mode *mode,
2200 construct_unop_dest_func *func)
2202 ir_node *src_block = get_nodes_block(node);
2208 ia32_address_mode_t am;
2209 ia32_address_t *addr = &am.addr;
2211 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2214 memset(&am, 0, sizeof(am));
2215 build_address(&am, op, ia32_create_am_double_use);
2217 dbgi = get_irn_dbg_info(node);
2218 block = be_transform_node(src_block);
2219 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2220 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2221 set_address(new_node, addr);
2222 set_ia32_op_type(new_node, ia32_AddrModeD);
2223 set_ia32_ls_mode(new_node, mode);
2224 SET_IA32_ORIG_NODE(new_node, node);
/* reroute the consumed Load's memory Proj(s) to the new node */
2226 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2227 mem_proj = be_transform_node(am.mem_proj);
2228 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negate a pn_Cmp, using float negation rules when the
 * ia32_pn_Cmp_float flag is set (unordered handling differs). */
2233 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2235 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2236 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(cond, 1, 0)) (8-bit only) into a single
 * SetccMem instruction; returns NULL when the pattern doesn't match
 * (early-return lines partially hidden). */
2239 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2241 ir_mode *mode = get_irn_mode(node);
2242 ir_node *mux_true = get_Mux_true(node);
2243 ir_node *mux_false = get_Mux_false(node);
2252 ia32_address_t addr;
/* setcc only writes a byte */
2254 if (get_mode_size_bits(mode) != 8)
2257 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2259 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2265 cond = get_Mux_sel(node);
2266 flags = get_flags_node(cond, &pnc);
2267 /* we can't handle the float special cases with SetM */
2268 if (pnc & ia32_pn_Cmp_float)
/* Mux(c, 0, 1) case: store the negated condition */
2271 pnc = ia32_get_negated_pnc(pnc);
2273 build_address_ptr(&addr, ptr, mem);
2275 dbgi = get_irn_dbg_info(node);
2276 block = get_nodes_block(node);
2277 new_block = be_transform_node(block);
2278 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2279 addr.index, addr.mem, flags, pnc);
2280 set_address(new_node, &addr);
2281 set_ia32_op_type(new_node, ia32_AddrModeD);
2282 set_ia32_ls_mode(new_node, mode);
2283 SET_IA32_ORIG_NODE(new_node, node);
/* Try to express a Store as a destination-address-mode instruction
 * (add/sub/and/or/xor/shift/neg/not/inc/dec/setcc on memory).
 * Returns the new node or NULL if no pattern matched. */
2288 static ir_node *try_create_dest_am(ir_node *node)
2290 ir_node *val = get_Store_value(node);
2291 ir_node *mem = get_Store_mem(node);
2292 ir_node *ptr = get_Store_ptr(node);
2293 ir_mode *mode = get_irn_mode(val);
2294 unsigned bits = get_mode_size_bits(mode);
2299 /* handle only GP modes for now... */
2300 if (!ia32_mode_needs_gp_reg(mode))
2304 /* store must be the only user of the val node */
2305 if (get_irn_n_edges(val) > 1)
2307 /* skip pointless convs */
2309 ir_node *conv_op = get_Conv_op(val);
2310 ir_mode *pred_mode = get_irn_mode(conv_op);
2311 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a downconv before a narrower store is a no-op, look through it */
2313 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2321 /* value must be in the same block */
2322 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2325 switch (get_irn_opcode(val)) {
2327 op1 = get_Add_left(val);
2328 op2 = get_Add_right(val);
2329 if (ia32_cg_config.use_incdec) {
/* x+1 / x-1 → inc/dec on memory */
2330 if (is_Const_1(op2)) {
2331 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2333 } else if (is_Const_Minus_1(op2)) {
2334 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2338 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2339 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2340 match_commutative | match_immediate);
2343 op1 = get_Sub_left(val);
2344 op2 = get_Sub_right(val);
2345 if (is_Const(op2)) {
2346 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2348 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2349 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2353 op1 = get_And_left(val);
2354 op2 = get_And_right(val);
2355 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2356 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2357 match_commutative | match_immediate);
2360 op1 = get_Or_left(val);
2361 op2 = get_Or_right(val);
2362 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2363 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2364 match_commutative | match_immediate);
2367 op1 = get_Eor_left(val);
2368 op2 = get_Eor_right(val);
2369 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2370 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2371 match_commutative | match_immediate);
2374 op1 = get_Shl_left(val);
2375 op2 = get_Shl_right(val);
/* shifts have no separate 8-bit constructor — same func twice */
2376 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2377 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2381 op1 = get_Shr_left(val);
2382 op2 = get_Shr_right(val);
2383 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2384 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2388 op1 = get_Shrs_left(val);
2389 op2 = get_Shrs_right(val);
2390 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2391 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2395 op1 = get_Rotl_left(val);
2396 op2 = get_Rotl_right(val);
2397 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2398 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2401 /* TODO: match ROR patterns... */
2403 new_node = try_create_SetMem(val, ptr, mem);
2407 op1 = get_Minus_op(val);
2408 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2411 /* should be lowered already */
2412 assert(mode != mode_b);
2413 op1 = get_Not_op(val);
2414 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* keep the original Store's pinned state on the replacement */
2420 if (new_node != NULL) {
2421 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2422 get_irn_pinned(node) == op_pin_state_pinned) {
2423 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if mode is a signed 16- or 32-bit integer mode — the only
 * integer widths the x87 fist family can store directly. */
2430 static bool possible_int_mode_for_fp(ir_mode *mode)
2434 if (!mode_is_signed(mode))
2436 size = get_mode_size_bits(mode);
2437 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to a fist-compatible
 * (signed 16/32-bit) integer mode. */
2442 static int is_float_to_int_conv(const ir_node *node)
2444 ir_mode *mode = get_irn_mode(node);
2448 if (!possible_int_mode_for_fp(mode))
2453 conv_op = get_Conv_op(node);
2454 conv_mode = get_irn_mode(conv_op);
2456 if (!mode_is_float(conv_mode))
2463 * Transform a Store(floatConst) into a sequence of
2466 * @return the created ia32 Store node
2468 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2470 ir_mode *mode = get_irn_mode(cns);
2471 unsigned size = get_mode_size_bytes(mode);
2472 tarval *tv = get_Const_tarval(cns);
2473 ir_node *block = get_nodes_block(node);
2474 ir_node *new_block = be_transform_node(block);
2475 ir_node *ptr = get_Store_ptr(node);
2476 ir_node *mem = get_Store_mem(node);
2477 dbg_info *dbgi = get_irn_dbg_info(node);
2481 ia32_address_t addr;
/* the constant is written in 32-bit chunks, so its size must be a
 * multiple of 4 bytes */
2483 assert(size % 4 == 0);
2486 build_address_ptr(&addr, ptr, mem);
/* assemble the next 4 bytes of the tarval into a little-endian
 * 32-bit immediate */
2490 get_tarval_sub_bits(tv, ofs) |
2491 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2492 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2493 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2494 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2496 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2497 addr.index, addr.mem, imm);
2499 set_irn_pinned(new_node, get_irn_pinned(node));
2500 set_ia32_op_type(new_node, ia32_AddrModeD);
2501 set_ia32_ls_mode(new_node, mode_Iu);
2502 set_address(new_node, &addr);
2503 SET_IA32_ORIG_NODE(new_node, node);
2506 ins[i++] = new_node;
2511 } while (size != 0);
/* join the partial stores' memories with a Sync */
2514 return new_rd_Sync(dbgi, new_block, i, ins);
2521 * Generate a vfist or vfisttp instruction.
2523 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2524 ir_node *mem, ir_node *val, ir_node **fist)
2528 if (ia32_cg_config.use_fisttp) {
2529 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2530 if other users exists */
2531 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2532 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* Keep the res Proj alive so the x87 simulator sees the pop */
2533 be_new_Keep(block, 1, &value);
2535 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist needs the FPU control word set to truncation mode */
2538 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2541 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2547 * Transforms a general (no special case) Store.
2549 * @return the created ia32 Store node
2551 static ir_node *gen_general_Store(ir_node *node)
2553 ir_node *val = get_Store_value(node);
2554 ir_mode *mode = get_irn_mode(val);
2555 ir_node *block = get_nodes_block(node);
2556 ir_node *new_block = be_transform_node(block);
2557 ir_node *ptr = get_Store_ptr(node);
2558 ir_node *mem = get_Store_mem(node);
2559 dbg_info *dbgi = get_irn_dbg_info(node);
2560 ir_node *new_val, *new_node, *store;
2561 ia32_address_t addr;
2563 /* check for destination address mode */
2564 new_node = try_create_dest_am(node);
2565 if (new_node != NULL)
2568 /* construct store address */
2569 memset(&addr, 0, sizeof(addr));
2570 ia32_create_address_mode(&addr, ptr, 0);
2572 if (addr.base == NULL) {
2573 addr.base = noreg_GP;
2575 addr.base = be_transform_node(addr.base);
2578 if (addr.index == NULL) {
2579 addr.index = noreg_GP;
2581 addr.index = be_transform_node(addr.index);
2583 addr.mem = be_transform_node(mem);
2585 if (mode_is_float(mode)) {
2586 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2588 while (is_Conv(val) && mode == get_irn_mode(val)) {
2589 ir_node *op = get_Conv_op(val);
2590 if (!mode_is_float(get_irn_mode(op)))
2594 new_val = be_transform_node(val);
2595 if (ia32_cg_config.use_sse2) {
2596 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2597 addr.index, addr.mem, new_val);
2599 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2600 addr.index, addr.mem, new_val, mode);
/* x87 only: Store(Conv(float→int)) is done by fist directly */
2603 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2604 val = get_Conv_op(val);
2606 /* TODO: is this optimisation still necessary at all (middleend)? */
2607 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2608 while (is_Conv(val)) {
2609 ir_node *op = get_Conv_op(val);
2610 if (!mode_is_float(get_irn_mode(op)))
2612 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2616 new_val = be_transform_node(val);
2617 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; value may be encodable as an immediate */
2619 new_val = create_immediate_or_transform(val, 0);
2620 assert(mode != mode_b);
2622 if (get_mode_size_bits(mode) == 8) {
2623 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2624 addr.index, addr.mem, new_val);
2626 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2627 addr.index, addr.mem, new_val);
2632 set_irn_pinned(store, get_irn_pinned(node));
2633 set_ia32_op_type(store, ia32_AddrModeD);
2634 set_ia32_ls_mode(store, mode);
2636 set_address(store, &addr);
2637 SET_IA32_ORIG_NODE(store, node);
2643 * Transforms a Store.
2645 * @return the created ia32 Store node
/* Transform a Store node.
 * Dispatches float constant stores to gen_float_const_Store (sequence of
 * integer stores); everything else goes through gen_general_Store. */
2647 static ir_node *gen_Store(ir_node *node)
2649 ir_node *val = get_Store_value(node);
2650 ir_mode *mode = get_irn_mode(val);
2652 if (mode_is_float(mode) && is_Const(val)) {
2653 /* We can transform every floating const store
2654 into a sequence of integer stores.
2655 If the constant is already in a register,
2656 it would be better to use it, but we don't
2657 have this information here. */
2658 return gen_float_const_Store(node, val);
2660 return gen_general_Store(node);
2664 * Transforms a Switch.
2666 * @return the created ia32 SwitchJmp node
/* Transform a switch-Cond into an ia32 SwitchJmp.
 * Scans all Proj users to find the case-value range; if the smallest case
 * is non-zero a Lea subtracts it so the jump table starts at 0.
 * Panics when the case range exceeds 128000 entries. */
2668 static ir_node *create_Switch(ir_node *node)
2670 dbg_info *dbgi = get_irn_dbg_info(node);
2671 ir_node *block = be_transform_node(get_nodes_block(node));
2672 ir_node *sel = get_Cond_selector(node);
2673 ir_node *new_sel = be_transform_node(sel);
2674 long switch_min = LONG_MAX;
2675 long switch_max = LONG_MIN;
2676 long default_pn = get_Cond_default_proj(node);
2678 const ir_edge_t *edge;
2680 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2682 /* determine the smallest switch case value */
2683 foreach_out_edge(node, edge) {
2684 ir_node *proj = get_edge_src_irn(edge);
2685 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the table range */
2686 if (pn == default_pn)
2689 if (pn < switch_min)
2691 if (pn > switch_max)
/* reject absurdly large jump tables */
2695 if ((unsigned long) (switch_max - switch_min) > 128000) {
2696 panic("Size of switch %+F bigger than 128000", node);
2699 if (switch_min != 0) {
2700 /* if smallest switch case is not 0 we need an additional sub */
2701 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2702 add_ia32_am_offs_int(new_sel, -switch_min);
2703 set_ia32_op_type(new_sel, ia32_AddrModeS);
2705 SET_IA32_ORIG_NODE(new_sel, node);
2708 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2709 SET_IA32_ORIG_NODE(new_node, node);
2715 * Transform a Cond node.
/* Transform a Cond node.
 * Non-mode_b selectors are switches; boolean Conds become a Jcc fed by the
 * flags of the transformed compare. */
2717 static ir_node *gen_Cond(ir_node *node)
2719 ir_node *block = get_nodes_block(node);
2720 ir_node *new_block = be_transform_node(block);
2721 dbg_info *dbgi = get_irn_dbg_info(node);
2722 ir_node *sel = get_Cond_selector(node);
2723 ir_mode *sel_mode = get_irn_mode(sel);
2724 ir_node *flags = NULL;
2728 if (sel_mode != mode_b) {
2729 return create_Switch(node);
2732 /* we get flags from a Cmp */
2733 flags = get_flags_node(sel, &pnc);
2735 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2736 SET_IA32_ORIG_NODE(new_node, node);
2742 * Transform a be_Copy.
/* Transform a be_Copy.
 * Duplicates the node and normalizes GP-register modes to mode_Iu. */
2744 static ir_node *gen_be_Copy(ir_node *node)
2746 ir_node *new_node = be_duplicate_node(node);
2747 ir_mode *mode = get_irn_mode(new_node);
2749 if (ia32_mode_needs_gp_reg(mode)) {
2750 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare producing flags.
 * Prefers fucomi (flags directly); otherwise uses ftst (compare against 0)
 * or fucom, both of which go through fnstsw + sahf to reach EFLAGS. */
2756 static ir_node *create_Fucom(ir_node *node)
2758 dbg_info *dbgi = get_irn_dbg_info(node);
2759 ir_node *block = get_nodes_block(node);
2760 ir_node *new_block = be_transform_node(block);
2761 ir_node *left = get_Cmp_left(node);
2762 ir_node *new_left = be_transform_node(left);
2763 ir_node *right = get_Cmp_right(node);
2767 if (ia32_cg_config.use_fucomi) {
2768 new_right = be_transform_node(right);
2769 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2771 set_ia32_commutative(new_node);
2772 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst */
2774 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2775 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2777 new_right = be_transform_node(right);
2778 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2781 set_ia32_commutative(new_node);
2783 SET_IA32_ORIG_NODE(new_node, node);
/* move the fnstsw result (AH) into EFLAGS */
2785 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2786 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE2 ucomis[sd] float compare producing flags.
 * Uses the address-mode matcher so one operand may come from memory. */
2794 static ir_node *create_Ucomi(ir_node *node)
2796 dbg_info *dbgi = get_irn_dbg_info(node);
2797 ir_node *src_block = get_nodes_block(node);
2798 ir_node *new_block = be_transform_node(src_block);
2799 ir_node *left = get_Cmp_left(node);
2800 ir_node *right = get_Cmp_right(node);
2802 ia32_address_mode_t am;
2803 ia32_address_t *addr = &am.addr;
2805 match_arguments(&am, src_block, left, right, NULL,
2806 match_commutative | match_am);
2808 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2809 addr->mem, am.new_op1, am.new_op2,
2811 set_am_attributes(new_node, &am);
2813 SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory Projs if a load was folded in */
2815 new_node = fix_mem_proj(new_node, &am);
2819 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2820 * to fold an and into a test node
/* Check whether all Proj users of a Cmp are Eq or Lg comparisons.
 * Only then may an And feeding the Cmp be folded into a Test instruction
 * (Test only yields meaningful equality/inequality results). */
2822 static bool can_fold_test_and(ir_node *node)
2824 const ir_edge_t *edge;
2826 /** we can only have eq and lg projs */
2827 foreach_out_edge(node, edge) {
2828 ir_node *proj = get_edge_src_irn(edge);
2829 pn_Cmp pnc = get_Proj_proj(proj);
2830 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2838 * returns true if it is assured, that the upper bits of a node are "clean"
2839 * which means for a 16 or 8 bit value, that the upper bits in the register
2840 * are 0 for unsigned and a copy of the last significant bit for signed
/* Return true if the upper register bits of a sub-32bit value are known to
 * be "clean": zero for unsigned modes, a sign-extension of the top bit for
 * signed modes.  Allows using the smaller 32bit compare encoding.
 * Recurses through Projs and inspects the producing ia32 opcode. */
2843 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2845 assert(ia32_mode_needs_gp_reg(mode));
/* full-width values trivially have no dirty upper bits */
2846 if (get_mode_size_bits(mode) >= 32)
2849 if (is_Proj(transformed_node))
2850 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2852 switch (get_ia32_irn_opcode(transformed_node)) {
2853 case iro_ia32_Conv_I2I:
2854 case iro_ia32_Conv_I2I8Bit: {
/* a conv cleans the bits iff it extends with the right signedness
 * from a mode at most as wide */
2855 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2856 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2858 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2865 if (mode_is_signed(mode)) {
2866 return false; /* TODO handle signed modes */
/* Shr by a large enough constant guarantees zero upper bits */
2868 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2869 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2870 const ia32_immediate_attr_t *attr
2871 = get_ia32_immediate_attr_const(right);
2872 if (attr->symconst == 0 &&
2873 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2877 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2881 /* TODO too conservative if shift amount is constant */
2882 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And with one clean operand zeroes the upper bits (unsigned only) */
2885 if (!mode_is_signed(mode)) {
2887 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2888 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2890 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binops: both operands must be clean */
2895 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2896 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2898 case iro_ia32_Const:
2899 case iro_ia32_Immediate: {
2900 const ia32_immediate_attr_t *attr =
2901 get_ia32_immediate_attr_const(transformed_node);
2902 if (mode_is_signed(mode)) {
/* signed: all bits above the mode must equal the sign bit */
2903 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2904 return shifted == 0 || shifted == -1;
/* unsigned: all bits above the mode must be zero */
2906 unsigned long shifted = (unsigned long)attr->offset;
2907 shifted >>= get_mode_size_bits(mode);
2908 return shifted == 0;
2918 * Generate code for a Cmp.
/* Generate code for a Cmp.
 * Floats go to ucomi (SSE2) or fucom (x87).  Integer (x & y) ==/!= 0 is
 * folded into a Test; everything else becomes a Cmp/Cmp8Bit.  When both
 * operands have clean upper bits the shorter 32bit encoding is used. */
2920 static ir_node *gen_Cmp(ir_node *node)
2922 dbg_info *dbgi = get_irn_dbg_info(node);
2923 ir_node *block = get_nodes_block(node);
2924 ir_node *new_block = be_transform_node(block);
2925 ir_node *left = get_Cmp_left(node);
2926 ir_node *right = get_Cmp_right(node);
2927 ir_mode *cmp_mode = get_irn_mode(left);
2929 ia32_address_mode_t am;
2930 ia32_address_t *addr = &am.addr;
2933 if (mode_is_float(cmp_mode)) {
2934 if (ia32_cg_config.use_sse2) {
2935 return create_Ucomi(node);
2937 return create_Fucom(node);
2941 assert(ia32_mode_needs_gp_reg(cmp_mode));
2943 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2944 cmp_unsigned = !mode_is_signed(cmp_mode);
2945 if (is_Const_0(right) &&
2947 get_irn_n_edges(left) == 1 &&
2948 can_fold_test_and(node)) {
2949 /* Test(and_left, and_right) */
2950 ir_node *and_left = get_And_left(left);
2951 ir_node *and_right = get_And_right(left);
2953 /* matze: code here used mode instead of cmd_mode, I think it is always
2954 * the same as cmp_mode, but I leave this here to see if this is really
2957 assert(get_irn_mode(and_left) == cmp_mode);
2959 match_arguments(&am, block, and_left, and_right, NULL,
2961 match_am | match_8bit_am | match_16bit_am |
2962 match_am_and_immediates | match_immediate);
2964 /* use 32bit compare mode if possible since the opcode is smaller */
2965 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2966 upper_bits_clean(am.new_op2, cmp_mode)) {
2967 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2970 if (get_mode_size_bits(cmp_mode) == 8) {
2971 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2972 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2975 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2976 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2979 /* Cmp(left, right) */
2980 match_arguments(&am, block, left, right, NULL,
2981 match_commutative | match_am | match_8bit_am |
2982 match_16bit_am | match_am_and_immediates |
2984 /* use 32bit compare mode if possible since the opcode is smaller */
2985 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2986 upper_bits_clean(am.new_op2, cmp_mode)) {
2987 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2990 if (get_mode_size_bits(cmp_mode) == 8) {
2991 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2992 addr->index, addr->mem, am.new_op1,
2993 am.new_op2, am.ins_permuted,
2996 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2997 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
3000 set_am_attributes(new_node, &am);
3001 set_ia32_ls_mode(new_node, cmp_mode);
3003 SET_IA32_ORIG_NODE(new_node, node);
3005 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc selecting between Mux true/false values on given flags.
 * If the matcher permuted the operands the condition code is negated to
 * compensate. */
3010 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3013 dbg_info *dbgi = get_irn_dbg_info(node);
3014 ir_node *block = get_nodes_block(node);
3015 ir_node *new_block = be_transform_node(block);
3016 ir_node *val_true = get_Mux_true(node);
3017 ir_node *val_false = get_Mux_false(node);
3019 ia32_address_mode_t am;
3020 ia32_address_t *addr;
3022 assert(ia32_cg_config.use_cmov);
3023 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3027 match_arguments(&am, block, val_false, val_true, flags,
3028 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operands got swapped by the matcher -> invert the condition */
3030 if (am.ins_permuted)
3031 pnc = ia32_get_negated_pnc(pnc);
3033 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3034 addr->mem, am.new_op1, am.new_op2, new_flags,
3036 set_am_attributes(new_node, &am);
3038 SET_IA32_ORIG_NODE(new_node, node);
3040 new_node = fix_mem_proj(new_node, &am);
3046 * Creates a ia32 Setcc instruction.
/* Create a Setcc on the given flags; since setcc only writes an 8bit
 * register, a zero-extending conv is appended when the result mode is
 * wider than 8 bits. */
3048 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3049 ir_node *flags, pn_Cmp pnc,
3052 ir_mode *mode = get_irn_mode(orig_node);
3055 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3056 SET_IA32_ORIG_NODE(new_node, orig_node);
3058 /* we might need to conv the result up */
3059 if (get_mode_size_bits(mode) > 8) {
3060 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3061 nomem, new_node, mode_Bu);
3062 SET_IA32_ORIG_NODE(new_node, orig_node);
3069 * Create instruction for an unsigned Difference or Zero.
/* Create an unsigned "difference or zero" (doz): (a - b) if a >= b, else 0.
 * Computed branch-free as (a - b) & ~sbb(0,0,borrow): the Sub's carry flag
 * feeds an Sbb0 (all-ones on borrow), which is inverted and ANDed with the
 * subtraction result. */
3071 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3073 ir_mode *mode = get_irn_mode(psi);
3083 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3084 match_mode_neutral | match_am | match_immediate | match_two_users);
3086 block = get_nodes_block(new_node);
3088 if (is_Proj(new_node)) {
3089 sub = get_Proj_pred(new_node);
3090 assert(is_ia32_Sub(sub));
/* make the Sub multi-result so we can also tap its flags output */
3093 set_irn_mode(sub, mode_T);
3094 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3096 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3098 dbgi = get_irn_dbg_info(psi);
/* sbb 0,0 yields -1 on borrow (a < b), 0 otherwise; Not inverts to a mask */
3099 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3100 not = new_bd_ia32_Not(dbgi, block, sbb);
3102 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3103 set_ia32_commutative(new_node);
3108 * Create an const array of two float consts.
3110 * @param c0 the first constant
3111 * @param c1 the second constant
3112 * @param new_mode IN/OUT for the mode of the constants, if NULL
3113 * smallest possible mode will be used
/* Create a read-only global array entity holding two float constants.
 * When *new_mode is NULL the smallest mode that represents both tarvals
 * losslessly (float preferred over double) is chosen and written back. */
3115 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3118 ir_mode *mode = *new_mode;
3120 ir_initializer_t *initializer;
3121 tarval *tv0 = get_Const_tarval(c0);
3122 tarval *tv1 = get_Const_tarval(c1);
3125 /* detect the best mode for the constants */
3126 mode = get_tarval_mode(tv0);
3128 if (mode != mode_F) {
/* try to narrow to single precision if no information is lost */
3129 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3130 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3132 tv0 = tarval_convert_to(tv0, mode);
3133 tv1 = tarval_convert_to(tv1, mode);
3134 } else if (mode != mode_D) {
/* otherwise try double precision */
3135 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3136 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3138 tv0 = tarval_convert_to(tv0, mode);
3139 tv1 = tarval_convert_to(tv1, mode);
3146 tp = ia32_create_float_type(mode, 4);
3147 tp = ia32_create_float_array(tp);
/* local, constant, uniquely named entity in the global type */
3149 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3151 set_entity_ld_ident(ent, get_entity_ident(ent));
3152 set_entity_visibility(ent, ir_visibility_local);
3153 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3155 initializer = create_initializer_compound(2);
3157 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3158 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3160 set_entity_initializer(ent, initializer);
3167 * Possible transformations for creating a Setcc.
/* Possible instruction steps for materializing a two-constant Mux result
 * from a Setcc (add/lea/shl/neg/not/and/set/sbb). */
3169 enum setcc_transform_insn {
/* A recipe computed by find_const_transform: an optional compare-operand
 * permutation flag plus an ordered list of transform steps. */
3182 typedef struct setcc_transform {
3184 unsigned permutate_cmp_ins;
3187 enum setcc_transform_insn transform;
3191 } setcc_transform_t;
3194 * Setcc can only handle 0 and 1 result.
3195 * Find a transformation that creates 0 and 1 from
/* Setcc can only produce 0 and 1.  Compute a sequence of cheap instruction
 * steps (add/lea/shl/neg/not/and/sbb) that turns a 0/1 Setcc result into
 * the requested Mux constants t (true) and f (false).
 * Normalizes first so that f == 0 and t != 0, negating the condition when
 * the values have to be swapped. */
3198 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3199 setcc_transform_t *res)
3204 res->permutate_cmp_ins = 0;
/* normalize: make t the non-zero (or bigger) value, negating the pnc */
3206 if (tarval_is_null(t)) {
3210 pnc = ia32_get_negated_pnc(pnc);
3211 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3212 // now, t is the bigger one
3216 pnc = ia32_get_negated_pnc(pnc);
/* non-zero f: produce (t-f)*setcc + f, i.e. prepend an ADD f step */
3220 if (! tarval_is_null(f)) {
3221 tarval *t_sub = tarval_sub(t, f, NULL);
3224 res->steps[step].transform = SETCC_TR_ADD;
3226 if (t == tarval_bad)
3227 panic("constant subtract failed");
3228 if (! tarval_is_long(f))
3229 panic("tarval is not long");
3231 res->steps[step].val = get_tarval_long(f);
3233 f = tarval_sub(f, f, NULL);
3234 assert(tarval_is_null(f));
/* t == 1: the raw setcc result is already correct */
3237 if (tarval_is_one(t)) {
3238 res->steps[step].transform = SETCC_TR_SET;
3239 res->num_steps = ++step;
/* t == -1: setcc then negate */
3243 if (tarval_is_minus_one(t)) {
3244 res->steps[step].transform = SETCC_TR_NEG;
3246 res->steps[step].transform = SETCC_TR_SET;
3247 res->num_steps = ++step;
3250 if (tarval_is_long(t)) {
3251 long v = get_tarval_long(t);
3253 res->steps[step].val = 0;
/* small multipliers map onto LEA scale forms: 9 = (a<<3)+a etc. */
3256 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3258 res->steps[step].transform = SETCC_TR_LEAxx;
3259 res->steps[step].scale = 3; /* (a << 3) + a */
3262 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3264 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3265 res->steps[step].scale = 3; /* (a << 3) */
3268 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3270 res->steps[step].transform = SETCC_TR_LEAxx;
3271 res->steps[step].scale = 2; /* (a << 2) + a */
3274 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3276 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3277 res->steps[step].scale = 2; /* (a << 2) */
3280 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3282 res->steps[step].transform = SETCC_TR_LEAxx;
3283 res->steps[step].scale = 1; /* (a << 1) + a */
3286 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3288 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3289 res->steps[step].scale = 1; /* (a << 1) */
3292 res->num_steps = step;
/* general constant: via NEG (0/-1 mask) + AND, or SHL for single bits */
3295 if (! tarval_is_single_bit(t)) {
3296 res->steps[step].transform = SETCC_TR_AND;
3297 res->steps[step].val = v;
3299 res->steps[step].transform = SETCC_TR_NEG;
3301 int v = get_tarval_lowest_bit(t);
3304 res->steps[step].transform = SETCC_TR_SHL;
3305 res->steps[step].scale = v;
3309 res->steps[step].transform = SETCC_TR_SET;
3310 res->num_steps = ++step;
3313 panic("tarval is not long");
3317 * Transforms a Mux node into some code sequence.
3319 * @return The transformed node.
/* Transform a Mux node into an ia32 code sequence.
 * Float Muxes: SSE2 min/max patterns, or (two constants) a Setcc-indexed
 * load from a constant array; other float Muxes panic.
 * Integer Muxes: unsigned doz pattern, constant/constant via
 * find_const_transform step sequences, otherwise CMovcc. */
3321 static ir_node *gen_Mux(ir_node *node)
3323 dbg_info *dbgi = get_irn_dbg_info(node);
3324 ir_node *block = get_nodes_block(node);
3325 ir_node *new_block = be_transform_node(block);
3326 ir_node *mux_true = get_Mux_true(node);
3327 ir_node *mux_false = get_Mux_false(node);
3328 ir_node *cond = get_Mux_sel(node);
3329 ir_mode *mode = get_irn_mode(node);
3334 assert(get_irn_mode(cond) == mode_b);
3336 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3337 if (mode_is_float(mode)) {
3338 ir_node *cmp = get_Proj_pred(cond);
3339 ir_node *cmp_left = get_Cmp_left(cmp);
3340 ir_node *cmp_right = get_Cmp_right(cmp);
3341 pn_Cmp pnc = get_Proj_proj(cond);
3343 if (ia32_cg_config.use_sse2) {
/* recognize min/max patterns mapping onto SSE min/max instructions */
3344 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3345 if (cmp_left == mux_true && cmp_right == mux_false) {
3346 /* Mux(a <= b, a, b) => MIN */
3347 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3348 match_commutative | match_am | match_two_users);
3349 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3350 /* Mux(a <= b, b, a) => MAX */
3351 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3352 match_commutative | match_am | match_two_users);
3354 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3355 if (cmp_left == mux_true && cmp_right == mux_false) {
3356 /* Mux(a >= b, a, b) => MAX */
3357 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3358 match_commutative | match_am | match_two_users);
3359 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3360 /* Mux(a >= b, b, a) => MIN */
3361 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3362 match_commutative | match_am | match_two_users);
/* two float constants: place them in a 2-element constant array and
 * load entry [setcc] -> branch-free select */
3367 if (is_Const(mux_true) && is_Const(mux_false)) {
3368 ia32_address_mode_t am;
3373 flags = get_flags_node(cond, &pnc);
3374 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3376 if (ia32_cg_config.use_sse2) {
3377 /* cannot load from different mode on SSE */
3380 /* x87 can load any mode */
3384 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (via am scale / lea / add) */
3386 switch (get_mode_size_bytes(new_mode)) {
3396 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3397 set_ia32_am_scale(new_node, 2);
3402 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3403 set_ia32_am_scale(new_node, 1);
3406 /* arg, shift 16 NOT supported */
3408 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3411 panic("Unsupported constant size");
3414 am.ls_mode = new_mode;
3415 am.addr.base = get_symconst_base();
3416 am.addr.index = new_node;
3417 am.addr.mem = nomem;
3419 am.addr.scale = scale;
3420 am.addr.use_frame = 0;
3421 am.addr.frame_entity = NULL;
3422 am.addr.symconst_sign = 0;
3423 am.mem_proj = am.addr.mem;
3424 am.op_type = ia32_AddrModeS;
3427 am.pinned = op_pin_state_floats;
3429 am.ins_permuted = 0;
3431 if (ia32_cg_config.use_sse2)
3432 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3434 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3435 set_am_attributes(load, &am);
3437 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3439 panic("cannot transform floating point Mux");
3442 assert(ia32_mode_needs_gp_reg(mode));
3444 if (is_Proj(cond)) {
3445 ir_node *cmp = get_Proj_pred(cond);
3447 ir_node *cmp_left = get_Cmp_left(cmp);
3448 ir_node *cmp_right = get_Cmp_right(cmp);
3449 pn_Cmp pnc = get_Proj_proj(cond);
3451 /* check for unsigned Doz first */
3452 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3453 is_Const_0(mux_false) && is_Sub(mux_true) &&
3454 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3455 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3456 return create_doz(node, cmp_left, cmp_right);
3457 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3458 is_Const_0(mux_true) && is_Sub(mux_false) &&
3459 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3460 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3461 return create_doz(node, cmp_left, cmp_right);
3466 flags = get_flags_node(cond, &pnc);
3468 if (is_Const(mux_true) && is_Const(mux_false)) {
3469 /* both are const, good */
3470 tarval *tv_true = get_Const_tarval(mux_true);
3471 tarval *tv_false = get_Const_tarval(mux_false);
3472 setcc_transform_t res;
3475 find_const_transform(pnc, tv_true, tv_false, &res);
3477 if (res.permutate_cmp_ins) {
3478 ia32_attr_t *attr = get_ia32_attr(flags);
3479 attr->data.ins_permuted ^= 1;
/* emit the recipe back to front: steps were recorded outermost-first */
3481 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3484 switch (res.steps[step].transform) {
3486 imm = ia32_immediate_from_long(res.steps[step].val);
3487 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3489 case SETCC_TR_ADDxx:
3490 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3493 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3494 set_ia32_am_scale(new_node, res.steps[step].scale);
3495 set_ia32_am_offs_int(new_node, res.steps[step].val);
3497 case SETCC_TR_LEAxx:
3498 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3499 set_ia32_am_scale(new_node, res.steps[step].scale);
3500 set_ia32_am_offs_int(new_node, res.steps[step].val);
3503 imm = ia32_immediate_from_long(res.steps[step].scale);
3504 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3507 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3510 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3513 imm = ia32_immediate_from_long(res.steps[step].val);
3514 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3517 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3520 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3523 panic("unknown setcc transform");
/* general case: conditional move */
3527 new_node = create_CMov(node, cond, flags, pnc);
3535 * Create a conversion from x87 state register to general purpose.
/* Create a conversion from x87 stack register to general purpose register:
 * fist the value to the frame, then Load the integer back.
 * fist can only store signed integers, so unsigned 32bit values take a
 * 64bit store and only the low word is loaded. */
3539 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3540 ir_node *block = be_transform_node(get_nodes_block(node));
3541 ir_node *op = get_Conv_op(node);
3542 ir_node *new_op = be_transform_node(op);
3543 ir_graph *irg = current_ir_graph;
3544 dbg_info *dbgi = get_irn_dbg_info(node);
3545 ir_mode *mode = get_irn_mode(node);
3546 ir_node *fist, *load, *mem;
3548 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3549 set_irn_pinned(fist, op_pin_state_floats);
3550 set_ia32_use_frame(fist);
3551 set_ia32_op_type(fist, ia32_AddrModeD);
3553 assert(get_mode_size_bits(mode) <= 32);
3554 /* exception we can only store signed 32 bit integers, so for unsigned
3555 we store a 64bit (signed) integer and load the lower bits */
3556 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3557 set_ia32_ls_mode(fist, mode_Ls);
3559 set_ia32_ls_mode(fist, mode_Is);
3561 SET_IA32_ORIG_NODE(fist, node);
3564 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3566 set_irn_pinned(load, op_pin_state_floats);
3567 set_ia32_use_frame(load);
3568 set_ia32_op_type(load, ia32_AddrModeS);
3569 set_ia32_ls_mode(load, mode_Is);
/* request an appropriately sized spill slot for the store/load pair */
3570 if (get_ia32_ls_mode(fist) == mode_Ls) {
3571 ia32_attr_t *attr = get_ia32_attr(load);
3572 attr->data.need_64bit_stackent = 1;
3574 ia32_attr_t *attr = get_ia32_attr(load);
3575 attr->data.need_32bit_stackent = 1;
3577 SET_IA32_ORIG_NODE(load, node);
3579 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3582 * Creates a x87 strict Conv by placing a Store and a Load
/* Create an x87 strict Conv by a store/load round trip through the frame:
 * forces the value to the exact target precision (x87 registers always
 * hold 80bit extended precision internally). */
3584 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3586 ir_node *block = get_nodes_block(node);
3587 ir_graph *irg = get_Block_irg(block);
3588 dbg_info *dbgi = get_irn_dbg_info(node);
3589 ir_node *frame = get_irg_frame(irg);
3590 ir_node *store, *load;
3593 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3594 set_ia32_use_frame(store);
3595 set_ia32_op_type(store, ia32_AddrModeD);
3596 SET_IA32_ORIG_NODE(store, node);
3598 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3599 set_ia32_use_frame(load);
3600 set_ia32_op_type(load, ia32_AddrModeS);
3601 SET_IA32_ORIG_NODE(load, node);
3603 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer Conv node, picking the 8bit variant when
 * the load/store mode is 8 bits wide. */
3607 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3608 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3610 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3612 func = get_mode_size_bits(mode) == 8 ?
3613 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3614 return func(dbgi, block, base, index, mem, val, mode);
3618 * Create a conversion from general purpose to x87 register
/* Create a conversion from general purpose register to x87 register (fild).
 * If the source is a signed 16/32bit integer in memory, fild reads it via
 * source address mode directly.  Otherwise the value is stored to the frame
 * first; 32bit unsigned values are widened to a zero-extended 64bit spill
 * (fild only understands signed operands). */
3620 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3622 ir_node *src_block = get_nodes_block(node);
3623 ir_node *block = be_transform_node(src_block);
3624 ir_graph *irg = get_Block_irg(block);
3625 dbg_info *dbgi = get_irn_dbg_info(node);
3626 ir_node *op = get_Conv_op(node);
3627 ir_node *new_op = NULL;
3629 ir_mode *store_mode;
3634 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3635 if (possible_int_mode_for_fp(src_mode)) {
3636 ia32_address_mode_t am;
3638 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3639 if (am.op_type == ia32_AddrModeS) {
3640 ia32_address_t *addr = &am.addr;
3642 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3643 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3645 set_am_attributes(fild, &am);
3646 SET_IA32_ORIG_NODE(fild, node);
3648 fix_mem_proj(fild, &am);
/* no source AM possible: go through the stack frame */
3653 if (new_op == NULL) {
3654 new_op = be_transform_node(op);
3657 mode = get_irn_mode(op);
3659 /* first convert to 32 bit signed if necessary */
3660 if (get_mode_size_bits(src_mode) < 32) {
3661 if (!upper_bits_clean(new_op, src_mode)) {
3662 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3663 SET_IA32_ORIG_NODE(new_op, node);
3668 assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can read it */
3671 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3673 set_ia32_use_frame(store);
3674 set_ia32_op_type(store, ia32_AddrModeD);
3675 set_ia32_ls_mode(store, mode_Iu);
3677 /* exception for 32bit unsigned, do a 64bit spill+load */
3678 if (!mode_is_signed(mode)) {
/* store a zero high word so the 64bit value is the zero-extension */
3681 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3683 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3684 noreg_GP, nomem, zero_const);
3686 set_ia32_use_frame(zero_store);
3687 set_ia32_op_type(zero_store, ia32_AddrModeD);
3688 add_ia32_am_offs_int(zero_store, 4);
3689 set_ia32_ls_mode(zero_store, mode_Iu);
3694 store = new_rd_Sync(dbgi, block, 2, in);
3695 store_mode = mode_Ls;
3697 store_mode = mode_Is;
3701 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3703 set_ia32_use_frame(fild);
3704 set_ia32_op_type(fild, ia32_AddrModeS);
3705 set_ia32_ls_mode(fild, store_mode);
3707 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3713 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes.
 * The conv narrows/extends relative to the smaller of the two modes; when
 * the operand's upper bits are already clean the conv is dropped entirely
 * (the elided lines presumably return am.new_op2 then — confirm in file). */
3715 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3716 dbg_info *dbgi, ir_node *block, ir_node *op,
3719 ir_node *new_block = be_transform_node(block);
3721 ir_mode *smaller_mode;
3722 ia32_address_mode_t am;
3723 ia32_address_t *addr = &am.addr;
3726 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3727 smaller_mode = src_mode;
3729 smaller_mode = tgt_mode;
3732 #ifdef DEBUG_libfirm
3734 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3739 match_arguments(&am, block, NULL, op, NULL,
3740 match_am | match_8bit_am | match_16bit_am);
3742 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3743 /* unnecessary conv. in theory it shouldn't have been AM */
3744 assert(is_ia32_NoReg_GP(addr->base));
3745 assert(is_ia32_NoReg_GP(addr->index));
3746 assert(is_NoMem(addr->mem));
3747 assert(am.addr.offset == 0);
3748 assert(am.addr.symconst_ent == NULL);
3752 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3753 addr->mem, am.new_op2, smaller_mode);
3754 set_am_attributes(new_node, &am);
3755 /* match_arguments assume that out-mode = in-mode, this isn't true here
3757 set_ia32_ls_mode(new_node, smaller_mode);
3758 SET_IA32_ORIG_NODE(new_node, node);
3759 new_node = fix_mem_proj(new_node, &am);
3764  * Transforms a Conv node.
3766  * @return The created ia32 Conv node
/*
 * Dispatches on the (source, target) mode classes: float->float,
 * float->int, int->float and int->int each take a different path, and
 * within the float paths SSE2 vs. x87 is selected via ia32_cg_config.
 * NOTE(review): this extraction is missing interleaved lines (blank
 * lines, braces, else-branches); comments describe visible code only.
 */
3768 static ir_node *gen_Conv(ir_node *node)
3770 	ir_node  *block     = get_nodes_block(node);
3771 	ir_node  *new_block = be_transform_node(block);
3772 	ir_node  *op        = get_Conv_op(node);
3773 	ir_node  *new_op    = NULL;
3774 	dbg_info *dbgi      = get_irn_dbg_info(node);
3775 	ir_mode  *src_mode  = get_irn_mode(op);
3776 	ir_mode  *tgt_mode  = get_irn_mode(node);
3777 	int       src_bits  = get_mode_size_bits(src_mode);
3778 	int       tgt_bits  = get_mode_size_bits(tgt_mode);
3779 	ir_node  *res       = NULL;
	/* 64bit integer values must have been lowered before the backend runs */
3781 	assert(!mode_is_int(src_mode) || src_bits <= 32);
3782 	assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3784 	/* modeB -> X should already be lowered by the lower_mode_b pass */
3785 	if (src_mode == mode_b) {
3786 		panic("ConvB not lowered %+F", node);
	/* no-op conversion: same source and target mode */
3789 	if (src_mode == tgt_mode) {
3790 		if (get_Conv_strict(node)) {
3791 			if (ia32_cg_config.use_sse2) {
3792 				/* when we are in SSE mode, we can kill all strict no-op conversion */
3793 				return be_transform_node(op);
3796 			/* this should be optimized already, but who knows... */
3797 			DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3798 			DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3799 			return be_transform_node(op);
3803 	if (mode_is_float(src_mode)) {
3804 		new_op = be_transform_node(op);
3805 		/* we convert from float ... */
3806 		if (mode_is_float(tgt_mode)) {
3808 			if (ia32_cg_config.use_sse2) {
3809 				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3810 				res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3812 				set_ia32_ls_mode(res, tgt_mode);
				/* x87 path: a strict Conv may need an explicit rounding step */
3814 				if (get_Conv_strict(node)) {
3815 					/* if fp_no_float_fold is not set then we assume that we
3816 					 * don't have any float operations in a non
3817 					 * mode_float_arithmetic mode and can skip strict upconvs */
3818 					if (src_bits < tgt_bits
3819 							&& !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3820 						DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3823 						res = gen_x87_strict_conv(tgt_mode, new_op);
3824 						SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3828 					DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
			/* ... to int */
3833 			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3834 			if (ia32_cg_config.use_sse2) {
3835 				res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3837 				set_ia32_ls_mode(res, src_mode);
3839 				return gen_x87_fp_to_gp(node);
3843 		/* we convert from int ... */
3844 		if (mode_is_float(tgt_mode)) {
3846 			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3847 			if (ia32_cg_config.use_sse2) {
3848 				new_op = be_transform_node(op);
3849 				res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3851 				set_ia32_ls_mode(res, tgt_mode);
				/* x87: check whether the int value even fits into the float
				 * mantissa; if not, an explicit strict conversion is needed */
3853 				unsigned int_mantissa   = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3854 				unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3855 				res = gen_x87_gp_to_fp(node, src_mode);
3857 				/* we need a strict-Conv, if the int mode has more bits than the
3859 				if (float_mantissa < int_mantissa) {
3860 					res = gen_x87_strict_conv(tgt_mode, res);
3861 					SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3865 		} else if (tgt_mode == mode_b) {
3866 			/* mode_b lowering already took care that we only have 0/1 values */
3867 			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3868 			    src_mode, tgt_mode));
3869 			return be_transform_node(op);
			/* int -> int of the same width is a no-op on ia32 */
3872 			if (src_bits == tgt_bits) {
3873 				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3874 				    src_mode, tgt_mode));
3875 				return be_transform_node(op);
3878 			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/*
 * Try to encode @p node as an ia32 Immediate (honouring the given
 * inline-asm constraint letter); fall back to the generic transform
 * when this is not possible.
 */
3886 static ir_node *create_immediate_or_transform(ir_node *node,
3887                                               char immediate_constraint_type)
3889 	ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3890 	if (new_node == NULL) {
3891 		new_node = be_transform_node(node);
3897  * Transforms a FrameAddr into an ia32 Add.
/*
 * The frame entity offset is resolved later (after stack layout), so
 * here we only emit a Lea on the transformed frame pointer and tag it
 * with the entity and the use_frame flag.
 */
3899 static ir_node *gen_be_FrameAddr(ir_node *node)
3901 	ir_node  *block  = be_transform_node(get_nodes_block(node));
3902 	ir_node  *op     = be_get_FrameAddr_frame(node);
3903 	ir_node  *new_op = be_transform_node(op);
3904 	dbg_info *dbgi   = get_irn_dbg_info(node);
3907 	new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3908 	set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3909 	set_ia32_use_frame(new_node);
3911 	SET_IA32_ORIG_NODE(new_node, node);
3917  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/*
 * Calling convention returns floats on the x87 stack, but with SSE2 the
 * value lives in an XMM register.  For a float return under SSE2 this
 * routine spills the value to the frame (xStoreSimple), reloads it into
 * an x87 register (vfld) and rebuilds the Barrier so the Return sees the
 * x87 value and the load's memory Proj.  All other Returns are simply
 * duplicated.
 */
3919 static ir_node *gen_be_Return(ir_node *node)
3921 	ir_graph  *irg     = current_ir_graph;
3922 	ir_node   *ret_val = get_irn_n(node, be_pos_Return_val);
3923 	ir_node   *ret_mem = get_irn_n(node, be_pos_Return_mem);
3924 	ir_entity *ent     = get_irg_entity(irg);
3925 	ir_type   *tp      = get_entity_type(ent);
3930 	ir_node   *frame, *sse_store, *fld, *mproj, *barrier;
3931 	ir_node   *new_barrier, *new_ret_val, *new_ret_mem;
3933 	int       pn_ret_val, pn_ret_mem, arity, i;
	/* only the SSE2 + float-result case needs special handling */
3935 	assert(ret_val != NULL);
3936 	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3937 		return be_duplicate_node(node);
3940 	res_type = get_method_res_type(tp, 0);
3942 	if (! is_Primitive_type(res_type)) {
3943 		return be_duplicate_node(node);
3946 	mode = get_type_mode(res_type);
3947 	if (! mode_is_float(mode)) {
3948 		return be_duplicate_node(node);
3951 	assert(get_method_n_ress(tp) == 1);
3953 	pn_ret_val = get_Proj_proj(ret_val);
3954 	pn_ret_mem = get_Proj_proj(ret_mem);
3956 	/* get the Barrier */
3957 	barrier = get_Proj_pred(ret_val);
3959 	/* get result input of the Barrier */
3960 	ret_val     = get_irn_n(barrier, pn_ret_val);
3961 	new_ret_val = be_transform_node(ret_val);
3963 	/* get memory input of the Barrier */
3964 	ret_mem     = get_irn_n(barrier, pn_ret_mem);
3965 	new_ret_mem = be_transform_node(ret_mem);
3967 	frame = get_irg_frame(irg);
3969 	dbgi  = get_irn_dbg_info(barrier);
3970 	block = be_transform_node(get_nodes_block(barrier));
3972 	/* store xmm0 onto stack */
3973 	sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3974 	                                     new_ret_mem, new_ret_val);
3975 	set_ia32_ls_mode(sse_store, mode);
3976 	set_ia32_op_type(sse_store, ia32_AddrModeD);
3977 	set_ia32_use_frame(sse_store);
3979 	/* load into x87 register */
3980 	fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3981 	set_ia32_op_type(fld, ia32_AddrModeS);
3982 	set_ia32_use_frame(fld);
3984 	mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3985 	fld   = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3987 	/* create a new barrier */
3988 	arity = get_irn_arity(barrier);
3989 	in    = ALLOCAN(ir_node*, arity);
3990 	for (i = 0; i < arity; ++i) {
		/* value/memory inputs are replaced by the fld result and its
		 * memory Proj; other inputs are transformed as usual */
3993 		if (i == pn_ret_val) {
3995 		} else if (i == pn_ret_mem) {
3998 			ir_node *in = get_irn_n(barrier, i);
3999 			new_in = be_transform_node(in);
4004 	new_barrier = new_ir_node(dbgi, irg, block,
4005 	                          get_irn_op(barrier), get_irn_mode(barrier),
4007 	copy_node_attr(irg, barrier, new_barrier);
4008 	be_duplicate_deps(barrier, new_barrier);
4009 	be_set_transformed_node(barrier, new_barrier);
4011 	/* transform normally */
4012 	return be_duplicate_node(node);
4016  * Transform a be_AddSP into an ia32_SubSP.
/* Growing the (downward-growing) ia32 stack means subtracting from esp,
 * hence AddSP maps to ia32_SubSP.  The size may become an immediate or
 * an address-mode operand. */
4018 static ir_node *gen_be_AddSP(ir_node *node)
4020 	ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4021 	ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4023 	return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4024 	                 match_am | match_immediate);
4028  * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the stack adds to esp. */
4030 static ir_node *gen_be_SubSP(ir_node *node)
4032 	ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4033 	ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4035 	return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4036 	                 match_am | match_immediate);
4040  * Change some phi modes
/*
 * Duplicate a Phi and attach the ia32 register requirement matching its
 * mode (gp for integers/pointers, xmm or vfp for floats depending on
 * SSE2 availability).  The Phi keeps its UNtransformed predecessors for
 * now — loops make eager transformation impossible — and the real
 * arguments are fixed up later; be_enqueue_preds schedules them.
 */
4042 static ir_node *gen_Phi(ir_node *node)
4044 	const arch_register_req_t *req;
4045 	ir_node  *block = be_transform_node(get_nodes_block(node));
4046 	ir_graph *irg   = current_ir_graph;
4047 	dbg_info *dbgi  = get_irn_dbg_info(node);
4048 	ir_mode  *mode  = get_irn_mode(node);
4051 	if (ia32_mode_needs_gp_reg(mode)) {
4052 		/* we shouldn't have any 64bit stuff around anymore */
4053 		assert(get_mode_size_bits(mode) <= 32);
4054 		/* all integer operations are on 32bit registers now */
4056 		req  = ia32_reg_classes[CLASS_ia32_gp].class_req;
4057 	} else if (mode_is_float(mode)) {
4058 		if (ia32_cg_config.use_sse2) {
4060 			req  = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4063 			req  = ia32_reg_classes[CLASS_ia32_vfp].class_req;
		/* e.g. memory Phis: no register constraint */
4066 		req = arch_no_register_req;
4069 	/* phi nodes allow loops, so we use the old arguments for now
4070 	 * and fix this later */
4071 	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4072 	                  get_irn_in(node) + 1);
4073 	copy_node_attr(irg, node, phi);
4074 	be_duplicate_deps(node, phi);
4076 	arch_set_out_register_req(phi, 0, req);
4078 	be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32_Jmp in the transformed block. */
4083 static ir_node *gen_Jmp(ir_node *node)
4085 	ir_node  *block     = get_nodes_block(node);
4086 	ir_node  *new_block = be_transform_node(block);
4087 	dbg_info *dbgi      = get_irn_dbg_info(node);
4090 	new_node = new_bd_ia32_Jmp(dbgi, new_block);
4091 	SET_IA32_ORIG_NODE(new_node, node);
/*
 * Transform an indirect jump.  The jump target is a mode_P value that
 * may be folded into an address-mode operand or an immediate.
 */
4099 static ir_node *gen_IJmp(ir_node *node)
4101 	ir_node  *block     = get_nodes_block(node);
4102 	ir_node  *new_block = be_transform_node(block);
4103 	dbg_info *dbgi      = get_irn_dbg_info(node);
4104 	ir_node  *op        = get_IJmp_target(node);
4106 	ia32_address_mode_t  am;
4107 	ia32_address_t      *addr = &am.addr;
4109 	assert(get_irn_mode(op) == mode_P);
4111 	match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4113 	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4114 	                            addr->mem, am.new_op2);
4115 	set_am_attributes(new_node, &am);
4116 	SET_IA32_ORIG_NODE(new_node, node);
	/* reroute memory users to the load's memory Proj if AM matched a load */
4118 	new_node = fix_mem_proj(new_node, &am);
4124  * Transform a Bound node.
/*
 * Only the lower-bound == 0 form (typical for Java array checks) is
 * supported: index < upper is checked with one unsigned Sub/compare,
 * since an unsigned comparison also rejects negative indices.  The
 * general form panics.
 */
4126 static ir_node *gen_Bound(ir_node *node)
4129 	ir_node  *lower = get_Bound_lower(node);
4130 	dbg_info *dbgi  = get_irn_dbg_info(node);
4132 	if (is_Const_0(lower)) {
4133 		/* typical case for Java */
4134 		ir_node  *sub, *res, *flags, *block;
4136 		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4137 			new_bd_ia32_Sub, match_mode_neutral	| match_am | match_immediate);
4139 		block = get_nodes_block(res);
4140 		if (! is_Proj(res)) {
4142 			set_irn_mode(sub, mode_T);
4143 			res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4145 			sub = get_Proj_pred(res);
		/* branch on the Sub's flags: unsigned "lower than" means in-bounds */
4147 		flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4148 		new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4149 		SET_IA32_ORIG_NODE(new_node, node);
4151 		panic("generic Bound not supported in ia32 Backend");
/* Lowered Shl with an extra scheduling dependency -> ia32_Shl. */
4157 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4159 	ir_node *left  = get_irn_n(node, n_ia32_l_ShlDep_val);
4160 	ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4162 	return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4163 	                       match_immediate | match_mode_neutral);
/* Lowered Shr with an extra scheduling dependency -> ia32_Shr. */
4166 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4168 	ir_node *left  = get_irn_n(node, n_ia32_l_ShrDep_val);
4169 	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4170 	return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lowered Sar with an extra scheduling dependency -> ia32_Sar. */
4174 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4176 	ir_node *left  = get_irn_n(node, n_ia32_l_SarDep_val);
4177 	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4178 	return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/*
 * Lowered 64bit Add low-part -> ia32_Add.  The node is forced to
 * mode_T because the matching l_Adc consumes its flags output.
 */
4182 static ir_node *gen_ia32_l_Add(ir_node *node)
4184 	ir_node *left    = get_irn_n(node, n_ia32_l_Add_left);
4185 	ir_node *right   = get_irn_n(node, n_ia32_l_Add_right);
4186 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4187 			match_commutative | match_am | match_immediate |
4188 			match_mode_neutral);
	/* gen_binop may hand back a result Proj; step back to the Add itself */
4190 	if (is_Proj(lowered)) {
4191 		lowered = get_Proj_pred(lowered);
4193 		assert(is_ia32_Add(lowered));
4194 		set_irn_mode(lowered, mode_T);
/* Lowered 64bit Add high-part: add-with-carry consuming the l_Add flags. */
4200 static ir_node *gen_ia32_l_Adc(ir_node *node)
4202 	return gen_binop_flags(node, new_bd_ia32_Adc,
4203 			match_commutative | match_am | match_immediate |
4204 			match_mode_neutral);
4208  * Transforms a l_MulS into a "real" MulS node.
4210  * @return the created ia32 Mul node
/* Unsigned widening multiply (used by 64bit lowering). */
4212 static ir_node *gen_ia32_l_Mul(ir_node *node)
4214 	ir_node *left  = get_binop_left(node);
4215 	ir_node *right = get_binop_right(node);
4217 	return gen_binop(node, left, right, new_bd_ia32_Mul,
4218 	                 match_commutative | match_am | match_mode_neutral);
4222  * Transforms a l_IMulS into a "real" IMul1OPS node.
4224  * @return the created ia32 IMul1OP node
/* Signed widening multiply (one-operand imul form). */
4226 static ir_node *gen_ia32_l_IMul(ir_node *node)
4228 	ir_node *left  = get_binop_left(node);
4229 	ir_node *right = get_binop_right(node);
4231 	return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4232 	                 match_commutative | match_am | match_mode_neutral);
/*
 * Lowered 64bit Sub low-part -> ia32_Sub (mode_T so l_Sbb can use the
 * flags output).  Note: NOT match_commutative — subtraction isn't.
 */
4235 static ir_node *gen_ia32_l_Sub(ir_node *node)
4237 	ir_node *left    = get_irn_n(node, n_ia32_l_Sub_minuend);
4238 	ir_node *right   = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4239 	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4240 			match_am | match_immediate | match_mode_neutral);
	/* step back from a possible result Proj to the Sub node */
4242 	if (is_Proj(lowered)) {
4243 		lowered = get_Proj_pred(lowered);
4245 		assert(is_ia32_Sub(lowered));
4246 		set_irn_mode(lowered, mode_T);
/* Lowered 64bit Sub high-part: subtract-with-borrow using l_Sub's flags. */
4252 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4254 	return gen_binop_flags(node, new_bd_ia32_Sbb,
4255 			match_am | match_immediate | match_mode_neutral);
4259  * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4260  * op1 - target to be shifted
4261  * op2 - contains bits to be shifted into target
4263  * Only op3 can be an immediate.
/* Shared helper for the two double-width shift lowerings below. */
4265 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4266                                          ir_node *low, ir_node *count)
4268 	ir_node  *block     = get_nodes_block(node);
4269 	ir_node  *new_block = be_transform_node(block);
4270 	dbg_info *dbgi      = get_irn_dbg_info(node);
4271 	ir_node  *new_high  = be_transform_node(high);
4272 	ir_node  *new_low   = be_transform_node(low);
4276 	/* the shift amount can be any mode that is bigger than 5 bits, since all
4277 	 * other bits are ignored anyway */
	/* skip single-user int Convs on the count — the hardware masks it */
4278 	while (is_Conv(count)          &&
4279 	       get_irn_n_edges(count) == 1 &&
4280 	       mode_is_int(get_irn_mode(count))) {
4281 		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4282 		count = get_Conv_op(count);
4284 	new_count = create_immediate_or_transform(count, 0);
4286 	if (is_ia32_l_ShlD(node)) {
4287 		new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4290 		new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4293 	SET_IA32_ORIG_NODE(new_node, node);
/* Lowered 64bit shift-left: unpack operands and defer to the helper. */
4298 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4300 	ir_node *high  = get_irn_n(node, n_ia32_l_ShlD_val_high);
4301 	ir_node *low   = get_irn_n(node, n_ia32_l_ShlD_val_low);
4302 	ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4303 	return gen_lowered_64bit_shifts(node, high, low, count);
/* Lowered 64bit shift-right: unpack operands and defer to the helper. */
4306 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4308 	ir_node *high  = get_irn_n(node, n_ia32_l_ShrD_val_high);
4309 	ir_node *low   = get_irn_n(node, n_ia32_l_ShrD_val_low);
4310 	ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4311 	return gen_lowered_64bit_shifts(node, high, low, count);
/*
 * Convert a lowered 64bit integer (low/high word pair) to a float:
 * store both halves to a frame slot and fild the 64bit value (x87 only).
 * Because fild is a *signed* 64bit load, an unsigned source needs a
 * correction: when the sign bit was set, add 2^64 (the ULL_BIAS
 * constant) to the loaded value.
 */
4314 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4316 	ir_node  *src_block    = get_nodes_block(node);
4317 	ir_node  *block        = be_transform_node(src_block);
4318 	ir_graph *irg          = current_ir_graph;
4319 	dbg_info *dbgi         = get_irn_dbg_info(node);
4320 	ir_node  *frame        = get_irg_frame(irg);
4321 	ir_node  *val_low      = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4322 	ir_node  *val_high     = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4323 	ir_node  *new_val_low  = be_transform_node(val_low);
4324 	ir_node  *new_val_high = be_transform_node(val_high);
4326 	ir_node  *sync, *fild, *res;
4327 	ir_node  *store_low, *store_high;
4329 	if (ia32_cg_config.use_sse2) {
4330 		panic("ia32_l_LLtoFloat not implemented for SSE2");
	/* spill both 32bit halves into one contiguous 64bit frame slot */
4334 	store_low  = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4336 	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4338 	SET_IA32_ORIG_NODE(store_low,  node);
4339 	SET_IA32_ORIG_NODE(store_high, node);
4341 	set_ia32_use_frame(store_low);
4342 	set_ia32_use_frame(store_high);
4343 	set_ia32_op_type(store_low, ia32_AddrModeD);
4344 	set_ia32_op_type(store_high, ia32_AddrModeD);
4345 	set_ia32_ls_mode(store_low, mode_Iu);
4346 	set_ia32_ls_mode(store_high, mode_Is);
	/* high word lives 4 bytes above the low word (little endian) */
4347 	add_ia32_am_offs_int(store_high, 4);
4351 	sync  = new_rd_Sync(dbgi, block, 2, in);
	/* load the combined 64bit value into an x87 register */
4354 	fild  = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4356 	set_ia32_use_frame(fild);
4357 	set_ia32_op_type(fild, ia32_AddrModeS);
4358 	set_ia32_ls_mode(fild, mode_Ls);
4360 	SET_IA32_ORIG_NODE(fild, node);
4362 	res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4364 	if (! mode_is_signed(get_irn_mode(val_high))) {
4365 		ia32_address_mode_t  am;
		/* Shr by 31 leaves 0 or 1: used as scaled index to select either
		 * 0.0 or the 2^64 bias from the constant table */
4367 		ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4370 		am.addr.base          = get_symconst_base();
4371 		am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4372 		am.addr.mem           = nomem;
4375 		am.addr.symconst_ent  = ia32_gen_fp_known_const(ia32_ULLBIAS);
4376 		am.addr.use_frame     = 0;
4377 		am.addr.frame_entity  = NULL;
4378 		am.addr.symconst_sign = 0;
4379 		am.ls_mode            = mode_F;
4380 		am.mem_proj           = nomem;
4381 		am.op_type            = ia32_AddrModeS;
4383 		am.new_op2            = ia32_new_NoReg_vfp(env_cg);
4384 		am.pinned             = op_pin_state_floats;
4386 		am.ins_permuted       = 0;
4388 		fadd  = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4389 			am.new_op1, am.new_op2, get_fpcw());
4390 		set_am_attributes(fadd, &am);
4392 		set_irn_mode(fadd, mode_T);
4393 		res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/*
 * Convert a float to a lowered 64bit integer: fist the x87 value into a
 * 64bit frame slot; the two 32bit halves are read back later by
 * gen_Proj_l_FloattoLL.
 */
4398 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4400 	ir_node  *src_block  = get_nodes_block(node);
4401 	ir_node  *block      = be_transform_node(src_block);
4402 	ir_graph *irg        = get_Block_irg(block);
4403 	dbg_info *dbgi       = get_irn_dbg_info(node);
4404 	ir_node  *frame      = get_irg_frame(irg);
4405 	ir_node  *val        = get_irn_n(node, n_ia32_l_FloattoLL_val);
4406 	ir_node  *new_val    = be_transform_node(val);
4407 	ir_node  *fist, *mem;
4409 	mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4410 	SET_IA32_ORIG_NODE(fist, node);
4411 	set_ia32_use_frame(fist);
4412 	set_ia32_op_type(fist, ia32_AddrModeD);
4413 	set_ia32_ls_mode(fist, mode_Ls);
4419  * the BAD transformer.
/* Registered for opcodes that must never reach the ia32 transformer. */
4421 static ir_node *bad_transform(ir_node *node)
4423 	panic("No transform function for %+F available.", node);
/*
 * Read one 32bit half of a l_FloattoLL result back from the frame slot
 * the fist wrote: a 32bit Load, offset by 4 for the high word.
 */
4426 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4428 	ir_node  *block    = be_transform_node(get_nodes_block(node));
4429 	ir_graph *irg      = get_Block_irg(block);
4430 	ir_node  *pred     = get_Proj_pred(node);
4431 	ir_node  *new_pred = be_transform_node(pred);
4432 	ir_node  *frame    = get_irg_frame(irg);
4433 	dbg_info *dbgi     = get_irn_dbg_info(node);
4434 	long      pn       = get_Proj_proj(node);
4439 	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4440 	SET_IA32_ORIG_NODE(load, node);
4441 	set_ia32_use_frame(load);
4442 	set_ia32_op_type(load, ia32_AddrModeS);
4443 	set_ia32_ls_mode(load, mode_Iu);
4444 	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
4445 	 * 32 bit from it with this particular load */
4446 	attr = get_ia32_attr(load);
4447 	attr->data.need_64bit_stackent = 1;
4449 	if (pn == pn_ia32_l_FloattoLL_res_high) {
4450 		add_ia32_am_offs_int(load, 4);
4452 		assert(pn == pn_ia32_l_FloattoLL_res_low);
4455 	proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4461  * Transform the Projs of an AddSP.
/* be_AddSP was lowered to ia32_SubSP, so renumber Projs accordingly
 * and pin the stack-pointer result to esp. */
4463 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4465 	ir_node  *pred     = get_Proj_pred(node);
4466 	ir_node  *new_pred = be_transform_node(pred);
4467 	dbg_info *dbgi     = get_irn_dbg_info(node);
4468 	long     proj      = get_Proj_proj(node);
4470 	if (proj == pn_be_AddSP_sp) {
4471 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4472 		                           pn_ia32_SubSP_stack);
4473 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4475 	} else if (proj == pn_be_AddSP_res) {
4476 		return new_rd_Proj(dbgi, new_pred, mode_Iu,
4477 		                   pn_ia32_SubSP_addr);
4478 	} else if (proj == pn_be_AddSP_M) {
4479 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4482 	panic("No idea how to transform proj->AddSP");
4486  * Transform the Projs of a SubSP.
/* be_SubSP was lowered to ia32_AddSP; mirror of gen_Proj_be_AddSP. */
4488 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4490 	ir_node  *pred     = get_Proj_pred(node);
4491 	ir_node  *new_pred = be_transform_node(pred);
4492 	dbg_info *dbgi     = get_irn_dbg_info(node);
4493 	long     proj      = get_Proj_proj(node);
4495 	if (proj == pn_be_SubSP_sp) {
4496 		ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4497 		                           pn_ia32_AddSP_stack);
4498 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4500 	} else if (proj == pn_be_SubSP_M) {
4501 		return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4504 	panic("No idea how to transform proj->SubSP");
4508  * Transform and renumber the Projs from a Load.
/*
 * The Load may have been turned into an ia32_Load, a Conv with source
 * address mode, an SSE xLoad, an x87 vfld, or folded away entirely into
 * another node's address mode — each case needs a different Proj
 * renumbering.  Memory Projs of multi-user Loads are deliberately left
 * untransformed here (see comment below).
 */
4510 static ir_node *gen_Proj_Load(ir_node *node)
4513 	ir_node  *block    = be_transform_node(get_nodes_block(node));
4514 	ir_node  *pred     = get_Proj_pred(node);
4515 	dbg_info *dbgi     = get_irn_dbg_info(node);
4516 	long     proj      = get_Proj_proj(node);
4518 	/* loads might be part of source address mode matches, so we don't
4519 	 * transform the ProjMs yet (with the exception of loads whose result is
4522 	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4525 		/* this is needed, because sometimes we have loops that are only
4526 		   reachable through the ProjM */
4527 		be_enqueue_preds(node);
4528 		/* do it in 2 steps, to silence firm verifier */
4529 		res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4530 		set_Proj_proj(res, pn_ia32_mem);
4534 	/* renumber the proj */
4535 	new_pred = be_transform_node(pred);
4536 	if (is_ia32_Load(new_pred)) {
4539 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4541 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4542 		case pn_Load_X_regular:
4543 			return new_rd_Jmp(dbgi, block);
4544 		case pn_Load_X_except:
4545 			/* This Load might raise an exception. Mark it. */
4546 			set_ia32_exc_label(new_pred, 1);
4547 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
	/* load was fused into a conversion (source address mode) */
4551 	} else if (is_ia32_Conv_I2I(new_pred) ||
4552 	           is_ia32_Conv_I2I8Bit(new_pred)) {
4553 		set_irn_mode(new_pred, mode_T);
4554 		if (proj == pn_Load_res) {
4555 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4556 		} else if (proj == pn_Load_M) {
4557 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4559 	} else if (is_ia32_xLoad(new_pred)) {
4562 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4564 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4565 		case pn_Load_X_regular:
4566 			return new_rd_Jmp(dbgi, block);
4567 		case pn_Load_X_except:
4568 			/* This Load might raise an exception. Mark it. */
4569 			set_ia32_exc_label(new_pred, 1);
4570 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4574 	} else if (is_ia32_vfld(new_pred)) {
4577 			return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4579 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4580 		case pn_Load_X_regular:
4581 			return new_rd_Jmp(dbgi, block);
4582 		case pn_Load_X_except:
4583 			/* This Load might raise an exception. Mark it. */
4584 			set_ia32_exc_label(new_pred, 1);
4585 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4590 		/* can happen for ProJMs when source address mode happened for the
4593 		/* however it should not be the result proj, as that would mean the
4594 		   load had multiple users and should not have been used for
4596 		if (proj != pn_Load_M) {
4597 			panic("internal error: transformed node not a Load");
4599 		return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4602 	panic("No idea how to transform proj");
4606  * Transform and renumber the Projs from a DivMod like instruction.
/*
 * Div, Mod and DivMod all lower to one ia32 Div/IDiv with separate
 * div_res and mod_res outputs; map each firm Proj number onto the
 * matching ia32 Proj number.
 */
4608 static ir_node *gen_Proj_DivMod(ir_node *node)
4610 	ir_node  *block    = be_transform_node(get_nodes_block(node));
4611 	ir_node  *pred     = get_Proj_pred(node);
4612 	ir_node  *new_pred = be_transform_node(pred);
4613 	dbg_info *dbgi     = get_irn_dbg_info(node);
4614 	long     proj      = get_Proj_proj(node);
4616 	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4618 	switch (get_irn_opcode(pred)) {
4622 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4624 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4625 		case pn_Div_X_regular:
4626 			return new_rd_Jmp(dbgi, block);
4627 		case pn_Div_X_except:
4628 			set_ia32_exc_label(new_pred, 1);
4629 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4637 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4639 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4640 		case pn_Mod_X_except:
4641 			set_ia32_exc_label(new_pred, 1);
4642 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4650 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4651 		case pn_DivMod_res_div:
4652 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4653 		case pn_DivMod_res_mod:
4654 			return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4655 		case pn_DivMod_X_regular:
4656 			return new_rd_Jmp(dbgi, block);
4657 		case pn_DivMod_X_except:
4658 			set_ia32_exc_label(new_pred, 1);
4659 			return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4668 	panic("No idea how to transform proj->DivMod");
4672  * Transform and renumber the Projs from a CopyB.
/* CopyB lowers to either CopyB_i (known size) or CopyB (rep movs);
 * renumber the memory Proj for whichever variant was chosen. */
4674 static ir_node *gen_Proj_CopyB(ir_node *node)
4676 	ir_node  *pred     = get_Proj_pred(node);
4677 	ir_node  *new_pred = be_transform_node(pred);
4678 	dbg_info *dbgi     = get_irn_dbg_info(node);
4679 	long     proj      = get_Proj_proj(node);
4682 	case pn_CopyB_M_regular:
4683 		if (is_ia32_CopyB_i(new_pred)) {
4684 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4685 		} else if (is_ia32_CopyB(new_pred)) {
4686 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4693 	panic("No idea how to transform proj->CopyB");
4697  * Transform and renumber the Projs from a Quot.
/* Float division lowers to xDiv (SSE2) or vfdiv (x87); pick the
 * matching result mode (xmm vs. vfp) and Proj numbers. */
4699 static ir_node *gen_Proj_Quot(ir_node *node)
4701 	ir_node  *pred     = get_Proj_pred(node);
4702 	ir_node  *new_pred = be_transform_node(pred);
4703 	dbg_info *dbgi     = get_irn_dbg_info(node);
4704 	long     proj      = get_Proj_proj(node);
4708 		if (is_ia32_xDiv(new_pred)) {
4709 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4710 		} else if (is_ia32_vfdiv(new_pred)) {
4711 			return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4715 		if (is_ia32_xDiv(new_pred)) {
4716 			return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4717 		} else if (is_ia32_vfdiv(new_pred)) {
4718 			return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4721 	case pn_Quot_X_regular:
4722 	case pn_Quot_X_except:
4727 	panic("No idea how to transform proj->Quot");
/*
 * Transform an indirect be_Call into an ia32_Call.  The call target may
 * be folded into address mode or an immediate; register parameters are
 * routed to their fixed eax/ecx/edx slots; under SSE2 float-returning
 * calls are recorded for a post-processing fixup pass.
 */
4730 static ir_node *gen_be_Call(ir_node *node)
4732 	dbg_info *const dbgi      = get_irn_dbg_info(node);
4733 	ir_node  *const src_block = get_nodes_block(node);
4734 	ir_node  *const block     = be_transform_node(src_block);
4735 	ir_node  *const src_mem   = get_irn_n(node, be_pos_Call_mem);
4736 	ir_node  *const src_sp    = get_irn_n(node, be_pos_Call_sp);
4737 	ir_node  *const sp        = be_transform_node(src_sp);
4738 	ir_node  *const src_ptr   = get_irn_n(node, be_pos_Call_ptr);
4739 	ia32_address_mode_t  am;
4740 	ia32_address_t *const addr = &am.addr;
4745 	ir_node *       eax       = noreg_GP;
4746 	ir_node *       ecx       = noreg_GP;
4747 	ir_node *       edx       = noreg_GP;
4748 	unsigned  const pop       = be_Call_get_pop(node);
4749 	ir_type  *const call_tp   = be_Call_get_type(node);
4750 	int             old_no_pic_adjust;
4752 	/* Run the x87 simulator if the call returns a float value */
4753 	if (get_method_n_ress(call_tp) > 0) {
4754 		ir_type *const res_type = get_method_res_type(call_tp, 0);
4755 		ir_mode *const res_mode = get_type_mode(res_type);
4757 		if (res_mode != NULL && mode_is_float(res_mode)) {
4758 			env_cg->do_x87_sim = 1;
4762 	/* We do not want be_Call direct calls */
4763 	assert(be_Call_get_entity(node) == NULL);
4765 	/* special case for PIC trampoline calls */
4766 	old_no_pic_adjust = no_pic_adjust;
4767 	no_pic_adjust     = env_cg->birg->main_env->options->pic;
4769 	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4770 	                match_am | match_immediate);
4772 	no_pic_adjust = old_no_pic_adjust;
	/* last input is the fpcw; walk remaining inputs backwards and sort
	 * the GP register parameters into their fixed slots */
4774 	i    = get_irn_arity(node) - 1;
4775 	fpcw = be_transform_node(get_irn_n(node, i--));
4776 	for (; i >= be_pos_Call_first_arg; --i) {
4777 		arch_register_req_t const *const req = arch_get_register_req(node, i);
4778 		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4780 		assert(req->type == arch_register_req_type_limited);
4781 		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4783 		switch (*req->limited) {
4784 			case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4785 			case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4786 			case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4787 			default: panic("Invalid GP register for register parameter");
4791 	mem  = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4792 	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4793 	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4794 	set_am_attributes(call, &am);
4795 	call = fix_mem_proj(call, &am);
4797 	if (get_irn_pinned(node) == op_pin_state_pinned)
4798 		set_irn_pinned(call, op_pin_state_pinned);
4800 	SET_IA32_ORIG_NODE(call, node);
4802 	if (ia32_cg_config.use_sse2) {
4803 		/* remember this call for post-processing */
4804 		ARR_APP1(ir_node *, call_list, call);
4805 		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4812  * Transform Builtin trap
/* __builtin_trap -> ud2 (guaranteed invalid-opcode exception). */
4814 static ir_node *gen_trap(ir_node *node)
4816 	dbg_info *dbgi  = get_irn_dbg_info(node);
4817 	ir_node *block  = be_transform_node(get_nodes_block(node));
4818 	ir_node *mem    = be_transform_node(get_Builtin_mem(node));
4820 	return new_bd_ia32_UD2(dbgi, block, mem);
4824  * Transform Builtin debugbreak
/* debug-break builtin -> int3 breakpoint instruction. */
4826 static ir_node *gen_debugbreak(ir_node *node)
4828 	dbg_info *dbgi  = get_irn_dbg_info(node);
4829 	ir_node *block  = be_transform_node(get_nodes_block(node));
4830 	ir_node *mem    = be_transform_node(get_Builtin_mem(node));
4832 	return new_bd_ia32_Breakpoint(dbgi, block, mem);
4836  * Transform Builtin return_address
/*
 * __builtin_return_address(level): for level > 0 climb `value` frames
 * via ClimbFrame, then load the return address slot of that frame.
 * The load is frame-relative through a dedicated entity.
 */
4838 static ir_node *gen_return_address(ir_node *node)
4840 	ir_node *param      = get_Builtin_param(node, 0);
4841 	ir_node *frame      = get_Builtin_param(node, 1);
4842 	dbg_info *dbgi      = get_irn_dbg_info(node);
4843 	tarval  *tv         = get_Const_tarval(param);
4844 	unsigned long value = get_tarval_long(tv);
4846 	ir_node *block  = be_transform_node(get_nodes_block(node));
4847 	ir_node *ptr    = be_transform_node(frame);
	/* walk up `value` stack frames */
4851 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4852 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4853 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4856 	/* load the return address from this frame */
4857 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4859 	set_irn_pinned(load, get_irn_pinned(node));
4860 	set_ia32_op_type(load, ia32_AddrModeS);
4861 	set_ia32_ls_mode(load, mode_Iu);
4863 	set_ia32_am_offs_int(load, 0);
4864 	set_ia32_use_frame(load);
4865 	set_ia32_frame_ent(load, ia32_get_return_address_entity());
4867 	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all three load variants share the same res Proj number, so the
		 * rematerializable flag is safe regardless of later rewriting */
4868 		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4869 				&& pn_ia32_vfld_res == pn_ia32_Load_res
4870 				&& pn_ia32_Load_res == pn_ia32_res);
4871 		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4874 	SET_IA32_ORIG_NODE(load, node);
4875 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4879  * Transform Builtin frame_address
/*
 * __builtin_frame_address(level): analogous to gen_return_address but
 * loads the saved frame pointer slot instead of the return address.
 */
4881 static ir_node *gen_frame_address(ir_node *node)
4883 	ir_node *param      = get_Builtin_param(node, 0);
4884 	ir_node *frame      = get_Builtin_param(node, 1);
4885 	dbg_info *dbgi      = get_irn_dbg_info(node);
4886 	tarval  *tv         = get_Const_tarval(param);
4887 	unsigned long value = get_tarval_long(tv);
4889 	ir_node *block  = be_transform_node(get_nodes_block(node));
4890 	ir_node *ptr    = be_transform_node(frame);
	/* walk up `value` stack frames */
4895 		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4896 		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4897 		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4900 	/* load the frame address from this frame */
4901 	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4903 	set_irn_pinned(load, get_irn_pinned(node));
4904 	set_ia32_op_type(load, ia32_AddrModeS);
4905 	set_ia32_ls_mode(load, mode_Iu);
4907 	ent = ia32_get_frame_address_entity();
4909 		set_ia32_am_offs_int(load, 0);
4910 		set_ia32_use_frame(load);
4911 		set_ia32_frame_ent(load, ent);
4913 		/* will fail anyway, but gcc does this: */
4914 		set_ia32_am_offs_int(load, 0);
4917 	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* see gen_return_address: res Proj numbers coincide for all loads */
4918 		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4919 				&& pn_ia32_vfld_res == pn_ia32_Load_res
4920 				&& pn_ia32_Load_res == pn_ia32_res);
4921 		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4924 	SET_IA32_ORIG_NODE(load, node);
4925 	return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4929  * Transform Builtin prefetch
/*
 * (comment above fixed: it was a copy-paste of "frame_address")
 * __builtin_prefetch(ptr, rw, locality): without any prefetch support
 * the builtin degrades to routing memory.  With 3DNow! a write hint
 * maps to prefetchw; with SSE the locality argument selects one of
 * prefetchnta/t2/t1/t0; otherwise the plain 3DNow! prefetch is used.
 */
4931 static ir_node *gen_prefetch(ir_node *node)
4934 	ir_node        *ptr, *block, *mem, *base, *index;
4935 	ir_node        *param,  *new_node;
4938 	ia32_address_t addr;
4940 	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4941 		/* no prefetch at all, route memory */
4942 		return be_transform_node(get_Builtin_mem(node));
	/* second builtin argument: 0 = read, 1 = write */
4945 	param = get_Builtin_param(node, 1);
4946 	tv    = get_Const_tarval(param);
4947 	rw    = get_tarval_long(tv);
4949 	/* construct load address */
4950 	memset(&addr, 0, sizeof(addr));
4951 	ptr = get_Builtin_param(node, 0);
4952 	ia32_create_address_mode(&addr, ptr, 0);
4959 		base = be_transform_node(base);
4962 	if (index == NULL) {
4965 		index = be_transform_node(index);
4968 	dbgi  = get_irn_dbg_info(node);
4969 	block = be_transform_node(get_nodes_block(node));
4970 	mem   = be_transform_node(get_Builtin_mem(node));
4972 	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4973 		/* we have 3DNow!, this was already checked above */
4974 		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4975 	} else if (ia32_cg_config.use_sse_prefetch) {
4976 		/* note: rw == 1 is IGNORED in that case */
4977 		param    = get_Builtin_param(node, 2);
4978 		tv       = get_Const_tarval(param);
4979 		locality = get_tarval_long(tv);
4981 		/* SSE style prefetch */
4984 			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4987 			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4990 			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4993 			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4997 		assert(ia32_cg_config.use_3dnow_prefetch);
4998 		/* 3DNow! style prefetch */
4999 		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
5002 	set_irn_pinned(new_node, get_irn_pinned(node));
5003 	set_ia32_op_type(new_node, ia32_AddrModeS);
5004 	set_ia32_ls_mode(new_node, mode_Bu);
5005 	set_address(new_node, &addr);
5007 	SET_IA32_ORIG_NODE(new_node, node);
5009 	be_dep_on_frame(new_node);
5010 	return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Transform a bsf-like builtin: a unary operation whose operand may be
 * folded into a source address mode (memory operand).
 *
 * @param node  the Builtin node
 * @param func  constructor for the concrete ia32 node (e.g. Bsf/Bsr)
 */
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
ir_node *param = get_Builtin_param(node, 0);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_node *block = get_nodes_block(node);
ir_node *new_block = be_transform_node(block);
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;

/* try to match the operand as an address-mode source operand */
match_arguments(&am, block, NULL, param, NULL, match_am);

cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
set_am_attributes(cnt, &am);
set_ia32_ls_mode(cnt, get_irn_mode(param));

SET_IA32_ORIG_NODE(cnt, node);
return fix_mem_proj(cnt, &am);
/**
 * Transform builtin ffs.
 *
 * ffs(x) = bsf(x) + 1 for x != 0, and 0 for x == 0.  Computed as
 * (bsf(x) | -(x == 0)) + 1: when the input is zero the OR forces all
 * bits to one, so the final increment yields 0.
 */
static ir_node *gen_ffs(ir_node *node)
ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
ir_node *real = skip_Proj(bsf);
dbg_info *dbgi = get_irn_dbg_info(real);
ir_node *block = get_nodes_block(real);
ir_node *flag, *set, *conv, *neg, *or;

/* ensure the Bsf is in mode_T so we can also project its flags */
if (get_irn_mode(real) != mode_T) {
set_irn_mode(real, mode_T);
bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);

flag = new_r_Proj(real, mode_b, pn_ia32_flags);

/* sete: 1 iff the bsf input was zero (ZF set by bsf) */
set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
SET_IA32_ORIG_NODE(set, node);

/* widen the 8-bit setcc result to 32 bit */
conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
SET_IA32_ORIG_NODE(conv, node);

/* neg: 0 normally, 0xFFFFFFFF when the input was zero */
neg = new_bd_ia32_Neg(dbgi, block, conv);

/* merge the all-ones mask into the bsf result */
or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
set_ia32_commutative(or);

/* final +1 turns bit index into a 1-based position (or 0 for input 0) */
return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
/**
 * Transform builtin clz.
 *
 * bsr yields the index of the highest set bit; for 32-bit values
 * clz(x) = 31 - bsr(x), which equals bsr(x) XOR 31.
 */
static ir_node *gen_clz(ir_node *node)
ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
ir_node *real = skip_Proj(bsr);
dbg_info *dbgi = get_irn_dbg_info(real);
ir_node *block = get_nodes_block(real);
ir_node *imm = ia32_create_Immediate(NULL, 0, 31);

return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/**
 * Transform builtin ctz: count-trailing-zeros maps directly to bsf.
 */
static ir_node *gen_ctz(ir_node *node)
return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Transform builtin parity.
 *
 * Compares the operand against 0 and evaluates the parity flag via
 * setp, then widens the 8-bit result.
 * NOTE(review): the x86 parity flag only reflects the least significant
 * byte of the ALU result -- confirm this matches the builtin's intended
 * semantics for operands wider than 8 bit.
 */
static ir_node *gen_parity(ir_node *node)
ir_node *param = get_Builtin_param(node, 0);
dbg_info *dbgi = get_irn_dbg_info(node);
ir_node *block = get_nodes_block(node);
ir_node *new_block = be_transform_node(block);
ir_node *imm, *cmp, *new_node;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;

/* cmp param, 0 -- sets the parity flag */
match_arguments(&am, block, NULL, param, NULL, match_am);
imm = ia32_create_Immediate(NULL, 0, 0);
cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
addr->mem, imm, am.new_op2, am.ins_permuted, 0);
set_am_attributes(cmp, &am);
set_ia32_ls_mode(cmp, mode_Iu);

SET_IA32_ORIG_NODE(cmp, node);

cmp = fix_mem_proj(cmp, &am);

/* setp: materialize the parity flag as a byte */
new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
SET_IA32_ORIG_NODE(new_node, node);

/* widen the byte to 32 bit */
new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
nomem, new_node, mode_Bu);
SET_IA32_ORIG_NODE(new_node, node);
5139 * Transform builtin popcount
5141 static ir_node *gen_popcount(ir_node *node)
5143 ir_node *param = get_Builtin_param(node, 0);
5144 dbg_info *dbgi = get_irn_dbg_info(node);
5146 ir_node *block = get_nodes_block(node);
5147 ir_node *new_block = be_transform_node(block);
5150 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5152 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5153 if (ia32_cg_config.use_popcnt) {
5154 ia32_address_mode_t am;
5155 ia32_address_t *addr = &am.addr;
5158 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5160 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5161 set_am_attributes(cnt, &am);
5162 set_ia32_ls_mode(cnt, get_irn_mode(param));
5164 SET_IA32_ORIG_NODE(cnt, node);
5165 return fix_mem_proj(cnt, &am);
5168 new_param = be_transform_node(param);
5170 /* do the standard popcount algo */
5172 /* m1 = x & 0x55555555 */
5173 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5174 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5177 simm = ia32_create_Immediate(NULL, 0, 1);
5178 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5180 /* m2 = s1 & 0x55555555 */
5181 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5184 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5186 /* m4 = m3 & 0x33333333 */
5187 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5188 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5191 simm = ia32_create_Immediate(NULL, 0, 2);
5192 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5194 /* m5 = s2 & 0x33333333 */
5195 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5198 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5200 /* m7 = m6 & 0x0F0F0F0F */
5201 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5202 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5205 simm = ia32_create_Immediate(NULL, 0, 4);
5206 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5208 /* m8 = s3 & 0x0F0F0F0F */
5209 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5212 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5214 /* m10 = m9 & 0x00FF00FF */
5215 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5216 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5219 simm = ia32_create_Immediate(NULL, 0, 8);
5220 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5222 /* m11 = s4 & 0x00FF00FF */
5223 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5225 /* m12 = m10 + m11 */
5226 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5228 /* m13 = m12 & 0x0000FFFF */
5229 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5230 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5232 /* s5 = m12 >> 16 */
5233 simm = ia32_create_Immediate(NULL, 0, 16);
5234 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5236 /* res = m13 + s5 */
5237 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5241 * Transform builtin byte swap.
5243 static ir_node *gen_bswap(ir_node *node)
5245 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5246 dbg_info *dbgi = get_irn_dbg_info(node);
5248 ir_node *block = get_nodes_block(node);
5249 ir_node *new_block = be_transform_node(block);
5250 ir_mode *mode = get_irn_mode(param);
5251 unsigned size = get_mode_size_bits(mode);
5252 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5256 if (ia32_cg_config.use_i486) {
5257 /* swap available */
5258 return new_bd_ia32_Bswap(dbgi, new_block, param);
5260 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5261 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5263 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5264 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5266 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5268 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5269 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5271 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5272 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5275 /* swap16 always available */
5276 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5279 panic("Invalid bswap size (%d)", size);
/**
 * Transform builtin outport (write a value to an x86 I/O port).
 */
static ir_node *gen_outport(ir_node *node)
/* the port number may be folded into an immediate */
ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
ir_node *oldv = get_Builtin_param(node, 1);
ir_mode *mode = get_irn_mode(oldv);
ir_node *value = be_transform_node(oldv);
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *mem = be_transform_node(get_Builtin_mem(node));
dbg_info *dbgi = get_irn_dbg_info(node);

ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* access width is determined by the stored value's mode */
set_ia32_ls_mode(res, mode);
/**
 * Transform builtin inport (read a value from an x86 I/O port).
 */
static ir_node *gen_inport(ir_node *node)
/* the result mode is taken from the builtin's method type */
ir_type *tp = get_Builtin_type(node);
ir_type *rstp = get_method_res_type(tp, 0);
ir_mode *mode = get_type_mode(rstp);
ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *mem = be_transform_node(get_Builtin_mem(node));
dbg_info *dbgi = get_irn_dbg_info(node);

ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
set_ia32_ls_mode(res, mode);

/* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Writes the machine-code sequence
 *     mov ecx, <env>     (opcode byte 0xB9 + 32-bit immediate)
 *     jmp rel <callee>   (opcode byte 0xE9 + 32-bit pc-relative offset)
 * byte by byte into the memory pointed to by the first parameter.
 */
static ir_node *gen_inner_trampoline(ir_node *node)
ir_node *ptr = get_Builtin_param(node, 0);
ir_node *callee = get_Builtin_param(node, 1);
ir_node *env = be_transform_node(get_Builtin_param(node, 2));
ir_node *mem = get_Builtin_mem(node);
ir_node *block = get_nodes_block(node);
ir_node *new_block = be_transform_node(block);
ir_node *trampoline;
dbg_info *dbgi = get_irn_dbg_info(node);
ia32_address_t addr;

/* construct store address */
memset(&addr, 0, sizeof(addr));
ia32_create_address_mode(&addr, ptr, 0);

if (addr.base == NULL) {
addr.base = noreg_GP;
addr.base = be_transform_node(addr.base);

if (addr.index == NULL) {
addr.index = noreg_GP;
addr.index = be_transform_node(addr.index);

addr.mem = be_transform_node(mem);

/* mov ecx, <env> */
/* first the 0xB9 opcode byte ... */
val = ia32_create_Immediate(NULL, 0, 0xB9);
store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
addr.index, addr.mem, val);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Bu);
set_address(store, &addr);

/* ... then the 32-bit environment pointer as mov immediate */
store = new_bd_ia32_Store(dbgi, new_block, addr.base,
addr.index, addr.mem, env);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Iu);
set_address(store, &addr);

/* jmp rel <callee> */
val = ia32_create_Immediate(NULL, 0, 0xE9);
store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
addr.index, addr.mem, val);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Bu);
set_address(store, &addr);

trampoline = be_transform_node(ptr);

/* the callee is typically an immediate */
if (is_SymConst(callee)) {
	/* -10 accounts for the 10 trampoline bytes preceding the jump target */
	rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
	rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* pc-relative offset = callee - trampoline address */
rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

store = new_bd_ia32_Store(dbgi, new_block, addr.base,
addr.index, addr.mem, rel);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Iu);
set_address(store, &addr);

/* result tuple: memory and the trampoline address */
return new_r_Tuple(new_block, 2, in);
/**
 * Transform Builtin node: dispatch to the kind-specific transformer.
 */
static ir_node *gen_Builtin(ir_node *node)
ir_builtin_kind kind = get_Builtin_kind(node);
return gen_trap(node);
case ir_bk_debugbreak:
return gen_debugbreak(node);
case ir_bk_return_address:
return gen_return_address(node);
case ir_bk_frame_address:
return gen_frame_address(node);
case ir_bk_prefetch:
return gen_prefetch(node);
return gen_ffs(node);
return gen_clz(node);
return gen_ctz(node);
return gen_parity(node);
case ir_bk_popcount:
return gen_popcount(node);
return gen_bswap(node);
return gen_outport(node);
return gen_inport(node);
case ir_bk_inner_trampoline:
return gen_inner_trampoline(node);
/* any builtin kind not handled above is a hard error */
panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform Proj(Builtin) node: map the generic Builtin proj numbers to
 * the proj numbers of the already-transformed ia32 node.
 */
static ir_node *gen_Proj_Builtin(ir_node *proj)
ir_node *node = get_Proj_pred(proj);
ir_node *new_node = be_transform_node(node);
ir_builtin_kind kind = get_Builtin_kind(node);
case ir_bk_return_address:
case ir_bk_frame_address:
case ir_bk_popcount:
/* these builtins produce exactly one data result */
assert(get_Proj_proj(proj) == pn_Builtin_1_result);
case ir_bk_debugbreak:
case ir_bk_prefetch:
/* these builtins only produce memory */
assert(get_Proj_proj(proj) == pn_Builtin_M);
if (get_Proj_proj(proj) == pn_Builtin_1_result) {
return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
assert(get_Proj_proj(proj) == pn_Builtin_M);
return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
case ir_bk_inner_trampoline:
/* inner trampoline was transformed into a Tuple (mem, result) */
if (get_Proj_proj(proj) == pn_Builtin_1_result) {
return get_Tuple_pred(new_node, 1);
assert(get_Proj_proj(proj) == pn_Builtin_M);
return get_Tuple_pred(new_node, 0);
panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Transform be_IncSP: duplicate it and mark it as flag-modifying,
 * since stack-pointer adjustment uses add/sub which clobber eflags. */
static ir_node *gen_be_IncSP(ir_node *node)
ir_node *res = be_duplicate_node(node);
arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call: map be_Call proj numbers onto the
 * corresponding ia32_Call proj numbers and fix up register assignments.
 */
static ir_node *gen_Proj_be_Call(ir_node *node)
ir_node *call = get_Proj_pred(node);
ir_node *new_call = be_transform_node(call);
dbg_info *dbgi = get_irn_dbg_info(node);
long proj = get_Proj_proj(node);
ir_mode *mode = get_irn_mode(node);

if (proj == pn_be_Call_M_regular) {
return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);

/* transform call modes */
if (mode_is_data(mode)) {
const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

/* Map from be_Call to ia32_Call proj number */
if (proj == pn_be_Call_sp) {
proj = pn_ia32_Call_stack;
} else if (proj == pn_be_Call_M_regular) {
proj = pn_ia32_Call_M;
arch_register_req_t const *const req = arch_get_register_req_out(node);
int const n_outs = arch_irn_get_n_outs(new_call);

/* result projs: search the ia32_Call output with the same limited
 * register requirement as the original be_Call output */
assert(proj >= pn_be_Call_first_res);
assert(req->type & arch_register_req_type_limited);

for (i = 0; i < n_outs; ++i) {
arch_register_req_t const *const new_req
= arch_get_out_register_req(new_call, i);

/* skip outputs that cannot hold this result */
if (!(new_req->type & arch_register_req_type_limited) ||
new_req->cls != req->cls ||
*new_req->limited != *req->limited)

res = new_rd_Proj(dbgi, new_call, mode, proj);

/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
case pn_ia32_Call_stack:
arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);

case pn_ia32_Call_fpcw:
arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
/**
 * Transform the Projs from a Cmp -- must never be reached, since mode_b
 * lowering should have removed all direct Cmp projections beforehand.
 */
static ir_node *gen_Proj_Cmp(ir_node *node)
/* this probably means not all mode_b nodes were lowered... */
panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform the Projs from a Bound: the bound check becomes a Jcc, so
 * the X projs map onto its true/false outputs; memory and result pass
 * through untouched.
 */
static ir_node *gen_Proj_Bound(ir_node *node)
ir_node *pred = get_Proj_pred(node);

switch (get_Proj_proj(node)) {
return be_transform_node(get_Bound_mem(pred));
case pn_Bound_X_regular:
new_node = be_transform_node(pred);
return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
case pn_Bound_X_except:
new_node = be_transform_node(pred);
return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
/* the checked index is returned unchanged */
return be_transform_node(get_Bound_index(pred));
panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node: renumber the proj according to the
 * transformed node's output layout (memory is always the last output). */
static ir_node *gen_Proj_ASM(ir_node *node)
ir_mode *mode = get_irn_mode(node);
ir_node *pred = get_Proj_pred(node);
ir_node *new_pred = be_transform_node(pred);
long pos = get_Proj_proj(node);

if (mode == mode_M) {
	/* the memory output is the last one of the transformed ASM */
	pos = arch_irn_get_n_outs(new_pred)-1;
} else if (mode_is_int(mode) || mode_is_reference(mode)) {
} else if (mode_is_float(mode)) {
panic("unexpected proj mode at ASM");

return new_r_Proj(new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes: dispatch on the
 * predecessor's opcode to the specialized Proj transformers.
 */
static ir_node *gen_Proj(ir_node *node)
ir_node *pred = get_Proj_pred(node);

switch (get_irn_opcode(pred)) {
proj = get_Proj_proj(node);
if (proj == pn_Store_M) {
return be_transform_node(pred);
panic("No idea how to transform proj->Store");
return gen_Proj_Load(node);
return gen_Proj_ASM(node);
return gen_Proj_Builtin(node);
return gen_Proj_DivMod(node);
return gen_Proj_CopyB(node);
return gen_Proj_Quot(node);
return gen_Proj_be_SubSP(node);
return gen_Proj_be_AddSP(node);
return gen_Proj_be_Call(node);
return gen_Proj_Cmp(node);
return gen_Proj_Bound(node);
proj = get_Proj_proj(node);
case pn_Start_X_initial_exec: {
ir_node *block = get_nodes_block(pred);
ir_node *new_block = be_transform_node(block);
dbg_info *dbgi = get_irn_dbg_info(node);
/* we exchange the ProjX with a jump */
ir_node *jump = new_rd_Jmp(dbgi, new_block);
case pn_Start_P_tls:
return gen_Proj_tls(node);
if (is_ia32_l_FloattoLL(pred)) {
return gen_Proj_l_FloattoLL(node);
} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
ir_mode *mode = get_irn_mode(node);
if (ia32_mode_needs_gp_reg(mode)) {
ir_node *new_pred = be_transform_node(pred);
ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
get_Proj_proj(node));
/* keep the original node number for debugging purposes */
new_proj->node_nr = node->node_nr;
/* default: copy the Proj unchanged */
return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic pointer of each
 * opcode, so be_transform_graph() can dispatch per-opcode.
 */
static void register_transformers(void)
/* first clear the generic function pointer for all ops */
clear_irp_opcodes_generic_func();

/* GEN registers a transformer, BAD marks an opcode that must not occur */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) { op_##a->ops.generic = (op_func)bad_transform; }
/* transform ops from intrinsic lowering */
GEN(ia32_l_LLtoFloat)
GEN(ia32_l_FloattoLL)
/* we should never see these nodes */
/* handle builtins */
/* handle generic backend nodes */
/**
 * Pre-transform all unknown and noreg nodes, and cache the per-graph
 * nomem/noreg_GP helpers used throughout the transformers.
 */
static void ia32_pretransform_node(void)
ia32_code_gen_t *cg = env_cg;

cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);

nomem = get_irg_no_mem(current_ir_graph);
noreg_GP = ia32_new_NoReg_gp(cg);
/**
 * Walker, checks if all ia32 nodes producing more than one result have their
 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
unsigned found_projs = 0;
const ir_edge_t *edge;
ir_mode *mode = get_irn_mode(node);
if (!is_ia32_irn(node))
n_outs = arch_irn_get_n_outs(node);
if (is_ia32_SwitchJmp(node))

/* collect a bitmask of which output projs already exist */
assert(n_outs < (int) sizeof(unsigned) * 8);
foreach_out_edge(node, edge) {
ir_node *proj = get_edge_src_irn(edge);

/* The node could be kept */
if (get_irn_mode(proj) == mode_M)

pn = get_Proj_proj(proj);
assert(pn < n_outs);
found_projs |= 1 << pn;

/* are keeps missing? */
for (i = 0; i < n_outs; ++i) {
const arch_register_req_t *req;
const arch_register_class_t *cls;

if (found_projs & (1 << i)) {

req = arch_get_out_register_req(node, i);
/* flag outputs need no keep */
if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {

/* create the missing Proj and keep it alive */
block = get_nodes_block(node);
in[0] = new_r_Proj(node, arch_register_class_mode(cls), i);
if (last_keep != NULL) {
be_Keep_add_node(last_keep, cls, in[0]);
last_keep = be_new_Keep(block, 1, in);
if (sched_is_scheduled(node)) {
sched_add_after(node, last_keep);
/**
 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
 * by walking the whole graph with add_missing_keep_walker().
 */
void ia32_add_missing_keeps(ia32_code_gen_t *cg)
ir_graph *irg = be_get_birg_irg(cg->birg);
irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires that the results are in st0, copy them
 * to a xmm register.
 */
static void postprocess_fp_call_results(void)
for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
ir_node *call = call_list[i];
ir_type *mtp = call_types[i];

for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
ir_type *res_tp = get_method_res_type(mtp, j);
ir_node *res, *new_res;
const ir_edge_t *edge, *next;

if (! is_atomic_type(res_tp)) {
/* no floating point return */
mode = get_type_mode(res_tp);
if (! mode_is_float(mode)) {
/* no floating point return */

res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

/* now patch the users */
foreach_out_edge_safe(res, edge, next) {
ir_node *succ = get_edge_src_irn(edge);

/* Keeps need not be rewired */
if (be_is_Keep(succ))

if (is_ia32_xStore(succ)) {
/* an xStore can be patched into an vfst */
dbg_info *db = get_irn_dbg_info(succ);
ir_node *block = get_nodes_block(succ);
ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
ir_mode *mode = get_ia32_ls_mode(succ);

ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
if (is_ia32_use_frame(succ))
set_ia32_use_frame(st);
set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
set_irn_pinned(st, get_irn_pinned(succ));
set_ia32_op_type(st, ia32_AddrModeD);

/* lazily create the st0 -> stack -> xmm transfer once per result */
if (new_res == NULL) {
dbg_info *db = get_irn_dbg_info(call);
ir_node *block = get_nodes_block(call);
ir_node *frame = get_irg_frame(current_ir_graph);
ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
ir_node *vfst, *xld, *new_mem;

/* store st(0) on stack */
vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
set_ia32_op_type(vfst, ia32_AddrModeD);
set_ia32_use_frame(vfst);

/* load into SSE register */
xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
set_ia32_op_type(xld, ia32_AddrModeS);
set_ia32_use_frame(xld);

new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);

/* reroute memory users to the new memory chain */
if (old_mem != NULL) {
edges_reroute(old_mem, new_mem, current_ir_graph);

set_irn_n(succ, get_edge_src_pos(edge), new_res);
/* do the transformation: entry point of the ia32 transform phase */
void ia32_transform_graph(ia32_code_gen_t *cg)
register_transformers();

initial_fpcw = NULL;

/* heights are needed by the address-mode matcher */
be_timer_push(T_HEIGHTS);
heights = heights_new(cg->irg);
be_timer_pop(T_HEIGHTS);
ia32_calculate_non_address_mode_nodes(cg->birg);

/* the transform phase is not safe for CSE (yet) because several nodes get
 * attributes set after their creation */
cse_last = get_opt_cse();

/* collect calls so their fp results can be fixed up afterwards */
call_list = NEW_ARR_F(ir_node *, 0);
call_types = NEW_ARR_F(ir_type *, 0);
be_transform_graph(cg->irg, ia32_pretransform_node);

if (ia32_cg_config.use_sse2)
postprocess_fp_call_results();
DEL_ARR_F(call_types);
DEL_ARR_F(call_list);

/* restore the caller's CSE setting */
set_opt_cse(cse_last);

ia32_free_non_address_mode_nodes();
heights_free(heights);
6032 void ia32_init_transform(void)
6034 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");