2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
/* NOTE(review): this excerpt elides some original source lines (the embedded
 * numbers are the original file's line numbers); comments describe only what
 * is visible here. */
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
/* Bit patterns (as strings, parsed by new_tarval_from_str) used to build the
 * sign/abs masks for SSE float negate/abs, and the bias constant used when
 * converting unsigned long long to floating point. */
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
81 #define ULL_BIAS "18446744073709551616"
/* Linker-visible names of the entities holding the constants above. */
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
/* Shorthand for the modes of the x87 (vfp) and SSE (xmm) register classes. */
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached transformed node for the initial fp control word; see get_fpcw(). */
94 static ir_node *initial_fpcw = NULL;
/* Constructor-function signatures for the generated ia32 node builders; they
 * are passed to the generic gen_binop/gen_unop/gen_shift_binop helpers below. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in the file. */
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have these once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero if a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero if a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero if a node represents the -1 constant (all bits set). */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
/* i.e. 0.0 (fldz) and 1.0 (fld1), which have dedicated x87 instructions.
 * NOTE(review): the return statements are elided in this excerpt. */
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
/* For SSE: 0.0 always qualifies (xorps); with CONSTRUCT_SSE_CONST additional
 * patterns (e.g. doubles whose low 32 bits are zero) are also accepted.
 * NOTE(review): several branches/returns are elided in this excerpt. */
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* Assemble the low 32 bits of the double, byte by byte (little endian). */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
/* NOTE(review): the non-PIC return (presumably noreg_GP) is elided here. */
201 static ir_node *get_symconst_base(void)
203 if (be_get_irg_options(env_cg->irg)->pic) {
204 return arch_code_generator_get_pic_base(env_cg);
211 * Transforms a Const.
/* Float constants become SSE loads/xZero (use_sse2) or x87 fldz/fld1/vfld,
 * falling back to a load from a float-constant entity; integer constants
 * become an ia32_Const immediate.
 * NOTE(review): many lines (declarations, else-branches, closing braces,
 * returns) are elided in this excerpt. */
213 static ir_node *gen_Const(ir_node *node)
215 ir_node *old_block = get_nodes_block(node);
216 ir_node *block = be_transform_node(old_block);
217 dbg_info *dbgi = get_irn_dbg_info(node);
218 ir_mode *mode = get_irn_mode(node);
220 assert(is_Const(node));
222 if (mode_is_float(mode)) {
228 if (ia32_cg_config.use_sse2) {
229 tarval *tv = get_Const_tarval(node);
230 if (tarval_is_null(tv)) {
/* 0.0: materialize with xZero (no memory access needed). */
231 load = new_bd_ia32_xZero(dbgi, block);
232 set_ia32_ls_mode(load, mode);
234 #ifdef CONSTRUCT_SSE_CONST
235 } else if (tarval_is_one(tv)) {
/* 1.0: all-ones shifted to produce the exponent pattern of 1.0
 * (shift amounts differ for float vs. double). */
236 int cnst = mode == mode_F ? 26 : 55;
237 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
238 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
239 ir_node *pslld, *psrld;
241 load = new_bd_ia32_xAllOnes(dbgi, block);
242 set_ia32_ls_mode(load, mode);
243 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
244 set_ia32_ls_mode(pslld, mode);
245 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
246 set_ia32_ls_mode(psrld, mode);
248 #endif /* CONSTRUCT_SSE_CONST */
249 } else if (mode == mode_F) {
250 /* we can place any 32bit constant by using a movd gp, sse */
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
255 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
256 load = new_bd_ia32_xMovd(dbgi, block, cnst);
257 set_ia32_ls_mode(load, mode);
260 #ifdef CONSTRUCT_SSE_CONST
261 if (mode == mode_D) {
262 unsigned val = get_tarval_sub_bits(tv, 0) |
263 (get_tarval_sub_bits(tv, 1) << 8) |
264 (get_tarval_sub_bits(tv, 2) << 16) |
265 (get_tarval_sub_bits(tv, 3) << 24);
267 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
268 ir_node *cnst, *psllq;
270 /* fine, lower 32bit are zero, produce 32bit value */
271 val = get_tarval_sub_bits(tv, 4) |
272 (get_tarval_sub_bits(tv, 5) << 8) |
273 (get_tarval_sub_bits(tv, 6) << 16) |
274 (get_tarval_sub_bits(tv, 7) << 24);
275 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
276 load = new_bd_ia32_xMovd(dbgi, block, cnst);
277 set_ia32_ls_mode(load, mode);
/* shift the 32-bit value into the upper half of the xmm register */
278 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
279 set_ia32_ls_mode(psllq, mode);
284 #endif /* CONSTRUCT_SSE_CONST */
/* General case: load the constant from a read-only entity. */
285 floatent = create_float_const_entity(node);
287 base = get_symconst_base();
288 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
290 set_ia32_op_type(load, ia32_AddrModeS);
291 set_ia32_am_sc(load, floatent);
/* constant load: may be recomputed instead of spilled */
292 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
293 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, entity load otherwise. */
296 if (is_Const_null(node)) {
297 load = new_bd_ia32_vfldz(dbgi, block);
299 set_ia32_ls_mode(load, mode);
300 } else if (is_Const_one(node)) {
301 load = new_bd_ia32_vfld1(dbgi, block);
303 set_ia32_ls_mode(load, mode);
308 floatent = create_float_const_entity(node);
309 /* create_float_const_ent is smart and sometimes creates
311 ls_mode = get_type_mode(get_entity_type(floatent));
312 base = get_symconst_base();
313 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
315 set_ia32_op_type(load, ia32_AddrModeS);
316 set_ia32_am_sc(load, floatent);
317 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
318 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
321 #ifdef CONSTRUCT_SSE_CONST
323 #endif /* CONSTRUCT_SSE_CONST */
324 SET_IA32_ORIG_NODE(load, node);
326 be_dep_on_frame(load);
328 } else { /* non-float mode */
330 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned so get_tarval_long below is safe */
333 tv = tarval_convert_to(tv, mode_Iu);
335 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
337 panic("couldn't convert constant tarval (%+F)", node);
339 val = get_tarval_long(tv);
341 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
342 SET_IA32_ORIG_NODE(cnst, node);
344 be_dep_on_frame(cnst);
350 * Transforms a SymConst.
/* Float SymConsts become loads from the symbol's address; integer ones become
 * an ia32_Const carrying the entity. Only symconst_addr_ent is supported.
 * NOTE(review): declarations, else-branch braces and the return are elided. */
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
377 SET_IA32_ORIG_NODE(cnst, node);
379 be_dep_on_frame(cnst);
384 * Create a float type for the given mode and cache it.
386 * @param mode the mode for the float type (might be integer mode for SSE2 types)
387 * @param align alignment
/* One static cache array per supported mode, indexed by alignment (caller
 * must keep align < 16 — the arrays have 16 slots and there is no bounds
 * check visible here). */
389 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
395 if (mode == mode_Iu) {
396 static ir_type *int_Iu[16] = {NULL, };
398 if (int_Iu[align] == NULL) {
399 int_Iu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Iu[align];
404 } else if (mode == mode_Lu) {
405 static ir_type *int_Lu[16] = {NULL, };
407 if (int_Lu[align] == NULL) {
408 int_Lu[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return int_Lu[align];
413 } else if (mode == mode_F) {
414 static ir_type *float_F[16] = {NULL, };
416 if (float_F[align] == NULL) {
417 float_F[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_F[align];
422 } else if (mode == mode_D) {
423 static ir_type *float_D[16] = {NULL, };
425 if (float_D[align] == NULL) {
426 float_D[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_D[align];
/* fallback: any other mode is treated as extended precision (mode_E) */
432 static ir_type *float_E[16] = {NULL, };
434 if (float_E[align] == NULL) {
435 float_E[align] = tp = new_type_primitive(mode);
436 /* set the specified alignment */
437 set_type_alignment_bytes(tp, align);
439 return float_E[align];
444 * Create a float[2] array type for the given atomic type.
446 * @param tp the atomic type
/* Cached per element mode and alignment, like ia32_create_float_type above;
 * used for the two-element ULL-bias table in ia32_gen_fp_known_const. */
448 static ir_type *ia32_create_float_array(ir_type *tp)
450 ir_mode *mode = get_type_mode(tp);
451 unsigned align = get_type_alignment_bytes(tp);
456 if (mode == mode_F) {
457 static ir_type *float_F[16] = {NULL, };
459 if (float_F[align] != NULL)
460 return float_F[align];
461 arr = float_F[align] = new_type_array(1, tp);
462 } else if (mode == mode_D) {
463 static ir_type *float_D[16] = {NULL, };
465 if (float_D[align] != NULL)
466 return float_D[align];
467 arr = float_D[align] = new_type_array(1, tp);
469 static ir_type *float_E[16] = {NULL, };
471 if (float_E[align] != NULL)
472 return float_E[align];
473 arr = float_E[align] = new_type_array(1, tp);
475 set_type_alignment_bytes(arr, align);
476 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
477 set_type_state(arr, layout_fixed);
481 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches) the global constant entities for the sign/abs
 * bit masks and the ULL bias; kct selects which one. */
482 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
484 static const struct {
485 const char *ent_name;
486 const char *cnst_str;
/* NOTE(review): the 'mode' and 'align' members are elided in this excerpt
 * but are initialized below (third and fourth initializer values). */
489 } names [ia32_known_const_max] = {
490 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
491 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
492 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
493 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
494 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
496 static ir_entity *ent_cache[ia32_known_const_max];
498 const char *ent_name, *cnst_str;
504 ent_name = names[kct].ent_name;
505 if (! ent_cache[kct]) {
506 cnst_str = names[kct].cnst_str;
/* decode the mode tag from the table: 0 = Iu, 1 = Lu, other = F */
508 switch (names[kct].mode) {
509 case 0: mode = mode_Iu; break;
510 case 1: mode = mode_Lu; break;
511 default: mode = mode_F; break;
513 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
514 tp = ia32_create_float_type(mode, names[kct].align);
516 if (kct == ia32_ULLBIAS)
517 tp = ia32_create_float_array(tp);
518 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
520 set_entity_ld_ident(ent, get_entity_ident(ent));
521 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
522 set_entity_visibility(ent, ir_visibility_private);
524 if (kct == ia32_ULLBIAS) {
/* ULL bias is a two-element table: { 0.0, bias } */
525 ir_initializer_t *initializer = create_initializer_compound(2);
527 set_initializer_compound_value(initializer, 0,
528 create_initializer_tarval(get_mode_null(mode)));
529 set_initializer_compound_value(initializer, 1,
530 create_initializer_tarval(tv));
532 set_entity_initializer(ent, initializer);
534 set_entity_initializer(ent, create_initializer_tarval(tv));
537 /* cache the entry */
538 ent_cache[kct] = ent;
541 return ent_cache[kct];
545 * return true if the node is a Proj(Load) and could be used in source address
546 * mode for another node. Will return only true if the @p other node is not
547 * dependent on the memory of the Load (for binary operations use the other
548 * input here, for unary operations use NULL).
/* NOTE(review): several return statements are elided in this excerpt. */
550 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
551 ir_node *other, ir_node *other2, match_flags_t flags)
556 /* float constants are always available */
557 if (is_Const(node)) {
558 ir_mode *mode = get_irn_mode(node);
559 if (mode_is_float(mode)) {
560 if (ia32_cg_config.use_sse2) {
561 if (is_simple_sse_Const(node))
564 if (is_simple_x87_Const(node))
/* constants with multiple users should stay materialized once */
567 if (get_irn_n_edges(node) > 1)
575 load = get_Proj_pred(node);
576 pn = get_Proj_proj(node);
577 if (!is_Load(load) || pn != pn_Load_res)
579 if (get_nodes_block(load) != block)
581 /* we only use address mode if we're the only user of the load */
582 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
584 /* in some edge cases with address mode we might reach the load normally
585 * and through some AM sequence, if it is already materialized then we
586 * can't create an AM node from it */
587 if (be_is_transformed(node))
590 /* don't do AM if other node inputs depend on the load (via mem-proj) */
591 if (other != NULL && prevents_AM(block, load, other))
594 if (other2 != NULL && prevents_AM(block, load, other2))
/* Bundles everything match_arguments() computes about a node's operands:
 * the address (am.addr), operand kind, and matching flags.
 * NOTE(review): several struct members are elided in this excerpt. */
600 typedef struct ia32_address_mode_t ia32_address_mode_t;
601 struct ia32_address_mode_t {
606 ia32_op_type_t op_type;
610 unsigned commutative : 1;
611 unsigned ins_permuted : 1;
/* Fill addr from a raw pointer + memory input, transforming base/index
 * (noreg_GP when absent) and the memory dependency. */
614 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
616 /* construct load address */
617 memset(addr, 0, sizeof(addr[0]));
618 ia32_create_address_mode(addr, ptr, 0);
620 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
621 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
622 addr->mem = be_transform_node(mem);
/* Fill am->addr for source address mode on @p node: either a float constant
 * (turned into a constant entity reference) or a Proj(Load) whose address is
 * decomposed into base/index/offset.
 * NOTE(review): declarations and the early return are elided here. */
625 static void build_address(ia32_address_mode_t *am, ir_node *node,
626 ia32_create_am_flags_t flags)
628 ia32_address_t *addr = &am->addr;
634 /* floating point immediates */
635 if (is_Const(node)) {
636 ir_entity *entity = create_float_const_entity(node);
637 addr->base = get_symconst_base();
638 addr->index = noreg_GP;
640 addr->symconst_ent = entity;
642 am->ls_mode = get_type_mode(get_entity_type(entity));
/* constants never alias: the load may float freely */
643 am->pinned = op_pin_state_floats;
647 load = get_Proj_pred(node);
648 ptr = get_Load_ptr(load);
649 mem = get_Load_mem(load);
650 new_mem = be_transform_node(mem);
651 am->pinned = get_irn_pinned(load);
652 am->ls_mode = get_Load_mode(load);
653 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
656 /* construct load address */
657 ia32_create_address_mode(addr, ptr, flags);
659 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
660 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy an ia32_address_t into the attributes of an ia32 node. */
664 static void set_address(ir_node *node, const ia32_address_t *addr)
666 set_ia32_am_scale(node, addr->scale);
667 set_ia32_am_sc(node, addr->symconst_ent);
668 set_ia32_am_offs_int(node, addr->offset);
669 if (addr->symconst_sign)
670 set_ia32_am_sc_sign(node);
/* NOTE(review): the use_frame condition guarding these two lines is elided */
672 set_ia32_use_frame(node);
673 set_ia32_frame_ent(node, addr->frame_entity);
677 * Apply attributes of a given address mode to a node.
679 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
681 set_address(node, &am->addr);
683 set_ia32_op_type(node, am->op_type);
684 set_ia32_ls_mode(node, am->ls_mode);
685 if (am->pinned == op_pin_state_pinned) {
686 /* beware: some nodes are already pinned and did not allow to change the state */
687 if (get_irn_pinned(node) != op_pin_state_pinned)
688 set_irn_pinned(node, op_pin_state_pinned);
691 set_ia32_commutative(node);
695 * Check, if a given node is a Down-Conv, ie. a integer Conv
696 * from a mode with a mode with more bits to a mode with lesser bits.
697 * Moreover, we return only true if the node has not more than 1 user.
699 * @param node the node
700 * @return non-zero if node is a Down-Conv
/* NOTE(review): the is_Conv() guard and the return expression's opening are
 * elided in this excerpt. */
702 static int is_downconv(const ir_node *node)
710 /* we only want to skip the conv when we're the only user
711 * (because this test is used in the context of address-mode selection
712 * and we don't want to use address mode for multiple users) */
713 if (get_irn_n_edges(node) > 1)
716 src_mode = get_irn_mode(get_Conv_op(node));
717 dest_mode = get_irn_mode(node);
719 ia32_mode_needs_gp_reg(src_mode) &&
720 ia32_mode_needs_gp_reg(dest_mode) &&
721 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
724 /** Skip all Down-Conv's on a given node and return the resulting node. */
725 ir_node *ia32_skip_downconv(ir_node *node)
727 while (is_downconv(node))
728 node = get_Conv_op(node);
/* Like is_downconv, but for convs between modes of the SAME bit size
 * (signedness changes only). */
733 static bool is_sameconv(ir_node *node)
741 /* we only want to skip the conv when we're the only user
742 * (because this test is used in the context of address-mode selection
743 * and we don't want to use address mode for multiple users) */
744 if (get_irn_n_edges(node) > 1)
747 src_mode = get_irn_mode(get_Conv_op(node));
748 dest_mode = get_irn_mode(node);
750 ia32_mode_needs_gp_reg(src_mode) &&
751 ia32_mode_needs_gp_reg(dest_mode) &&
752 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
755 /** Skip all signedness convs */
756 static ir_node *ia32_skip_sameconv(ir_node *node)
758 while (is_sameconv(node))
759 node = get_Conv_op(node);
/* Widen @p node to 32 bit via create_I2I_Conv; the target mode depends on the
 * signedness of the source mode.
 * NOTE(review): the tgt_mode selection lines are elided in this excerpt. */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if (mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): numerous lines (guards, else-branches, assignments) are
 * elided in this excerpt. */
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
/* decode the individual capabilities from the flags word */
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit AM only when the operation explicitly supports it */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode for op2 first, ... */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
847 build_address(am, op2, 0);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if (mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
/* ... then for op1 if the operation is commutative */
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
859 build_address(am, op1, 0);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if (new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
/* operands were swapped: remember so flag users can compensate */
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
877 am->op_type = ia32_Normal;
879 if (flags & match_try_am) {
885 mode = get_irn_mode(op2);
/* operations that insist on 32 bit get explicit upconvs */
886 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
887 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
889 new_op2 = create_upconv(op2, NULL);
890 am->ls_mode = mode_Iu;
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
895 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): the early return and the final return are elided here. */
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
/* redirect the old Load's users to the new AM node */
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
973 new_node = fix_mem_proj(new_node, &am);
979 * Generic names for the inputs of an ia32 binary op.
/* The compile-time asserts below verify these generic indices line up with
 * the generated per-node input indices of l_Adc and l_Sbb. */
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
986 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op: AM must not bypass the flags dependency */
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed and cached) initial x87 fp control word. */
1032 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(env_cg->irg),
1039 &ia32_fp_cw_regs[REG_FPCW]);
1040 initial_fpcw = be_transform_node(fpcw);
1042 return initial_fpcw;
1046 * Construct a standard binary operation, set AM and immediate if required.
1048 * @param op1 The first operand
1049 * @param op2 The second operand
1050 * @param func The node constructor function
1051 * @return The constructed ia32 node.
/* x87 variant of gen_binop: wires in the fp control word and records whether
 * the operands were swapped (reverse instructions exist for all ops). */
1053 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1054 construct_binop_float_func *func)
1056 ir_mode *mode = get_irn_mode(node);
1058 ir_node *block, *new_block, *new_node;
1059 ia32_address_mode_t am;
1060 ia32_address_t *addr = &am.addr;
1061 ia32_x87_attr_t *attr;
1062 /* All operations are considered commutative, because there are reverse
1064 match_flags_t flags = match_commutative;
1066 /* happens for div nodes... */
1068 mode = get_divop_resmod(node);
1070 /* cannot use address mode with long double on x87 */
1071 if (get_mode_size_bits(mode) <= 64)
1074 block = get_nodes_block(node);
1075 match_arguments(&am, block, op1, op2, NULL, flags);
1077 dbgi = get_irn_dbg_info(node);
1078 new_block = be_transform_node(block);
1079 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1080 am.new_op1, am.new_op2, get_fpcw());
1081 set_am_attributes(new_node, &am);
1083 attr = get_ia32_x87_attr(new_node);
1084 attr->attr.data.ins_permuted = am.ins_permuted;
1086 SET_IA32_ORIG_NODE(new_node, node);
1088 new_node = fix_mem_proj(new_node, &am);
1094 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1096 * @param op1 The first operand
1097 * @param op2 The second operand
1098 * @param func The node constructor function
1099 * @return The constructed ia32 node.
1101 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1102 construct_shift_func *func,
1103 match_flags_t flags)
1106 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1108 assert(! mode_is_float(get_irn_mode(node)));
1109 assert(flags & match_immediate);
1110 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1112 if (flags & match_mode_neutral) {
1113 op1 = ia32_skip_downconv(op1);
1114 new_op1 = be_transform_node(op1);
1115 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1116 new_op1 = create_upconv(op1, node);
1118 new_op1 = be_transform_node(op1);
1121 /* the shift amount can be any mode that is bigger than 5 bits, since all
1122 * other bits are ignored anyway */
1123 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1124 ir_node *const op = get_Conv_op(op2);
1125 if (mode_is_float(get_irn_mode(op)))
/* NOTE(review): the break and op2 = op lines are elided in this excerpt */
1128 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1130 new_op2 = create_immediate_or_transform(op2, 0);
1132 dbgi = get_irn_dbg_info(node);
1133 block = get_nodes_block(node);
1134 new_block = be_transform_node(block);
1135 new_node = func(dbgi, new_block, new_op1, new_op2);
1136 SET_IA32_ORIG_NODE(new_node, node);
1138 /* lowered shift instruction may have a dependency operand, handle it here */
1139 if (get_irn_arity(node) == 3) {
1140 /* we have a dependency */
1141 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1142 add_irn_dep(new_node, new_dep);
1150 * Construct a standard unary operation, set AM and immediate if required.
1152 * @param op The operand
1153 * @param func The node constructor function
1154 * @return The constructed ia32 node.
1156 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1157 match_flags_t flags)
1160 ir_node *block, *new_block, *new_op, *new_node;
1162 assert(flags == 0 || flags == match_mode_neutral);
1163 if (flags & match_mode_neutral) {
1164 op = ia32_skip_downconv(op);
1167 new_op = be_transform_node(op);
1168 dbgi = get_irn_dbg_info(node);
1169 block = get_nodes_block(node);
1170 new_block = be_transform_node(block);
1171 new_node = func(dbgi, new_block, new_op);
1173 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from a decomposed address; base/index default to
 * noreg_GP when absent (NULL branches elided in this excerpt). */
1178 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1179 ia32_address_t *addr)
1181 ir_node *base, *index, *res;
1187 base = be_transform_node(base);
1190 index = addr->index;
1191 if (index == NULL) {
1194 index = be_transform_node(index);
1197 res = new_bd_ia32_Lea(dbgi, block, base, index);
1198 set_address(res, addr);
1204 * Returns non-zero if a given address mode has a symbolic or
1205 * numerical offset != 0.
1207 static int am_has_immediates(const ia32_address_t *addr)
1209 return addr->offset != 0 || addr->symconst_ent != NULL
1210 || addr->frame_entity || addr->use_frame;
1214 * Creates an ia32 Add.
1216 * @return the created ia32 Add node
/* Strategy (see numbered comment below): fold to Const, to Lea, to an Add
 * with source address mode, or to a plain Lea, in that order.
 * NOTE(review): several returns/guards are elided in this excerpt. */
1218 static ir_node *gen_Add(ir_node *node)
1220 ir_mode *mode = get_irn_mode(node);
1221 ir_node *op1 = get_Add_left(node);
1222 ir_node *op2 = get_Add_right(node);
1224 ir_node *block, *new_block, *new_node, *add_immediate_op;
1225 ia32_address_t addr;
1226 ia32_address_mode_t am;
1228 if (mode_is_float(mode)) {
1229 if (ia32_cg_config.use_sse2)
1230 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1231 match_commutative | match_am);
1233 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1236 ia32_mark_non_am(node);
1238 op2 = ia32_skip_downconv(op2);
1239 op1 = ia32_skip_downconv(op1);
1243 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1244 * 1. Add with immediate -> Lea
1245 * 2. Add with possible source address mode -> Add
1246 * 3. Otherwise -> Lea
1248 memset(&addr, 0, sizeof(addr));
1249 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1250 add_immediate_op = NULL;
1252 dbgi = get_irn_dbg_info(node);
1253 block = get_nodes_block(node);
1254 new_block = be_transform_node(block);
/* case 0: everything folded into offset/symconst -> plain Const */
1257 if (addr.base == NULL && addr.index == NULL) {
1258 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1259 addr.symconst_sign, 0, addr.offset);
1260 be_dep_on_frame(new_node);
1261 SET_IA32_ORIG_NODE(new_node, node);
1264 /* add with immediate? */
1265 if (addr.index == NULL) {
1266 add_immediate_op = addr.base;
1267 } else if (addr.base == NULL && addr.scale == 0) {
1268 add_immediate_op = addr.index;
1271 if (add_immediate_op != NULL) {
1272 if (!am_has_immediates(&addr)) {
1273 #ifdef DEBUG_libfirm
1274 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* x + 0: the operand itself is the result */
1277 return be_transform_node(add_immediate_op);
1280 new_node = create_lea_from_address(dbgi, new_block, &addr);
1281 SET_IA32_ORIG_NODE(new_node, node);
1285 /* test if we can use source address mode */
1286 match_arguments(&am, block, op1, op2, NULL, match_commutative
1287 | match_mode_neutral | match_am | match_immediate | match_try_am);
1289 /* construct an Add with source address mode */
1290 if (am.op_type == ia32_AddrModeS) {
1291 ia32_address_t *am_addr = &am.addr;
1292 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1293 am_addr->index, am_addr->mem, am.new_op1,
1295 set_am_attributes(new_node, &am);
1296 SET_IA32_ORIG_NODE(new_node, node);
1298 new_node = fix_mem_proj(new_node, &am);
1303 /* otherwise construct a lea */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1310 * Creates an ia32 Mul.
1312 * @return the created ia32 Mul node
1314 static ir_node *gen_Mul(ir_node *node)
1316 ir_node *op1 = get_Mul_left(node);
1317 ir_node *op2 = get_Mul_right(node);
1318 ir_mode *mode = get_irn_mode(node);
	/* floating point: SSE2 xMul or x87 vfmul */
1320 if (mode_is_float(mode)) {
1321 if (ia32_cg_config.use_sse2)
1322 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1323 match_commutative | match_am);
1325 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
	/* integer multiply: the low bits of signed and unsigned multiplication
	 * agree, so a single IMul (mode_neutral) handles both */
1327 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1328 match_commutative | match_am | match_mode_neutral |
1329 match_immediate | match_am_and_immediates);
1333 * Creates an ia32 Mulh.
1334 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1335 * this result while Mul returns the lower 32 bit.
1337 * @return the created ia32 Mulh node
1339 static ir_node *gen_Mulh(ir_node *node)
1341 dbg_info *dbgi = get_irn_dbg_info(node);
1342 ir_node *op1 = get_Mulh_left(node);
1343 ir_node *op2 = get_Mulh_right(node);
1344 ir_mode *mode = get_irn_mode(node);
1346 ir_node *proj_res_high;
	/* only 32x32->64 multiplication is available on ia32 */
1348 if (get_mode_size_bits(mode) != 32) {
1349 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
	/* signed/unsigned need different instructions; in both cases return
	 * the Proj selecting the high half of the 64 bit product */
1352 if (mode_is_signed(mode)) {
1353 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1354 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1359 return proj_res_high;
1363 * Creates an ia32 And.
1365 * @return The created ia32 And node
1367 static ir_node *gen_And(ir_node *node)
1369 ir_node *op1 = get_And_left(node);
1370 ir_node *op2 = get_And_right(node);
1371 assert(! mode_is_float(get_irn_mode(node)));
1373 /* is it a zero extension? */
1374 if (is_Const(op2)) {
1375 tarval *tv = get_Const_tarval(op2);
1376 long v = get_tarval_long(tv);
		/* And with 0xFF/0xFFFF is a zero extension from 8/16 bit:
		 * emit a conv (movzx) instead of a real And */
1378 if (v == 0xFF || v == 0xFFFF) {
1379 dbg_info *dbgi = get_irn_dbg_info(node);
1380 ir_node *block = get_nodes_block(node);
1387 assert(v == 0xFFFF);
1390 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
	/* general case: plain And binop */
1395 return gen_binop(node, op1, op2, new_bd_ia32_And,
1396 match_commutative | match_mode_neutral | match_am | match_immediate);
1402 * Creates an ia32 Or.
1404 * @return The created ia32 Or node
1406 static ir_node *gen_Or(ir_node *node)
1408 ir_node *op1 = get_Or_left(node);
1409 ir_node *op2 = get_Or_right(node);
1411 assert (! mode_is_float(get_irn_mode(node)));
1412 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1413 | match_mode_neutral | match_am | match_immediate);
1419 * Creates an ia32 Eor.
1421 * @return The created ia32 Eor node
1423 static ir_node *gen_Eor(ir_node *node)
1425 ir_node *op1 = get_Eor_left(node);
1426 ir_node *op2 = get_Eor_right(node);
1428 assert(! mode_is_float(get_irn_mode(node)));
1429 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1430 | match_mode_neutral | match_am | match_immediate);
1435 * Creates an ia32 Sub.
1437 * @return The created ia32 Sub node
1439 static ir_node *gen_Sub(ir_node *node)
1441 ir_node *op1 = get_Sub_left(node);
1442 ir_node *op2 = get_Sub_right(node);
1443 ir_mode *mode = get_irn_mode(node);
	/* floating point: SSE2 xSub or x87 vfsub (Sub is not commutative) */
1445 if (mode_is_float(mode)) {
1446 if (ia32_cg_config.use_sse2)
1447 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1449 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
	/* Sub x,Const should have been normalized to Add x,-Const earlier */
1452 if (is_Const(op2)) {
1453 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1457 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1458 | match_am | match_immediate);
/* Combine the memory predecessor of a node with the memory consumed by an
 * address mode, taking care not to create a cycle through the folded load. */
1461 static ir_node *transform_AM_mem(ir_node *const block,
1462 ir_node *const src_val,
1463 ir_node *const src_mem,
1464 ir_node *const am_mem)
	/* no memory consumed by the address mode -> keep the original memory */
1466 if (is_NoMem(am_mem)) {
1467 return be_transform_node(src_mem);
1468 } else if (is_Proj(src_val) &&
1470 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1471 /* avoid memory loop */
1473 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1474 ir_node *const ptr_pred = get_Proj_pred(src_val);
1475 int const arity = get_Sync_n_preds(src_mem);
1480 NEW_ARR_A(ir_node*, ins, arity + 1);
1482 /* NOTE: This sometimes produces dead-code because the old sync in
1483 * src_mem might not be used anymore, we should detect this case
1484 * and kill the sync... */
1485 for (i = arity - 1; i >= 0; --i) {
1486 ir_node *const pred = get_Sync_pred(src_mem, i);
1488 /* avoid memory loop */
1489 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1492 ins[n++] = be_transform_node(pred);
		/* Sync of the filtered predecessors plus the AM memory */
1497 return new_r_Sync(block, n, ins);
	/* general case: Sync the original memory with the AM memory */
1501 ins[0] = be_transform_node(src_mem);
1503 return new_r_Sync(block, 2, ins);
1508 * Create a 32bit to 64bit signed extension.
1510 * @param dbgi debug info
1511 * @param block the block where node nodes should be placed
1512 * @param val the value to extend
1513 * @param orig the original node
1515 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1516 ir_node *val, const ir_node *orig)
	/* two ways to produce the upper 32 sign bits:
	 * cltd (cdq) — short encoding, but ties the value to eax/edx */
1521 if (ia32_cg_config.use_short_sex_eax) {
1522 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1523 be_dep_on_frame(pval);
1524 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
	/* or an arithmetic shift right by 31, usable with any register */
1526 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1527 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1529 SET_IA32_ORIG_NODE(res, orig);
1534 * Generates an ia32 DivMod with additional infrastructure for the
1535 * register allocator if needed.
/* Common construction for Div, Mod and DivMod: all map to the same ia32
 * div/idiv instruction; callers select the wanted result via Projs. */
1537 static ir_node *create_Div(ir_node *node)
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1541 ir_node *new_block = be_transform_node(block);
1548 ir_node *sign_extension;
1549 ia32_address_mode_t am;
1550 ia32_address_t *addr = &am.addr;
1552 /* the upper bits have random contents for smaller modes */
1553 switch (get_irn_opcode(node)) {
1555 op1 = get_Div_left(node);
1556 op2 = get_Div_right(node);
1557 mem = get_Div_mem(node);
1558 mode = get_Div_resmode(node);
1561 op1 = get_Mod_left(node);
1562 op2 = get_Mod_right(node);
1563 mem = get_Mod_mem(node);
1564 mode = get_Mod_resmode(node);
1567 op1 = get_DivMod_left(node);
1568 op2 = get_DivMod_right(node);
1569 mem = get_DivMod_mem(node);
1570 mode = get_DivMod_resmode(node);
1573 panic("invalid divmod node %+F", node);
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
	/* signed: IDiv with the dividend sign-extended into the high half */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
	/* unsigned: Div with a zero constant as high half */
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
	/* keep the original pinned state (division may trap) */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
	/* delegates to the common Div/Mod/DivMod construction */
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
	/* delegates to the common Div/Mod/DivMod construction */
1619 return create_Div(node);
1623 * Generates an ia32 DivMod.
1625 static ir_node *gen_DivMod(ir_node *node)
	/* delegates to the common Div/Mod/DivMod construction */
1627 return create_Div(node);
1633 * Creates an ia32 floating Div.
1635 * @return The created ia32 xDiv node
1637 static ir_node *gen_Quot(ir_node *node)
1639 ir_node *op1 = get_Quot_left(node);
1640 ir_node *op2 = get_Quot_right(node);
1642 if (ia32_cg_config.use_sse2) {
1643 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1645 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1651 * Creates an ia32 Shl.
1653 * @return The created ia32 Shl node
1655 static ir_node *gen_Shl(ir_node *node)
1657 ir_node *left = get_Shl_left(node);
1658 ir_node *right = get_Shl_right(node);
1660 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1661 match_mode_neutral | match_immediate);
1665 * Creates an ia32 Shr.
1667 * @return The created ia32 Shr node
1669 static ir_node *gen_Shr(ir_node *node)
1671 ir_node *left = get_Shr_left(node);
1672 ir_node *right = get_Shr_right(node);
1674 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1680 * Creates an ia32 Sar.
1682 * @return The created ia32 Shrs node
1684 static ir_node *gen_Shrs(ir_node *node)
1686 ir_node *left = get_Shrs_left(node);
1687 ir_node *right = get_Shrs_right(node);
	/* Shrs x,31 extracts the sign bit -> use the 32->64 sign extension helper */
1689 if (is_Const(right)) {
1690 tarval *tv = get_Const_tarval(right);
1691 long val = get_tarval_long(tv);
1693 /* this is a sign extension */
1694 dbg_info *dbgi = get_irn_dbg_info(node);
1695 ir_node *block = be_transform_node(get_nodes_block(node));
1696 ir_node *new_op = be_transform_node(left);
1698 return create_sex_32_64(dbgi, block, new_op, node);
1702 /* 8 or 16 bit sign extension? */
	/* Shrs(Shl(x, C), C) with C == 16 or 24 sign-extends the low 16/8 bits:
	 * emit a conv (movsx) instead of the shift pair */
1703 if (is_Const(right) && is_Shl(left)) {
1704 ir_node *shl_left = get_Shl_left(left);
1705 ir_node *shl_right = get_Shl_right(left);
1706 if (is_Const(shl_right)) {
1707 tarval *tv1 = get_Const_tarval(right);
1708 tarval *tv2 = get_Const_tarval(shl_right);
1709 if (tv1 == tv2 && tarval_is_long(tv1)) {
1710 long val = get_tarval_long(tv1);
1711 if (val == 16 || val == 24) {
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_node *block = get_nodes_block(node);
1723 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
	/* general case: arithmetic shift right */
1732 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1738 * Creates an ia32 Rol.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotL node
1744 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
	/* rotate left by register or immediate amount */
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1752 * Creates an ia32 Ror.
1753 * NOTE: There is no RotR with immediate because this would always be a RotL
1754 * "imm-mode_size_bits" which can be pre-calculated.
1756 * @param op1 The first operator
1757 * @param op2 The second operator
1758 * @return The created ia32 RotR node
1760 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
	/* rotate right by register or immediate amount */
1762 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1768 * Creates an ia32 RotR or RotL (depending on the found pattern).
1770 * @return The created ia32 RotL or RotR node
1772 static ir_node *gen_Rotl(ir_node *node)
1774 ir_node *rotate = NULL;
1775 ir_node *op1 = get_Rotl_left(node);
1776 ir_node *op2 = get_Rotl_right(node);
1778 /* Firm has only RotL, so we are looking for a right (op2)
1779 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1780 that means we can create a RotR instead of an Add and a RotL */
1784 ir_node *left = get_Add_left(add);
1785 ir_node *right = get_Add_right(add);
1786 if (is_Const(right)) {
1787 tarval *tv = get_Const_tarval(right);
1788 ir_mode *mode = get_irn_mode(node);
1789 long bits = get_mode_size_bits(mode);
		/* matched RotL(x, Minus(e) + bits) -> RotR(x, e) */
1791 if (is_Minus(left) &&
1792 tarval_is_long(tv) &&
1793 get_tarval_long(tv) == bits &&
1796 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1797 rotate = gen_Ror(node, op1, get_Minus_op(left));
	/* pattern not matched: emit a plain rotate left */
1802 if (rotate == NULL) {
1803 rotate = gen_Rol(node, op1, op2);
1812 * Transforms a Minus node.
1814 * @return The created ia32 Minus node
1816 static ir_node *gen_Minus(ir_node *node)
1818 ir_node *op = get_Minus_op(node);
1819 ir_node *block = be_transform_node(get_nodes_block(node));
1820 dbg_info *dbgi = get_irn_dbg_info(node);
1821 ir_mode *mode = get_irn_mode(node);
1826 if (mode_is_float(mode)) {
1827 ir_node *new_op = be_transform_node(op);
		/* SSE2 has no neg: flip the sign bit by xoring with a
		 * sign-mask constant loaded via address mode */
1828 if (ia32_cg_config.use_sse2) {
1829 /* TODO: non-optimal... if we have many xXors, then we should
1830 * rather create a load for the const and use that instead of
1831 * several AM nodes... */
1832 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1834 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1835 noreg_GP, nomem, new_op, noreg_xmm);
1837 size = get_mode_size_bits(mode);
1838 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1840 set_ia32_am_sc(new_node, ent);
1841 set_ia32_op_type(new_node, ia32_AddrModeS);
1842 set_ia32_ls_mode(new_node, mode);
		/* x87 has a dedicated change-sign instruction */
1844 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
	/* integer negate */
1847 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1850 SET_IA32_ORIG_NODE(new_node, node);
1856 * Transforms a Not node.
1858 * @return The created ia32 Not node
1860 static ir_node *gen_Not(ir_node *node)
1862 ir_node *op = get_Not_op(node);
1864 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1865 assert (! mode_is_float(get_irn_mode(node)));
1867 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1873 * Transforms an Abs node.
1875 * @return The created ia32 Abs node
1877 static ir_node *gen_Abs(ir_node *node)
1879 ir_node *block = get_nodes_block(node);
1880 ir_node *new_block = be_transform_node(block);
1881 ir_node *op = get_Abs_op(node);
1882 dbg_info *dbgi = get_irn_dbg_info(node);
1883 ir_mode *mode = get_irn_mode(node);
1889 if (mode_is_float(mode)) {
1890 new_op = be_transform_node(op);
		/* SSE2: clear the sign bit by anding with an abs-mask constant */
1892 if (ia32_cg_config.use_sse2) {
1893 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1894 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1895 noreg_GP, nomem, new_op, noreg_fp);
1897 size = get_mode_size_bits(mode);
1898 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1900 set_ia32_am_sc(new_node, ent);
1902 SET_IA32_ORIG_NODE(new_node, node);
1904 set_ia32_op_type(new_node, ia32_AddrModeS);
1905 set_ia32_ls_mode(new_node, mode);
		/* x87 has a dedicated abs instruction */
1907 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1908 SET_IA32_ORIG_NODE(new_node, node);
	/* integer abs via the branch-free (x ^ sign) - sign sequence,
	 * where sign is x arithmetically shifted right by 31 */
1911 ir_node *xor, *sign_extension;
1913 if (get_mode_size_bits(mode) == 32) {
1914 new_op = be_transform_node(op);
1916 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1919 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1921 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1922 nomem, new_op, sign_extension);
1923 SET_IA32_ORIG_NODE(xor, node);
1925 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1926 nomem, xor, sign_extension);
1927 SET_IA32_ORIG_NODE(new_node, node);
1934 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1936 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1938 dbg_info *dbgi = get_irn_dbg_info(cmp);
1939 ir_node *block = get_nodes_block(cmp);
1940 ir_node *new_block = be_transform_node(block);
1941 ir_node *op1 = be_transform_node(x);
1942 ir_node *op2 = be_transform_node(n);
1944 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1948 * Transform a node returning a "flag" result.
1950 * @param node the node to transform
1951 * @param pnc_out the compare mode to use
1953 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1960 /* we have a Cmp as input */
1961 if (is_Proj(node)) {
1962 ir_node *pred = get_Proj_pred(node);
1964 pn_Cmp pnc = get_Proj_proj(node);
		/* try to turn Cmp(And(1<<n, x), 0) (or !=) into a bt instruction */
1965 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1966 ir_node *l = get_Cmp_left(pred);
1967 ir_node *r = get_Cmp_right(pred);
1969 ir_node *la = get_And_left(l);
1970 ir_node *ra = get_And_right(l);
			/* shifted bit on the left side of the And */
1972 ir_node *c = get_Shl_left(la);
1973 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1974 /* (1 << n) & ra) */
1975 ir_node *n = get_Shl_right(la);
1976 flags = gen_bt(pred, ra, n);
1977 /* we must generate a Jc/Jnc jump */
1978 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1981 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
			/* shifted bit on the right side of the And */
1986 ir_node *c = get_Shl_left(ra);
1987 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1988 /* la & (1 << n)) */
1989 ir_node *n = get_Shl_right(ra);
1990 flags = gen_bt(pred, la, n);
1991 /* we must generate a Jc/Jnc jump */
1992 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1995 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
2001 /* add ia32 compare flags */
2003 ir_node *l = get_Cmp_left(pred);
2004 ir_mode *mode = get_irn_mode(l);
2005 if (mode_is_float(mode))
2006 pnc |= ia32_pn_Cmp_float;
2007 else if (! mode_is_signed(mode))
2008 pnc |= ia32_pn_Cmp_unsigned;
2011 flags = be_transform_node(pred);
2016 /* a mode_b value, we have to compare it against 0 */
2017 dbgi = get_irn_dbg_info(node);
2018 new_block = be_transform_node(get_nodes_block(node));
2019 new_op = be_transform_node(node);
2020 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2021 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2022 *pnc_out = pn_Cmp_Lg;
2027 * Transforms a Load.
2029 * @return the created ia32 Load node
2031 static ir_node *gen_Load(ir_node *node)
2033 ir_node *old_block = get_nodes_block(node);
2034 ir_node *block = be_transform_node(old_block);
2035 ir_node *ptr = get_Load_ptr(node);
2036 ir_node *mem = get_Load_mem(node);
2037 ir_node *new_mem = be_transform_node(mem);
2040 dbg_info *dbgi = get_irn_dbg_info(node);
2041 ir_mode *mode = get_Load_mode(node);
2043 ia32_address_t addr;
2045 /* construct load address */
2046 memset(&addr, 0, sizeof(addr));
2047 ia32_create_address_mode(&addr, ptr, 0);
2054 base = be_transform_node(base);
2057 if (index == NULL) {
2060 index = be_transform_node(index);
	/* pick the load flavour: SSE2 xLoad / x87 vfld for floats,
	 * plain Load (or widening conv) for GP modes */
2063 if (mode_is_float(mode)) {
2064 if (ia32_cg_config.use_sse2) {
2065 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2068 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2072 assert(mode != mode_b);
2074 /* create a conv node with address mode for smaller modes */
2075 if (get_mode_size_bits(mode) < 32) {
2076 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2077 new_mem, noreg_GP, mode);
2079 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2083 set_irn_pinned(new_node, get_irn_pinned(node));
2084 set_ia32_op_type(new_node, ia32_AddrModeS);
2085 set_ia32_ls_mode(new_node, mode);
2086 set_address(new_node, &addr);
	/* floating (unpinned) loads may be rematerialized by the spiller */
2088 if (get_irn_pinned(node) == op_pin_state_floats) {
2089 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2090 && pn_ia32_vfld_res == pn_ia32_Load_res
2091 && pn_ia32_Load_res == pn_ia32_res);
2092 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2095 SET_IA32_ORIG_NODE(new_node, node);
2097 be_dep_on_frame(new_node);
/* Decide whether a Load feeding a Store at the same address may be folded
 * into a destination-address-mode (read-modify-write) instruction. */
2101 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2102 ir_node *ptr, ir_node *other)
2109 /* we only use address mode if we're the only user of the load */
2110 if (get_irn_n_edges(node) > 1)
2113 load = get_Proj_pred(node);
	/* folding across block boundaries is not possible */
2116 if (get_nodes_block(load) != block)
2119 /* store should have the same pointer as the load */
2120 if (get_Load_ptr(load) != ptr)
2123 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2124 if (other != NULL &&
2125 get_nodes_block(other) == block &&
2126 heights_reachable_in_block(heights, other, load)) {
2130 if (prevents_AM(block, load, mem))
2132 /* Store should be attached to the load via mem */
2133 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binop (op [mem], reg/imm) for a
 * Store(binop(Load)) pattern; returns NULL if the pattern doesn't match. */
2138 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2139 ir_node *mem, ir_node *ptr, ir_mode *mode,
2140 construct_binop_dest_func *func,
2141 construct_binop_dest_func *func8bit,
2142 match_flags_t flags)
2144 ir_node *src_block = get_nodes_block(node);
2152 ia32_address_mode_t am;
2153 ia32_address_t *addr = &am.addr;
2154 memset(&am, 0, sizeof(am));
2156 assert(flags & match_immediate); /* there is no destam node without... */
2157 commutative = (flags & match_commutative) != 0;
	/* the loaded value may be either operand (the second only if the
	 * operation is commutative); the other becomes register/immediate */
2159 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2160 build_address(&am, op1, ia32_create_am_double_use);
2161 new_op = create_immediate_or_transform(op2, 0);
2162 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2163 build_address(&am, op2, ia32_create_am_double_use);
2164 new_op = create_immediate_or_transform(op1, 0);
	/* fill missing address parts with noreg */
2169 if (addr->base == NULL)
2170 addr->base = noreg_GP;
2171 if (addr->index == NULL)
2172 addr->index = noreg_GP;
2173 if (addr->mem == NULL)
2176 dbgi = get_irn_dbg_info(node);
2177 block = be_transform_node(src_block);
2178 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
	/* 8 bit operations need the dedicated 8-bit constructor */
2180 if (get_mode_size_bits(mode) == 8) {
2181 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2183 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2185 set_address(new_node, addr);
2186 set_ia32_op_type(new_node, ia32_AddrModeD);
2187 set_ia32_ls_mode(new_node, mode);
2188 SET_IA32_ORIG_NODE(new_node, node);
	/* redirect the consumed load and its memory proj to the new node */
2190 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2191 mem_proj = be_transform_node(am.mem_proj);
2192 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unop (op [mem]) for a
 * Store(unop(Load)) pattern; returns NULL if the pattern doesn't match. */
2197 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2198 ir_node *ptr, ir_mode *mode,
2199 construct_unop_dest_func *func)
2201 ir_node *src_block = get_nodes_block(node);
2207 ia32_address_mode_t am;
2208 ia32_address_t *addr = &am.addr;
2210 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2213 memset(&am, 0, sizeof(am));
2214 build_address(&am, op, ia32_create_am_double_use);
2216 dbgi = get_irn_dbg_info(node);
2217 block = be_transform_node(src_block);
2218 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2219 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2220 set_address(new_node, addr);
2221 set_ia32_op_type(new_node, ia32_AddrModeD);
2222 set_ia32_ls_mode(new_node, mode);
2223 SET_IA32_ORIG_NODE(new_node, node);
	/* redirect the consumed load and its memory proj to the new node */
2225 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2226 mem_proj = be_transform_node(am.mem_proj);
2227 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2232 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2234 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2235 return get_negated_pnc(pnc, mode);
/* Try to turn a Store(Mux(cond, 1, 0)) into a setcc-to-memory instruction;
 * returns NULL if the pattern doesn't match. */
2238 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2240 ir_mode *mode = get_irn_mode(node);
2241 ir_node *mux_true = get_Mux_true(node);
2242 ir_node *mux_false = get_Mux_false(node);
2251 ia32_address_t addr;
	/* setcc only produces an 8 bit result */
2253 if (get_mode_size_bits(mode) != 8)
	/* accept Mux(c, 1, 0) and the negated Mux(c, 0, 1) form */
2256 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2258 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2264 cond = get_Mux_sel(node);
2265 flags = get_flags_node(cond, &pnc);
2266 /* we can't handle the float special cases with SetM */
2267 if (pnc & ia32_pn_Cmp_float)
2270 pnc = ia32_get_negated_pnc(pnc);
2272 build_address_ptr(&addr, ptr, mem);
2274 dbgi = get_irn_dbg_info(node);
2275 block = get_nodes_block(node);
2276 new_block = be_transform_node(block);
2277 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2278 addr.index, addr.mem, flags, pnc);
2279 set_address(new_node, &addr);
2280 set_ia32_op_type(new_node, ia32_AddrModeD);
2281 set_ia32_ls_mode(new_node, mode);
2282 SET_IA32_ORIG_NODE(new_node, node);
/* Try to express a Store(op(Load)) as a single read-modify-write
 * instruction; returns NULL when no destination-AM form matches. */
2287 static ir_node *try_create_dest_am(ir_node *node)
2289 ir_node *val = get_Store_value(node);
2290 ir_node *mem = get_Store_mem(node);
2291 ir_node *ptr = get_Store_ptr(node);
2292 ir_mode *mode = get_irn_mode(val);
2293 unsigned bits = get_mode_size_bits(mode);
2298 /* handle only GP modes for now... */
2299 if (!ia32_mode_needs_gp_reg(mode))
2303 /* store must be the only user of the val node */
2304 if (get_irn_n_edges(val) > 1)
2306 /* skip pointless convs */
2308 ir_node *conv_op = get_Conv_op(val);
2309 ir_mode *pred_mode = get_irn_mode(conv_op);
2310 if (!ia32_mode_needs_gp_reg(pred_mode))
2312 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2320 /* value must be in the same block */
2321 if (get_nodes_block(node) != get_nodes_block(val))
	/* dispatch on the stored operation */
2324 switch (get_irn_opcode(val)) {
2326 op1 = get_Add_left(val);
2327 op2 = get_Add_right(val);
		/* Add +-1 can use the shorter inc/dec forms */
2328 if (ia32_cg_config.use_incdec) {
2329 if (is_Const_1(op2)) {
2330 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2332 } else if (is_Const_Minus_1(op2)) {
2333 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2339 match_commutative | match_immediate);
2342 op1 = get_Sub_left(val);
2343 op2 = get_Sub_right(val);
2344 if (is_Const(op2)) {
2345 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2352 op1 = get_And_left(val);
2353 op2 = get_And_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2356 match_commutative | match_immediate);
2359 op1 = get_Or_left(val);
2360 op2 = get_Or_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2363 match_commutative | match_immediate);
2366 op1 = get_Eor_left(val);
2367 op2 = get_Eor_right(val);
2368 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2369 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2370 match_commutative | match_immediate);
2373 op1 = get_Shl_left(val);
2374 op2 = get_Shl_right(val);
2375 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2376 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2380 op1 = get_Shr_left(val);
2381 op2 = get_Shr_right(val);
2382 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2383 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2387 op1 = get_Shrs_left(val);
2388 op2 = get_Shrs_right(val);
2389 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2390 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2394 op1 = get_Rotl_left(val);
2395 op2 = get_Rotl_right(val);
2396 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2397 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2400 /* TODO: match ROR patterns... */
2402 new_node = try_create_SetMem(val, ptr, mem);
2406 op1 = get_Minus_op(val);
2407 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2410 /* should be lowered already */
2411 assert(mode != mode_b);
2412 op1 = get_Not_op(val);
2413 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
	/* keep pinned state: the store may not be moved if it was pinned */
2419 if (new_node != NULL) {
2420 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2421 get_irn_pinned(node) == op_pin_state_pinned) {
2422 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether an integer mode could be the target of an x87
 * fist store: signed and 16 or 32 bits wide. */
2429 static bool possible_int_mode_for_fp(ir_mode *mode)
2433 if (!mode_is_signed(mode))
2435 size = get_mode_size_bits(mode);
2436 if (size != 16 && size != 32)
/* Returns non-zero if the node is a Conv from a float mode into an
 * integer mode that a fist store can produce directly. */
2441 static int is_float_to_int_conv(const ir_node *node)
2443 ir_mode *mode = get_irn_mode(node);
2447 if (!possible_int_mode_for_fp(mode))
2452 conv_op = get_Conv_op(node);
2453 conv_mode = get_irn_mode(conv_op);
2455 if (!mode_is_float(conv_mode))
2462 * Transform a Store(floatConst) into a sequence of
2465 * @return the created ia32 Store node
2467 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2469 ir_mode *mode = get_irn_mode(cns);
2470 unsigned size = get_mode_size_bytes(mode);
2471 tarval *tv = get_Const_tarval(cns);
2472 ir_node *block = get_nodes_block(node);
2473 ir_node *new_block = be_transform_node(block);
2474 ir_node *ptr = get_Store_ptr(node);
2475 ir_node *mem = get_Store_mem(node);
2476 dbg_info *dbgi = get_irn_dbg_info(node);
2480 ia32_address_t addr;
	/* the constant is emitted as one 32 bit integer store per 4 bytes */
2482 assert(size % 4 == 0);
2485 build_address_ptr(&addr, ptr, mem);
	/* assemble the next 4 bytes of the tarval, little endian */
2489 get_tarval_sub_bits(tv, ofs) |
2490 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2491 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2492 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2493 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2495 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2496 addr.index, addr.mem, imm);
2498 set_irn_pinned(new_node, get_irn_pinned(node));
2499 set_ia32_op_type(new_node, ia32_AddrModeD);
2500 set_ia32_ls_mode(new_node, mode_Iu);
2501 set_address(new_node, &addr);
2502 SET_IA32_ORIG_NODE(new_node, node);
2505 ins[i++] = new_node;
2510 } while (size != 0);
	/* join the partial stores' memories into a single Sync */
2513 return new_rd_Sync(dbgi, new_block, i, ins);
2520 * Generate a vfist or vfisttp instruction.
2522 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2523 ir_node *mem, ir_node *val, ir_node **fist)
2527 if (ia32_cg_config.use_fisttp) {
2528 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2529 if other users exists */
2530 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2531 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
		/* the Keep holds the popped value alive for potential other users */
2532 be_new_Keep(block, 1, &value);
2534 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
		/* classic fist needs the FPU rounding mode set to truncation */
2537 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2540 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2546 * Transforms a general (no special case) Store.
2548 * @return the created ia32 Store node
2550 static ir_node *gen_general_Store(ir_node *node)
2552 ir_node *val = get_Store_value(node);
2553 ir_mode *mode = get_irn_mode(val);
2554 ir_node *block = get_nodes_block(node);
2555 ir_node *new_block = be_transform_node(block);
2556 ir_node *ptr = get_Store_ptr(node);
2557 ir_node *mem = get_Store_mem(node);
2558 dbg_info *dbgi = get_irn_dbg_info(node);
2559 ir_node *new_val, *new_node, *store;
2560 ia32_address_t addr;
2562 /* check for destination address mode */
2563 new_node = try_create_dest_am(node);
2564 if (new_node != NULL)
2567 /* construct store address */
2568 memset(&addr, 0, sizeof(addr));
2569 ia32_create_address_mode(&addr, ptr, 0);
2571 if (addr.base == NULL) {
2572 addr.base = noreg_GP;
2574 addr.base = be_transform_node(addr.base);
2577 if (addr.index == NULL) {
2578 addr.index = noreg_GP;
2580 addr.index = be_transform_node(addr.index);
2582 addr.mem = be_transform_node(mem);
2584 if (mode_is_float(mode)) {
2585 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2587 while (is_Conv(val) && mode == get_irn_mode(val)) {
2588 ir_node *op = get_Conv_op(val);
2589 if (!mode_is_float(get_irn_mode(op)))
2593 new_val = be_transform_node(val);
		/* float store: SSE2 xStore or x87 vfst */
2594 if (ia32_cg_config.use_sse2) {
2595 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2596 addr.index, addr.mem, new_val);
2598 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2599 addr.index, addr.mem, new_val, mode);
	/* Store(float->int Conv): use fist to convert and store in one step */
2602 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2603 val = get_Conv_op(val);
2605 /* TODO: is this optimisation still necessary at all (middleend)? */
2606 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2607 while (is_Conv(val)) {
2608 ir_node *op = get_Conv_op(val);
2609 if (!mode_is_float(get_irn_mode(op)))
2611 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2615 new_val = be_transform_node(val);
2616 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
	/* plain integer store, with an 8 bit variant for small modes */
2618 new_val = create_immediate_or_transform(val, 0);
2619 assert(mode != mode_b);
2621 if (get_mode_size_bits(mode) == 8) {
2622 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2623 addr.index, addr.mem, new_val);
2625 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2626 addr.index, addr.mem, new_val);
2631 set_irn_pinned(store, get_irn_pinned(node));
2632 set_ia32_op_type(store, ia32_AddrModeD);
2633 set_ia32_ls_mode(store, mode);
2635 set_address(store, &addr);
2636 SET_IA32_ORIG_NODE(store, node);
2642 * Transforms a Store.
2644 * @return the created ia32 Store node
/*
 * Transforms a Store: dispatches float constant stores to the integer-store
 * sequence generator, everything else to the general store transformer.
 * @return the created ia32 Store node
 */
2646 static ir_node *gen_Store(ir_node *node)
2648 ir_node *val = get_Store_value(node);
2649 ir_mode *mode = get_irn_mode(val);
2651 if (mode_is_float(mode) && is_Const(val)) {
2652 /* We can transform every floating const store
2653 into a sequence of integer stores.
2654 If the constant is already in a register,
2655 it would be better to use it, but we don't
2656 have this information here. */
2657 return gen_float_const_Store(node, val);
2659 return gen_general_Store(node);
2663 * Transforms a Switch.
2665 * @return the created ia32 SwitchJmp node
/*
 * Transforms a switch-Cond into an ia32 SwitchJmp (jump table).
 * Scans all case Projs to find the min/max case value, rejects tables that
 * would exceed 128000 entries, and biases the selector with an LEA when the
 * smallest case is not 0.
 * NOTE(review): elided listing — the min/max update statements and the final
 * return are not visible in this excerpt.
 */
2667 static ir_node *create_Switch(ir_node *node)
2669 dbg_info *dbgi = get_irn_dbg_info(node);
2670 ir_node *block = be_transform_node(get_nodes_block(node));
2671 ir_node *sel = get_Cond_selector(node);
2672 ir_node *new_sel = be_transform_node(sel);
2673 long switch_min = LONG_MAX;
2674 long switch_max = LONG_MIN;
2675 long default_pn = get_Cond_default_proj(node);
2677 const ir_edge_t *edge;
2679 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2681 /* determine the smallest switch case value */
2682 foreach_out_edge(node, edge) {
2683 ir_node *proj = get_edge_src_irn(edge);
2684 long pn = get_Proj_proj(proj);
/* the default Proj does not contribute to the table bounds */
2685 if (pn == default_pn)
2688 if (pn < switch_min)
2690 if (pn > switch_max)
/* guard against absurdly large jump tables */
2694 if ((unsigned long) (switch_max - switch_min) > 128000) {
2695 panic("Size of switch %+F bigger than 128000", node);
2698 if (switch_min != 0) {
2699 /* if smallest switch case is not 0 we need an additional sub */
2700 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2701 add_ia32_am_offs_int(new_sel, -switch_min);
2702 set_ia32_op_type(new_sel, ia32_AddrModeS);
2704 SET_IA32_ORIG_NODE(new_sel, node);
2707 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2708 SET_IA32_ORIG_NODE(new_node, node);
2714 * Transform a Cond node.
/*
 * Transform a Cond node: non-boolean selectors are switches, boolean
 * selectors become a conditional jump (Jcc) fed by the flags of a Cmp.
 */
2716 static ir_node *gen_Cond(ir_node *node)
2718 ir_node *block = get_nodes_block(node);
2719 ir_node *new_block = be_transform_node(block);
2720 dbg_info *dbgi = get_irn_dbg_info(node);
2721 ir_node *sel = get_Cond_selector(node);
2722 ir_mode *sel_mode = get_irn_mode(sel);
2723 ir_node *flags = NULL;
/* a non-mode_b selector means this Cond is really a switch */
2727 if (sel_mode != mode_b) {
2728 return create_Switch(node);
2731 /* we get flags from a Cmp */
2732 flags = get_flags_node(sel, &pnc);
2734 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2735 SET_IA32_ORIG_NODE(new_node, node);
2741 * Transform a be_Copy.
/*
 * Transform a be_Copy: duplicate the node and normalize GP-register copies
 * to mode_Iu.
 */
2743 static ir_node *gen_be_Copy(ir_node *node)
2745 ir_node *new_node = be_duplicate_node(node);
2746 ir_mode *mode = get_irn_mode(new_node);
2748 if (ia32_mode_needs_gp_reg(mode)) {
2749 set_irn_mode(new_node, mode_Iu);
/*
 * Create an x87 float compare producing flags.
 * With fucomi the flags land directly in EFLAGS; otherwise Ftst/Fucom write
 * the FPU status word (Fnstsw) which must be moved to the flags via Sahf.
 */
2755 static ir_node *create_Fucom(ir_node *node)
2757 dbg_info *dbgi = get_irn_dbg_info(node);
2758 ir_node *block = get_nodes_block(node);
2759 ir_node *new_block = be_transform_node(block);
2760 ir_node *left = get_Cmp_left(node);
2761 ir_node *new_left = be_transform_node(left);
2762 ir_node *right = get_Cmp_right(node);
2766 if (ia32_cg_config.use_fucomi) {
2767 new_right = be_transform_node(right);
2768 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2770 set_ia32_commutative(new_node);
2771 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst */
2773 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2774 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2776 new_right = be_transform_node(right);
2777 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2780 set_ia32_commutative(new_node);
2782 SET_IA32_ORIG_NODE(new_node, node);
/* transfer FPU status word (in AH) into EFLAGS */
2784 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2785 SET_IA32_ORIG_NODE(new_node, node);
/*
 * Create an SSE Ucomi compare producing flags, using address-mode matching
 * so one operand may be loaded from memory.
 */
2791 static ir_node *create_Ucomi(ir_node *node)
2793 dbg_info *dbgi = get_irn_dbg_info(node);
2794 ir_node *src_block = get_nodes_block(node);
2795 ir_node *new_block = be_transform_node(src_block);
2796 ir_node *left = get_Cmp_left(node);
2797 ir_node *right = get_Cmp_right(node);
2799 ia32_address_mode_t am;
2800 ia32_address_t *addr = &am.addr;
2802 match_arguments(&am, src_block, left, right, NULL,
2803 match_commutative | match_am);
2805 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2806 addr->mem, am.new_op1, am.new_op2,
2808 set_am_attributes(new_node, &am);
2810 SET_IA32_ORIG_NODE(new_node, node);
/* re-route a possible memory Proj when address mode was folded in */
2812 new_node = fix_mem_proj(new_node, &am);
2818 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2819 * to fold an and into a test node
/*
 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
 * to fold an and into a test node (Test only yields valid eq/ne results).
 */
2821 static bool can_fold_test_and(ir_node *node)
2823 const ir_edge_t *edge;
2825 /** we can only have eq and lg projs */
2826 foreach_out_edge(node, edge) {
2827 ir_node *proj = get_edge_src_irn(edge);
2828 pn_Cmp pnc = get_Proj_proj(proj);
2829 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2837 * returns true if it is assured, that the upper bits of a node are "clean"
2838 * which means for a 16 or 8 bit value, that the upper bits in the register
2839 * are 0 for unsigned and a copy of the last significant bit for signed
/*
 * returns true if it is assured, that the upper bits of a node are "clean"
 * which means for a 16 or 8 bit value, that the upper bits in the register
 * are 0 for unsigned and a copy of the last significant bit for signed.
 * Recurses through Projs and inspects the producing ia32 opcode.
 * NOTE(review): elided listing — several case labels and returns between the
 * visible opcode handlers are not shown in this excerpt.
 */
2842 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2844 assert(ia32_mode_needs_gp_reg(mode));
/* full-width (>=32 bit) values have no "upper" bits to worry about */
2845 if (get_mode_size_bits(mode) >= 32)
2848 if (is_Proj(transformed_node))
2849 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2851 switch (get_ia32_irn_opcode(transformed_node)) {
2852 case iro_ia32_Conv_I2I:
2853 case iro_ia32_Conv_I2I8Bit: {
2854 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv only guarantees clean bits when signedness and size agree */
2855 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2857 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2864 if (mode_is_signed(mode)) {
2865 return false; /* TODO handle signed modes */
/* Shr by a large enough constant guarantees zeroed upper bits */
2867 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2868 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2869 const ia32_immediate_attr_t *attr
2870 = get_ia32_immediate_attr_const(right);
2871 if (attr->symconst == 0 &&
2872 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2876 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2880 /* TODO too conservative if shift amount is constant */
2881 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean input suffices */
2884 if (!mode_is_signed(mode)) {
2886 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2887 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2889 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both inputs must be clean */
2894 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2895 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2897 case iro_ia32_Const:
2898 case iro_ia32_Immediate: {
2899 const ia32_immediate_attr_t *attr =
2900 get_ia32_immediate_attr_const(transformed_node);
2901 if (mode_is_signed(mode)) {
/* signed: bits above the sign bit must all equal the sign bit */
2902 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2903 return shifted == 0 || shifted == -1;
/* unsigned: bits above the mode width must all be zero */
2905 unsigned long shifted = (unsigned long)attr->offset;
2906 shifted >>= get_mode_size_bits(mode);
2907 return shifted == 0;
2917 * Generate code for a Cmp.
/*
 * Generate code for a Cmp: floats go to Ucomi (SSE) or Fucom (x87); integers
 * become Test when comparing (x & y) against 0, otherwise Cmp. Small modes
 * are widened to a 32bit compare when the upper register bits are known
 * clean, since the 32bit opcode is smaller.
 * NOTE(review): elided listing — closing braces and the final return are not
 * visible in this excerpt.
 */
2919 static ir_node *gen_Cmp(ir_node *node)
2921 dbg_info *dbgi = get_irn_dbg_info(node);
2922 ir_node *block = get_nodes_block(node);
2923 ir_node *new_block = be_transform_node(block);
2924 ir_node *left = get_Cmp_left(node);
2925 ir_node *right = get_Cmp_right(node);
2926 ir_mode *cmp_mode = get_irn_mode(left);
2928 ia32_address_mode_t am;
2929 ia32_address_t *addr = &am.addr;
2932 if (mode_is_float(cmp_mode)) {
2933 if (ia32_cg_config.use_sse2) {
2934 return create_Ucomi(node);
2936 return create_Fucom(node);
2940 assert(ia32_mode_needs_gp_reg(cmp_mode));
2942 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2943 cmp_unsigned = !mode_is_signed(cmp_mode);
2944 if (is_Const_0(right) &&
2946 get_irn_n_edges(left) == 1 &&
2947 can_fold_test_and(node)) {
2948 /* Test(and_left, and_right) */
2949 ir_node *and_left = get_And_left(left);
2950 ir_node *and_right = get_And_right(left);
2952 /* matze: code here used mode instead of cmd_mode, I think it is always
2953 * the same as cmp_mode, but I leave this here to see if this is really
2956 assert(get_irn_mode(and_left) == cmp_mode);
2958 match_arguments(&am, block, and_left, and_right, NULL,
2960 match_am | match_8bit_am | match_16bit_am |
2961 match_am_and_immediates | match_immediate);
2963 /* use 32bit compare mode if possible since the opcode is smaller */
2964 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2965 upper_bits_clean(am.new_op2, cmp_mode)) {
2966 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2969 if (get_mode_size_bits(cmp_mode) == 8) {
2970 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2971 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2974 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2975 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2978 /* Cmp(left, right) */
2979 match_arguments(&am, block, left, right, NULL,
2980 match_commutative | match_am | match_8bit_am |
2981 match_16bit_am | match_am_and_immediates |
2983 /* use 32bit compare mode if possible since the opcode is smaller */
2984 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2985 upper_bits_clean(am.new_op2, cmp_mode)) {
2986 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2989 if (get_mode_size_bits(cmp_mode) == 8) {
2990 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2991 addr->index, addr->mem, am.new_op1,
2992 am.new_op2, am.ins_permuted,
2995 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2996 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2999 set_am_attributes(new_node, &am);
3000 set_ia32_ls_mode(new_node, cmp_mode);
3002 SET_IA32_ORIG_NODE(new_node, node);
3004 new_node = fix_mem_proj(new_node, &am);
/*
 * Create a CMovcc for a Mux with register operands. The matcher may swap
 * the true/false operands (ins_permuted), in which case the condition code
 * is negated to compensate.
 */
3009 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3012 dbg_info *dbgi = get_irn_dbg_info(node);
3013 ir_node *block = get_nodes_block(node);
3014 ir_node *new_block = be_transform_node(block);
3015 ir_node *val_true = get_Mux_true(node);
3016 ir_node *val_false = get_Mux_false(node);
3018 ia32_address_mode_t am;
3019 ia32_address_t *addr;
3021 assert(ia32_cg_config.use_cmov);
3022 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3026 match_arguments(&am, block, val_false, val_true, flags,
3027 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operands were swapped by the matcher -> invert the condition */
3029 if (am.ins_permuted)
3030 pnc = ia32_get_negated_pnc(pnc);
3032 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3033 addr->mem, am.new_op1, am.new_op2, new_flags,
3035 set_am_attributes(new_node, &am);
3037 SET_IA32_ORIG_NODE(new_node, node);
3039 new_node = fix_mem_proj(new_node, &am);
3045 * Creates a ia32 Setcc instruction.
/*
 * Creates an ia32 Setcc instruction producing a 0/1 result; widens the
 * 8bit Setcc result with a zero-extending conversion when the requested
 * mode is larger than 8 bits.
 */
3047 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3048 ir_node *flags, pn_Cmp pnc,
3051 ir_mode *mode = get_irn_mode(orig_node);
3054 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3055 SET_IA32_ORIG_NODE(new_node, orig_node);
3057 /* we might need to conv the result up */
3058 if (get_mode_size_bits(mode) > 8) {
3059 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3060 nomem, new_node, mode_Bu);
3061 SET_IA32_ORIG_NODE(new_node, orig_node);
3068 * Create instruction for an unsigned Difference or Zero.
/*
 * Create instruction sequence for an unsigned Difference or Zero:
 * computes a - b, then masks the result with ~(borrow ? -1 : 0) via
 * Sbb0 + Not + And, yielding (a - b) when a >= b (unsigned) and 0 otherwise.
 */
3070 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3072 ir_mode *mode = get_irn_mode(psi);
3082 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3083 match_mode_neutral | match_am | match_immediate | match_two_users);
3085 block = get_nodes_block(new_node);
3087 if (is_Proj(new_node)) {
3088 sub = get_Proj_pred(new_node);
3089 assert(is_ia32_Sub(sub));
/* need both the result and the flags output of the Sub */
3092 set_irn_mode(sub, mode_T);
3093 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3095 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3097 dbgi = get_irn_dbg_info(psi);
/* sbb 0 materializes the borrow: 0 or -1; Not turns it into the mask */
3098 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3099 not = new_bd_ia32_Not(dbgi, block, sbb);
3101 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3102 set_ia32_commutative(new_node);
3107 * Create an const array of two float consts.
3109 * @param c0 the first constant
3110 * @param c1 the second constant
3111 * @param new_mode IN/OUT for the mode of the constants, if NULL
3112 * smallest possible mode will be used
/*
 * Create a const array of two float consts in global memory (used by gen_Mux
 * to index a 2-entry table with the Setcc result).
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT for the mode of the constants, if NULL the
 *                  smallest possible mode will be used
 */
3114 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3117 ir_mode *mode = *new_mode;
3119 ir_initializer_t *initializer;
3120 tarval *tv0 = get_Const_tarval(c0);
3121 tarval *tv1 = get_Const_tarval(c1);
3124 /* detect the best mode for the constants */
3125 mode = get_tarval_mode(tv0);
/* try to narrow to float, then double, when lossless for both values */
3127 if (mode != mode_F) {
3128 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3129 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3131 tv0 = tarval_convert_to(tv0, mode);
3132 tv1 = tarval_convert_to(tv1, mode);
3133 } else if (mode != mode_D) {
3134 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3135 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3137 tv0 = tarval_convert_to(tv0, mode);
3138 tv1 = tarval_convert_to(tv1, mode);
3145 tp = ia32_create_float_type(mode, 4);
3146 tp = ia32_create_float_array(tp);
/* emit as a private, constant global entity */
3148 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3150 set_entity_ld_ident(ent, get_entity_ident(ent));
3151 set_entity_visibility(ent, ir_visibility_private);
3152 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3154 initializer = create_initializer_compound(2);
3156 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3157 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3159 set_entity_initializer(ent, initializer);
3166 * Possible transformations for creating a Setcc.
/*
 * Possible transformations for creating a Setcc result, plus the plan
 * structure filled by find_const_transform().
 * NOTE(review): elided listing — the enum members and most struct fields are
 * not visible in this excerpt.
 */
3168 enum setcc_transform_insn {
3181 typedef struct setcc_transform {
/* when set, the compare inputs must be swapped (ins_permuted toggled) */
3183 unsigned permutate_cmp_ins;
3186 enum setcc_transform_insn transform;
3190 } setcc_transform_t;
3193 * Setcc can only handle 0 and 1 result.
3194 * Find a transformation that creates 0 and 1 from
/*
 * Setcc can only handle 0 and 1 result.
 * Find a transformation (a short sequence of Add/Lea/Shl/Neg/Not/And steps)
 * that creates the constants t and f from the 0/1 Setcc output, normalizing
 * first so that f becomes 0 and t the remaining (bigger) value.
 * NOTE(review): elided listing — several case labels, `return`s and step
 * increments between the visible branches are not shown in this excerpt.
 */
3197 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3198 setcc_transform_t *res)
3203 res->permutate_cmp_ins = 0;
/* normalize: make f the zero/smaller value, negating the condition */
3205 if (tarval_is_null(t)) {
3209 pnc = ia32_get_negated_pnc(pnc);
3210 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3211 // now, t is the bigger one
3215 pnc = ia32_get_negated_pnc(pnc);
/* non-zero f: emit an ADD step and reduce to the f == 0 case */
3219 if (! tarval_is_null(f)) {
3220 tarval *t_sub = tarval_sub(t, f, NULL);
3223 res->steps[step].transform = SETCC_TR_ADD;
3225 if (t == tarval_bad)
3226 panic("constant subtract failed");
3227 if (! tarval_is_long(f))
3228 panic("tarval is not long");
3230 res->steps[step].val = get_tarval_long(f);
3232 f = tarval_sub(f, f, NULL);
3233 assert(tarval_is_null(f));
/* t == 1: the Setcc result is already correct */
3236 if (tarval_is_one(t)) {
3237 res->steps[step].transform = SETCC_TR_SET;
3238 res->num_steps = ++step;
/* t == -1: Setcc followed by negate */
3242 if (tarval_is_minus_one(t)) {
3243 res->steps[step].transform = SETCC_TR_NEG;
3245 res->steps[step].transform = SETCC_TR_SET;
3246 res->num_steps = ++step;
3249 if (tarval_is_long(t)) {
3250 long v = get_tarval_long(t);
3252 res->steps[step].val = 0;
/* the scale-9/5/3 cases use Lea (a << scale) + a, scale-8/4/2 use
 * Shl or Lea with an offset, depending on a pending ADD step */
3255 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3257 res->steps[step].transform = SETCC_TR_LEAxx;
3258 res->steps[step].scale = 3; /* (a << 3) + a */
3261 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3263 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3264 res->steps[step].scale = 3; /* (a << 3) */
3267 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3269 res->steps[step].transform = SETCC_TR_LEAxx;
3270 res->steps[step].scale = 2; /* (a << 2) + a */
3273 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3275 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3276 res->steps[step].scale = 2; /* (a << 2) */
3279 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3281 res->steps[step].transform = SETCC_TR_LEAxx;
3282 res->steps[step].scale = 1; /* (a << 1) + a */
3285 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3287 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3288 res->steps[step].scale = 1; /* (a << 1) */
3291 res->num_steps = step;
/* fall back: general constant via AND mask or shift of the single bit */
3294 if (! tarval_is_single_bit(t)) {
3295 res->steps[step].transform = SETCC_TR_AND;
3296 res->steps[step].val = v;
3298 res->steps[step].transform = SETCC_TR_NEG;
3300 int v = get_tarval_lowest_bit(t);
3303 res->steps[step].transform = SETCC_TR_SHL;
3304 res->steps[step].scale = v;
3308 res->steps[step].transform = SETCC_TR_SET;
3309 res->num_steps = ++step;
3312 panic("tarval is not long");
3316 * Transforms a Mux node into some code sequence.
3318 * @return The transformed node.
/*
 * Transforms a Mux node into some code sequence:
 *  - float Mux over a compare of its own operands -> SSE Min/Max,
 *  - float Mux of two constants -> Setcc-indexed load from a 2-entry
 *    constant array,
 *  - unsigned "difference or zero" patterns -> create_doz,
 *  - integer Mux of two constants -> Setcc plus a planned Add/Lea/Shl/
 *    Neg/Not/And sequence (find_const_transform),
 *  - everything else -> CMovcc.
 * NOTE(review): elided listing — closing braces, case labels, breaks and the
 * final return are not visible in this excerpt.
 *
 * @return The transformed node.
 */
3320 static ir_node *gen_Mux(ir_node *node)
3322 dbg_info *dbgi = get_irn_dbg_info(node);
3323 ir_node *block = get_nodes_block(node);
3324 ir_node *new_block = be_transform_node(block);
3325 ir_node *mux_true = get_Mux_true(node);
3326 ir_node *mux_false = get_Mux_false(node);
3327 ir_node *cond = get_Mux_sel(node);
3328 ir_mode *mode = get_irn_mode(node);
3333 assert(get_irn_mode(cond) == mode_b);
3335 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3336 if (mode_is_float(mode)) {
3337 ir_node *cmp = get_Proj_pred(cond);
3338 ir_node *cmp_left = get_Cmp_left(cmp);
3339 ir_node *cmp_right = get_Cmp_right(cmp);
3340 pn_Cmp pnc = get_Proj_proj(cond);
3342 if (ia32_cg_config.use_sse2) {
3343 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3344 if (cmp_left == mux_true && cmp_right == mux_false) {
3345 /* Mux(a <= b, a, b) => MIN */
3346 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3347 match_commutative | match_am | match_two_users);
3348 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3349 /* Mux(a <= b, b, a) => MAX */
3350 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3351 match_commutative | match_am | match_two_users);
3353 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3354 if (cmp_left == mux_true && cmp_right == mux_false) {
3355 /* Mux(a >= b, a, b) => MAX */
3356 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3357 match_commutative | match_am | match_two_users);
3358 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3359 /* Mux(a >= b, b, a) => MIN */
3360 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3361 match_commutative | match_am | match_two_users);
/* float Mux of two constants: load from a 2-entry table indexed by Setcc */
3366 if (is_Const(mux_true) && is_Const(mux_false)) {
3367 ia32_address_mode_t am;
3372 flags = get_flags_node(cond, &pnc);
3373 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3375 if (ia32_cg_config.use_sse2) {
3376 /* cannot load from different mode on SSE */
3379 /* x87 can load any mode */
3383 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (via AM scale / Lea / Add) */
3385 switch (get_mode_size_bytes(new_mode)) {
3395 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3396 set_ia32_am_scale(new_node, 2);
3401 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3402 set_ia32_am_scale(new_node, 1);
3405 /* arg, shift 16 NOT supported */
3407 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3410 panic("Unsupported constant size");
3413 am.ls_mode = new_mode;
3414 am.addr.base = get_symconst_base();
3415 am.addr.index = new_node;
3416 am.addr.mem = nomem;
3418 am.addr.scale = scale;
3419 am.addr.use_frame = 0;
3420 am.addr.frame_entity = NULL;
3421 am.addr.symconst_sign = 0;
3422 am.mem_proj = am.addr.mem;
3423 am.op_type = ia32_AddrModeS;
3426 am.pinned = op_pin_state_floats;
3428 am.ins_permuted = 0;
3430 if (ia32_cg_config.use_sse2)
3431 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3433 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3434 set_am_attributes(load, &am);
3436 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3438 panic("cannot transform floating point Mux");
/* integer Mux from here on */
3441 assert(ia32_mode_needs_gp_reg(mode));
3443 if (is_Proj(cond)) {
3444 ir_node *cmp = get_Proj_pred(cond);
3446 ir_node *cmp_left = get_Cmp_left(cmp);
3447 ir_node *cmp_right = get_Cmp_right(cmp);
3448 pn_Cmp pnc = get_Proj_proj(cond);
3450 /* check for unsigned Doz first */
3451 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3452 is_Const_0(mux_false) && is_Sub(mux_true) &&
3453 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3454 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3455 return create_doz(node, cmp_left, cmp_right);
3456 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3457 is_Const_0(mux_true) && is_Sub(mux_false) &&
3458 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3459 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3460 return create_doz(node, cmp_left, cmp_right);
3465 flags = get_flags_node(cond, &pnc);
3467 if (is_Const(mux_true) && is_Const(mux_false)) {
3468 /* both are const, good */
3469 tarval *tv_true = get_Const_tarval(mux_true);
3470 tarval *tv_false = get_Const_tarval(mux_false);
3471 setcc_transform_t res;
3474 find_const_transform(pnc, tv_true, tv_false, &res);
3476 if (res.permutate_cmp_ins) {
3477 ia32_attr_t *attr = get_ia32_attr(flags);
3478 attr->data.ins_permuted ^= 1;
/* materialize the planned steps, last step first (plan is innermost-out) */
3480 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3483 switch (res.steps[step].transform) {
3485 imm = ia32_immediate_from_long(res.steps[step].val);
3486 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3488 case SETCC_TR_ADDxx:
3489 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3492 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3493 set_ia32_am_scale(new_node, res.steps[step].scale);
3494 set_ia32_am_offs_int(new_node, res.steps[step].val);
3496 case SETCC_TR_LEAxx:
3497 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3498 set_ia32_am_scale(new_node, res.steps[step].scale);
3499 set_ia32_am_offs_int(new_node, res.steps[step].val);
3502 imm = ia32_immediate_from_long(res.steps[step].scale);
3503 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3506 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3509 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3512 imm = ia32_immediate_from_long(res.steps[step].val);
3513 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3516 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, new_node);
3519 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3522 panic("unknown setcc transform");
/* general case: conditional move */
3526 new_node = create_CMov(node, cond, flags, pnc);
3534 * Create a conversion from x87 state register to general purpose.
/*
 * Create a conversion from x87 state register to general purpose: spill the
 * float to the frame with vfist(tp) and reload it as an integer Load.
 */
3536 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3538 ir_node *block = be_transform_node(get_nodes_block(node));
3539 ir_node *op = get_Conv_op(node);
3540 ir_node *new_op = be_transform_node(op);
3541 ir_graph *irg = current_ir_graph;
3542 dbg_info *dbgi = get_irn_dbg_info(node);
3543 ir_mode *mode = get_irn_mode(node);
3544 ir_node *fist, *load, *mem;
3546 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3547 set_irn_pinned(fist, op_pin_state_floats);
3548 set_ia32_use_frame(fist);
3549 set_ia32_op_type(fist, ia32_AddrModeD);
3551 assert(get_mode_size_bits(mode) <= 32);
3552 /* exception we can only store signed 32 bit integers, so for unsigned
3553 we store a 64bit (signed) integer and load the lower bits */
3554 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3555 set_ia32_ls_mode(fist, mode_Ls);
3557 set_ia32_ls_mode(fist, mode_Is);
3559 SET_IA32_ORIG_NODE(fist, node);
/* reload the (lower 32 bits of the) stored integer from the frame slot */
3562 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3564 set_irn_pinned(load, op_pin_state_floats);
3565 set_ia32_use_frame(load);
3566 set_ia32_op_type(load, ia32_AddrModeS);
3567 set_ia32_ls_mode(load, mode_Is);
/* request a frame entity big enough for the spill mode used above */
3568 if (get_ia32_ls_mode(fist) == mode_Ls) {
3569 ia32_attr_t *attr = get_ia32_attr(load);
3570 attr->data.need_64bit_stackent = 1;
3572 ia32_attr_t *attr = get_ia32_attr(load);
3573 attr->data.need_32bit_stackent = 1;
3575 SET_IA32_ORIG_NODE(load, node);
3577 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3581 * Creates a x87 strict Conv by placing a Store and a Load
/*
 * Creates an x87 strict Conv by placing a Store and a Load: the round-trip
 * through memory forces the value to the exact target precision.
 */
3583 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3585 ir_node *block = get_nodes_block(node);
3586 ir_graph *irg = get_Block_irg(block);
3587 dbg_info *dbgi = get_irn_dbg_info(node);
3588 ir_node *frame = get_irg_frame(irg);
3589 ir_node *store, *load;
3592 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3593 set_ia32_use_frame(store);
3594 set_ia32_op_type(store, ia32_AddrModeD);
3595 SET_IA32_ORIG_NODE(store, node);
3597 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3598 set_ia32_use_frame(load);
3599 set_ia32_op_type(load, ia32_AddrModeS);
3600 SET_IA32_ORIG_NODE(load, node);
3602 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/*
 * Create an integer-to-integer conversion node, selecting the 8bit variant
 * when the target mode is 8 bits wide.
 */
3606 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3607 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3609 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3611 func = get_mode_size_bits(mode) == 8 ?
3612 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3613 return func(dbgi, block, base, index, mem, val, mode);
3617 * Create a conversion from general purpose to x87 register
/*
 * Create a conversion from general purpose to x87 register: tries fild with
 * source address mode first, otherwise spills the integer to the frame and
 * filds it back. Unsigned 32bit values get a 64bit spill (upper word zeroed)
 * since fild only reads signed integers.
 * NOTE(review): elided listing — closing braces, the Sync input setup and
 * the final return are not visible in this excerpt.
 */
3619 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3621 ir_node *src_block = get_nodes_block(node);
3622 ir_node *block = be_transform_node(src_block);
3623 ir_graph *irg = get_Block_irg(block);
3624 dbg_info *dbgi = get_irn_dbg_info(node);
3625 ir_node *op = get_Conv_op(node);
3626 ir_node *new_op = NULL;
3628 ir_mode *store_mode;
3633 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3634 if (possible_int_mode_for_fp(src_mode)) {
3635 ia32_address_mode_t am;
3637 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3638 if (am.op_type == ia32_AddrModeS) {
3639 ia32_address_t *addr = &am.addr;
3641 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3642 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3644 set_am_attributes(fild, &am);
3645 SET_IA32_ORIG_NODE(fild, node);
3647 fix_mem_proj(fild, &am);
/* fall through: no AM possible, go via a frame spill slot */
3652 if (new_op == NULL) {
3653 new_op = be_transform_node(op);
3656 mode = get_irn_mode(op);
3658 /* first convert to 32 bit signed if necessary */
3659 if (get_mode_size_bits(src_mode) < 32) {
3660 if (!upper_bits_clean(new_op, src_mode)) {
3661 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3662 SET_IA32_ORIG_NODE(new_op, node);
3667 assert(get_mode_size_bits(mode) == 32);
3670 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3672 set_ia32_use_frame(store);
3673 set_ia32_op_type(store, ia32_AddrModeD);
3674 set_ia32_ls_mode(store, mode_Iu);
3676 /* exception for 32bit unsigned, do a 64bit spill+load */
3677 if (!mode_is_signed(mode)) {
/* zero the upper 4 bytes of the 64bit slot so fild reads a positive value */
3680 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3682 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3683 noreg_GP, nomem, zero_const);
3685 set_ia32_use_frame(zero_store);
3686 set_ia32_op_type(zero_store, ia32_AddrModeD);
3687 add_ia32_am_offs_int(zero_store, 4);
3688 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must complete before the fild reads the slot */
3693 store = new_rd_Sync(dbgi, block, 2, in);
3694 store_mode = mode_Ls;
3696 store_mode = mode_Is;
3700 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3702 set_ia32_use_frame(fild);
3703 set_ia32_op_type(fild, ia32_AddrModeS);
3704 set_ia32_ls_mode(fild, store_mode);
3706 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3712 * Create a conversion from one integer mode into another one
/*
 * Create a conversion from one integer mode into another one: emits an ia32
 * Conv_I2I (possibly with source address mode) in the smaller of the two
 * modes, skipping the conv entirely when the operand's upper bits are
 * already clean.
 * NOTE(review): elided listing — the early-return of the clean-bits branch
 * and the final return are not visible in this excerpt.
 */
3714 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3715 dbg_info *dbgi, ir_node *block, ir_node *op,
3718 ir_node *new_block = be_transform_node(block);
3720 ir_mode *smaller_mode;
3721 ia32_address_mode_t am;
3722 ia32_address_t *addr = &am.addr;
3725 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3726 smaller_mode = src_mode;
3728 smaller_mode = tgt_mode;
3731 #ifdef DEBUG_libfirm
3733 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3738 match_arguments(&am, block, NULL, op, NULL,
3739 match_am | match_8bit_am | match_16bit_am);
3741 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3742 /* unnecessary conv. in theory it shouldn't have been AM */
3743 assert(is_ia32_NoReg_GP(addr->base));
3744 assert(is_ia32_NoReg_GP(addr->index));
3745 assert(is_NoMem(addr->mem));
3746 assert(am.addr.offset == 0);
3747 assert(am.addr.symconst_ent == NULL);
3751 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3752 addr->mem, am.new_op2, smaller_mode);
3753 set_am_attributes(new_node, &am);
3754 /* match_arguments assume that out-mode = in-mode, this isn't true here
3756 set_ia32_ls_mode(new_node, smaller_mode);
3757 SET_IA32_ORIG_NODE(new_node, node);
3758 new_node = fix_mem_proj(new_node, &am);
/**
 * Transforms a Conv node.
 *
 * Dispatches on source/target mode: no-op same-mode conversions are killed
 * (strict ones too under SSE2), float<->float uses Conv_FP2FP or x87 strict
 * conversion, float->int uses Conv_FP2I or the x87 helper, int->float uses
 * Conv_I2FP or gen_x87_gp_to_fp, and int->int falls through to
 * create_I2I_Conv.  Integer modes wider than 32 bits must already have been
 * lowered (asserted below).
 *
 * NOTE(review): some original lines are elided in this extract; the control
 * flow shown here is incomplete.
 *
 * @return The created ia32 Conv node
 */
3767 static ir_node *gen_Conv(ir_node *node)
3769 ir_node *block = get_nodes_block(node);
3770 ir_node *new_block = be_transform_node(block);
3771 ir_node *op = get_Conv_op(node);
3772 ir_node *new_op = NULL;
3773 dbg_info *dbgi = get_irn_dbg_info(node);
3774 ir_mode *src_mode = get_irn_mode(op);
3775 ir_mode *tgt_mode = get_irn_mode(node);
3776 int src_bits = get_mode_size_bits(src_mode);
3777 int tgt_bits = get_mode_size_bits(tgt_mode);
3778 ir_node *res = NULL;
/* 64bit integer values must have been lowered before this phase */
3780 assert(!mode_is_int(src_mode) || src_bits <= 32);
3781 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3783 /* modeB -> X should already be lowered by the lower_mode_b pass */
3784 if (src_mode == mode_b) {
3785 panic("ConvB not lowered %+F", node);
3788 if (src_mode == tgt_mode) {
3789 if (get_Conv_strict(node)) {
3790 if (ia32_cg_config.use_sse2) {
3791 /* when we are in SSE mode, we can kill all strict no-op conversion */
3792 return be_transform_node(op);
3795 /* this should be optimized already, but who knows... */
3796 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3797 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3798 return be_transform_node(op);
3802 if (mode_is_float(src_mode)) {
3803 new_op = be_transform_node(op);
3804 /* we convert from float ... */
3805 if (mode_is_float(tgt_mode)) {
3807 if (ia32_cg_config.use_sse2) {
3808 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3809 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3811 set_ia32_ls_mode(res, tgt_mode);
3813 if (get_Conv_strict(node)) {
3814 /* if fp_no_float_fold is not set then we assume that we
3815 * don't have any float operations in a non
3816 * mode_float_arithmetic mode and can skip strict upconvs */
3817 if (src_bits < tgt_bits
3818 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3819 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3822 res = gen_x87_strict_conv(tgt_mode, new_op);
3823 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3827 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3832 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3833 if (ia32_cg_config.use_sse2) {
3834 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3836 set_ia32_ls_mode(res, src_mode);
3838 return gen_x87_fp_to_gp(node);
3842 /* we convert from int ... */
3843 if (mode_is_float(tgt_mode)) {
3845 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3846 if (ia32_cg_config.use_sse2) {
3847 new_op = be_transform_node(op);
3848 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3850 set_ia32_ls_mode(res, tgt_mode);
/* x87 path: a strict conversion is only needed when the integer has
 * more mantissa bits than the target float can represent exactly */
3852 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3853 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3854 res = gen_x87_gp_to_fp(node, src_mode);
3856 /* we need a strict-Conv, if the int mode has more bits than the
3858 if (float_mantissa < int_mantissa) {
3859 res = gen_x87_strict_conv(tgt_mode, res);
3860 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3864 } else if (tgt_mode == mode_b) {
3865 /* mode_b lowering already took care that we only have 0/1 values */
3866 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3867 src_mode, tgt_mode));
3868 return be_transform_node(op);
3871 if (src_bits == tgt_bits) {
3872 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3873 src_mode, tgt_mode));
3874 return be_transform_node(op);
3877 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Tries to turn @p node into an ia32 Immediate satisfying the given
 * constraint type; falls back to the normal node transformation when no
 * immediate can be created.
 */
3885 static ir_node *create_immediate_or_transform(ir_node *node,
3886 char immediate_constraint_type)
3888 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3889 if (new_node == NULL) {
3890 new_node = be_transform_node(node);
/**
 * Transforms a be_FrameAddr into an ia32 Lea that is marked to use the
 * frame pointer; the concrete frame entity/offset is resolved later.
 */
3898 static ir_node *gen_be_FrameAddr(ir_node *node)
3900 ir_node *block = be_transform_node(get_nodes_block(node));
3901 ir_node *op = be_get_FrameAddr_frame(node);
3902 ir_node *new_op = be_transform_node(op);
3903 dbg_info *dbgi = get_irn_dbg_info(node);
3906 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3907 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3908 set_ia32_use_frame(new_node);
3910 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms a be_Return.
 *
 * In case SSE is used and a float value is returned, the value must be
 * moved from XMM0 to the x87 FPU top-of-stack (the ia32 calling convention
 * returns floats in st0).  This is done by storing XMM0 to a frame slot
 * (xStoreSimple) and reloading it with an x87 vfld, then rebuilding the
 * Barrier so the Return sees the reloaded value and the new memory state.
 * All other cases are simply duplicated.
 */
3918 static ir_node *gen_be_Return(ir_node *node)
3920 ir_graph *irg = current_ir_graph;
3921 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3922 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3923 ir_entity *ent = get_irg_entity(irg);
3924 ir_type *tp = get_entity_type(ent);
3929 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3930 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3932 int pn_ret_val, pn_ret_mem, arity, i;
/* the special handling is only needed for SSE float returns */
3934 assert(ret_val != NULL);
3935 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3936 return be_duplicate_node(node);
3939 res_type = get_method_res_type(tp, 0);
3941 if (! is_Primitive_type(res_type)) {
3942 return be_duplicate_node(node);
3945 mode = get_type_mode(res_type);
3946 if (! mode_is_float(mode)) {
3947 return be_duplicate_node(node);
3950 assert(get_method_n_ress(tp) == 1);
3952 pn_ret_val = get_Proj_proj(ret_val);
3953 pn_ret_mem = get_Proj_proj(ret_mem);
3955 /* get the Barrier */
3956 barrier = get_Proj_pred(ret_val);
3958 /* get result input of the Barrier */
3959 ret_val = get_irn_n(barrier, pn_ret_val);
3960 new_ret_val = be_transform_node(ret_val);
3962 /* get memory input of the Barrier */
3963 ret_mem = get_irn_n(barrier, pn_ret_mem);
3964 new_ret_mem = be_transform_node(ret_mem);
3966 frame = get_irg_frame(irg);
3968 dbgi = get_irn_dbg_info(barrier);
3969 block = be_transform_node(get_nodes_block(barrier));
3971 /* store xmm0 onto stack */
3972 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3973 new_ret_mem, new_ret_val);
3974 set_ia32_ls_mode(sse_store, mode);
3975 set_ia32_op_type(sse_store, ia32_AddrModeD);
3976 set_ia32_use_frame(sse_store);
3978 /* load into x87 register */
3979 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3980 set_ia32_op_type(fld, ia32_AddrModeS);
3981 set_ia32_use_frame(fld);
3983 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3984 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3986 /* create a new barrier: value and memory inputs are replaced by the
 * x87 reload and its memory Proj, everything else is transformed */
3987 arity = get_irn_arity(barrier);
3988 in = ALLOCAN(ir_node*, arity);
3989 for (i = 0; i < arity; ++i) {
3992 if (i == pn_ret_val) {
3994 } else if (i == pn_ret_mem) {
3997 ir_node *in = get_irn_n(barrier, i);
3998 new_in = be_transform_node(in);
4003 new_barrier = new_ir_node(dbgi, irg, block,
4004 get_irn_op(barrier), get_irn_mode(barrier),
4006 copy_node_attr(irg, barrier, new_barrier);
4007 be_duplicate_deps(barrier, new_barrier);
4008 be_set_transformed_node(barrier, new_barrier);
4010 /* transform normally */
4011 return be_duplicate_node(node);
/**
 * Transform a be_AddSP into an ia32_SubSP.
 *
 * Note the inversion: the IR "AddSP" grows the stack, which on ia32
 * (downward-growing stack) is a subtraction from %esp.
 */
4017 static ir_node *gen_be_AddSP(ir_node *node)
4019 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4020 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4022 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4023 match_am | match_immediate);
/**
 * Transform a be_SubSP into an ia32_AddSP.
 *
 * Inverse of gen_be_AddSP: shrinking the stack means adding to %esp.
 */
4029 static ir_node *gen_be_SubSP(ir_node *node)
4031 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4032 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4034 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4035 match_am | match_immediate);
/**
 * Transforms a Phi node: picks the proper register class requirement for
 * its mode (gp for integers/pointers, xmm or vfp for floats depending on
 * SSE2 availability) and duplicates the Phi with its OLD (untransformed)
 * arguments; loops make it impossible to transform predecessors first, so
 * the inputs are fixed up later (be_enqueue_preds).
 */
4041 static ir_node *gen_Phi(ir_node *node)
4043 const arch_register_req_t *req;
4044 ir_node *block = be_transform_node(get_nodes_block(node));
4045 ir_graph *irg = current_ir_graph;
4046 dbg_info *dbgi = get_irn_dbg_info(node);
4047 ir_mode *mode = get_irn_mode(node);
4050 if (ia32_mode_needs_gp_reg(mode)) {
4051 /* we shouldn't have any 64bit stuff around anymore */
4052 assert(get_mode_size_bits(mode) <= 32);
4053 /* all integer operations are on 32bit registers now */
4055 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4056 } else if (mode_is_float(mode)) {
4057 if (ia32_cg_config.use_sse2) {
4059 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4062 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4065 req = arch_no_register_req;
4068 /* phi nodes allow loops, so we use the old arguments for now
4069 * and fix this later */
4070 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4071 get_irn_in(node) + 1);
4072 copy_node_attr(irg, node, phi);
4073 be_duplicate_deps(node, phi);
4075 arch_set_out_register_req(phi, 0, req);
4077 be_enqueue_preds(node);
/**
 * Transforms an unconditional Jmp into an ia32 Jmp in the transformed block.
 */
4082 static ir_node *gen_Jmp(ir_node *node)
4084 ir_node *block = get_nodes_block(node);
4085 ir_node *new_block = be_transform_node(block);
4086 dbg_info *dbgi = get_irn_dbg_info(node);
4089 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4090 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an indirect jump (IJmp).  The jump target may be folded into
 * an address mode or an immediate via match_arguments.
 */
4098 static ir_node *gen_IJmp(ir_node *node)
4100 ir_node *block = get_nodes_block(node);
4101 ir_node *new_block = be_transform_node(block);
4102 dbg_info *dbgi = get_irn_dbg_info(node);
4103 ir_node *op = get_IJmp_target(node);
4105 ia32_address_mode_t am;
4106 ia32_address_t *addr = &am.addr;
/* indirect jump targets must be pointers */
4108 assert(get_irn_mode(op) == mode_P);
4110 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4112 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4113 addr->mem, am.new_op2);
4114 set_am_attributes(new_node, &am);
4115 SET_IA32_ORIG_NODE(new_node, node);
4117 new_node = fix_mem_proj(new_node, &am);
/**
 * Transform a Bound node.
 *
 * Only the common case with lower bound 0 (typical for Java array checks)
 * is supported: index and upper bound are compared with a Sub and an
 * unsigned "less than" Jcc consumes the flags — an unsigned compare
 * simultaneously checks index >= 0 and index < upper.  Any other form
 * panics.
 */
4125 static ir_node *gen_Bound(ir_node *node)
4128 ir_node *lower = get_Bound_lower(node);
4129 dbg_info *dbgi = get_irn_dbg_info(node);
4131 if (is_Const_0(lower)) {
4132 /* typical case for Java */
4133 ir_node *sub, *res, *flags, *block;
4135 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4136 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4138 block = get_nodes_block(res);
4139 if (! is_Proj(res)) {
4141 set_irn_mode(sub, mode_T);
4142 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4144 sub = get_Proj_pred(res);
4146 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4147 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4148 SET_IA32_ORIG_NODE(new_node, node);
4150 panic("generic Bound not supported in ia32 Backend");
/**
 * Transforms an ia32_l_ShlDep (shift-left with explicit dependency input)
 * into a real ia32 Shl.
 */
4156 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4158 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4159 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4161 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4162 match_immediate | match_mode_neutral);
/**
 * Transforms an ia32_l_ShrDep (logical shift-right with dependency input)
 * into a real ia32 Shr.
 */
4165 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4167 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4168 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4169 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transforms an ia32_l_SarDep (arithmetic shift-right with dependency
 * input) into a real ia32 Sar.
 */
4173 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4175 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4176 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4177 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (lowered 64bit add, low word) into a real ia32
 * Add.  The result is forced to mode_T so the carry flag can be consumed
 * by a following l_Adc.
 */
4181 static ir_node *gen_ia32_l_Add(ir_node *node)
4183 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4184 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4185 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4186 match_commutative | match_am | match_immediate |
4187 match_mode_neutral);
/* gen_binop may hand back a Proj; we need the Add itself in mode_T */
4189 if (is_Proj(lowered)) {
4190 lowered = get_Proj_pred(lowered);
4192 assert(is_ia32_Add(lowered));
4193 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Adc (lowered 64bit add, high word, consumes carry)
 * into a real ia32 Adc via the flags-aware binop helper.
 */
4199 static ir_node *gen_ia32_l_Adc(ir_node *node)
4201 return gen_binop_flags(node, new_bd_ia32_Adc,
4202 match_commutative | match_am | match_immediate |
4203 match_mode_neutral);
/**
 * Transforms an ia32_l_Mul into a "real" ia32 Mul node (unsigned widening
 * multiply producing a low/high result pair).
 *
 * @return the created ia32 Mul node
 */
4211 static ir_node *gen_ia32_l_Mul(ir_node *node)
4213 ir_node *left = get_binop_left(node);
4214 ir_node *right = get_binop_right(node);
4216 return gen_binop(node, left, right, new_bd_ia32_Mul,
4217 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_IMul into a "real" ia32 IMul1OP node (signed
 * widening multiply, one-operand form producing EDX:EAX).
 *
 * @return the created ia32 IMul1OP node
 */
4225 static ir_node *gen_ia32_l_IMul(ir_node *node)
4227 ir_node *left = get_binop_left(node);
4228 ir_node *right = get_binop_right(node);
4230 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4231 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (lowered 64bit sub, low word) into a real ia32
 * Sub.  Like gen_ia32_l_Add, the node is switched to mode_T so a following
 * l_Sbb can consume the borrow flag.
 */
4234 static ir_node *gen_ia32_l_Sub(ir_node *node)
4236 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4237 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4238 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4239 match_am | match_immediate | match_mode_neutral);
4241 if (is_Proj(lowered)) {
4242 lowered = get_Proj_pred(lowered);
4244 assert(is_ia32_Sub(lowered));
4245 set_irn_mode(lowered, mode_T);
/**
 * Transforms an ia32_l_Sbb (lowered 64bit sub, high word, consumes borrow)
 * into a real ia32 Sbb via the flags-aware binop helper.
 */
4251 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4253 return gen_binop_flags(node, new_bd_ia32_Sbb,
4254 match_am | match_immediate | match_mode_neutral);
/**
 * Transforms an l_ShlD/l_ShrD into a ShlD/ShrD.  Those nodes have 3 data
 * inputs:
 *   op1 (high)  - target to be shifted
 *   op2 (low)   - contains bits to be shifted into the target
 *   op3 (count) - shift amount
 * Only the count may become an immediate.
 */
4264 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4265 ir_node *low, ir_node *count)
4267 ir_node *block = get_nodes_block(node);
4268 ir_node *new_block = be_transform_node(block);
4269 dbg_info *dbgi = get_irn_dbg_info(node);
4270 ir_node *new_high = be_transform_node(high);
4271 ir_node *new_low = be_transform_node(low);
4275 /* the shift amount can be any mode that is bigger than 5 bits, since all
4276 * other bits are ignored anyway */
4277 while (is_Conv(count) &&
4278 get_irn_n_edges(count) == 1 &&
4279 mode_is_int(get_irn_mode(count))) {
4280 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4281 count = get_Conv_op(count);
4283 new_count = create_immediate_or_transform(count, 0);
4285 if (is_ia32_l_ShlD(node)) {
4286 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4289 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4292 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transforms an ia32_l_ShlD (64bit lowered shift-left double) by
 * delegating to gen_lowered_64bit_shifts.
 */
4297 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4299 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4300 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4301 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4302 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_ShrD (64bit lowered shift-right double) by
 * delegating to gen_lowered_64bit_shifts.
 */
4305 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4307 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4308 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4309 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4310 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_LLtoFloat (64bit integer to float conversion, x87
 * only).  The two 32bit halves are stored to a 64bit frame slot, Sync'ed,
 * and reloaded with fild (which reads a signed 64bit integer).  For an
 * UNSIGNED source a correction is needed: when the original high word has
 * its sign bit set, fild interpreted the value as negative, so 2^64
 * (ULL_BIAS) is added back.  The bias constant is selected via an
 * address-mode trick: (high >> 31) scaled as the index picks either 0.0 or
 * the bias from a constant table.
 */
4315 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4316 ir_node *src_block = get_nodes_block(node);
4317 ir_graph *irg = current_ir_graph;
4318 dbg_info *dbgi = get_irn_dbg_info(node);
4319 ir_node *frame = get_irg_frame(irg);
4320 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4321 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4322 ir_node *new_val_low = be_transform_node(val_low);
4323 ir_node *new_val_high = be_transform_node(val_high);
4325 ir_node *sync, *fild, *res;
4326 ir_node *store_low, *store_high;
4328 if (ia32_cg_config.use_sse2) {
4329 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves into one 64bit frame slot (high word at offset 4) */
4333 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4335 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4337 SET_IA32_ORIG_NODE(store_low, node);
4338 SET_IA32_ORIG_NODE(store_high, node);
4340 set_ia32_use_frame(store_low);
4341 set_ia32_use_frame(store_high);
4342 set_ia32_op_type(store_low, ia32_AddrModeD);
4343 set_ia32_op_type(store_high, ia32_AddrModeD);
4344 set_ia32_ls_mode(store_low, mode_Iu);
4345 set_ia32_ls_mode(store_high, mode_Is);
4346 add_ia32_am_offs_int(store_high, 4);
4350 sync = new_rd_Sync(dbgi, block, 2, in);
/* reload the 64bit value as a signed integer into the x87 unit */
4353 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4355 set_ia32_use_frame(fild);
4356 set_ia32_op_type(fild, ia32_AddrModeS);
4357 set_ia32_ls_mode(fild, mode_Ls);
4359 SET_IA32_ORIG_NODE(fild, node);
4361 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4363 if (! mode_is_signed(get_irn_mode(val_high))) {
4364 ia32_address_mode_t am;
/* (high >> 31) yields 0 or 1; used as index into the bias table */
4366 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4369 am.addr.base = get_symconst_base();
4370 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4371 am.addr.mem = nomem;
4374 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4375 am.addr.use_frame = 0;
4376 am.addr.frame_entity = NULL;
4377 am.addr.symconst_sign = 0;
4378 am.ls_mode = mode_F;
4379 am.mem_proj = nomem;
4380 am.op_type = ia32_AddrModeS;
4382 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4383 am.pinned = op_pin_state_floats;
4385 am.ins_permuted = 0;
4387 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4388 am.new_op1, am.new_op2, get_fpcw());
4389 set_am_attributes(fadd, &am);
4391 set_irn_mode(fadd, mode_T);
4392 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transforms an ia32_l_FloattoLL (float to 64bit integer conversion, x87):
 * a fist instruction stores the value as a 64bit integer into a frame
 * slot.  The two 32bit result halves are later picked up by
 * gen_Proj_l_FloattoLL.
 */
4399 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4400 ir_node *src_block = get_nodes_block(node);
4401 ir_graph *irg = get_Block_irg(block);
4402 dbg_info *dbgi = get_irn_dbg_info(node);
4403 ir_node *frame = get_irg_frame(irg);
4404 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4405 ir_node *new_val = be_transform_node(val);
4406 ir_node *fist, *mem;
4408 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4409 SET_IA32_ORIG_NODE(fist, node);
4410 set_ia32_use_frame(fist);
4411 set_ia32_op_type(fist, ia32_AddrModeD);
4412 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transforms a Proj of an l_FloattoLL: loads one 32bit half of the 64bit
 * integer that the fist wrote to the frame slot (offset 4 for the high
 * word).
 */
4417 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4419 ir_node *block = be_transform_node(get_nodes_block(node));
4420 ir_graph *irg = get_Block_irg(block);
4421 ir_node *pred = get_Proj_pred(node);
4422 ir_node *new_pred = be_transform_node(pred);
4423 ir_node *frame = get_irg_frame(irg);
4424 dbg_info *dbgi = get_irn_dbg_info(node);
4425 long pn = get_Proj_proj(node);
4430 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4431 SET_IA32_ORIG_NODE(load, node);
4432 set_ia32_use_frame(load);
4433 set_ia32_op_type(load, ia32_AddrModeS);
4434 set_ia32_ls_mode(load, mode_Iu);
4435 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4436 * 32 bit from it with this particular load */
4437 attr = get_ia32_attr(load);
4438 attr->data.need_64bit_stackent = 1;
4440 if (pn == pn_ia32_l_FloattoLL_res_high) {
4441 add_ia32_am_offs_int(load, 4);
4443 assert(pn == pn_ia32_l_FloattoLL_res_low);
4446 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of a be_AddSP.  The predecessor was turned into an
 * ia32 SubSP (see gen_be_AddSP), so Proj numbers are renumbered
 * accordingly; the stack-pointer result is pinned to %esp.
 */
4454 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4456 ir_node *pred = get_Proj_pred(node);
4457 ir_node *new_pred = be_transform_node(pred);
4458 dbg_info *dbgi = get_irn_dbg_info(node);
4459 long proj = get_Proj_proj(node);
4461 if (proj == pn_be_AddSP_sp) {
4462 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4463 pn_ia32_SubSP_stack);
4464 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4466 } else if (proj == pn_be_AddSP_res) {
4467 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4468 pn_ia32_SubSP_addr);
4469 } else if (proj == pn_be_AddSP_M) {
4470 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4473 panic("No idea how to transform proj->AddSP");
/**
 * Transform the Projs of a be_SubSP.  The predecessor became an ia32 AddSP
 * (see gen_be_SubSP); the stack-pointer result is pinned to %esp.
 */
4479 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4481 ir_node *pred = get_Proj_pred(node);
4482 ir_node *new_pred = be_transform_node(pred);
4483 dbg_info *dbgi = get_irn_dbg_info(node);
4484 long proj = get_Proj_proj(node);
4486 if (proj == pn_be_SubSP_sp) {
4487 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4488 pn_ia32_AddSP_stack);
4489 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4491 } else if (proj == pn_be_SubSP_M) {
4492 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4495 panic("No idea how to transform proj->SubSP");
/**
 * Transform and renumber the Projs from a Load.
 *
 * Memory Projs of multi-user Loads are deferred (the Load may still be
 * folded into an address mode of another node); otherwise the predecessor
 * is transformed and the Proj is renumbered depending on what the Load
 * became: an ia32 Load, a Conv_I2I (source-AM folding), an xLoad (SSE) or
 * a vfld (x87).  Exception Projs additionally mark the Load with an
 * exception label.
 */
4501 static ir_node *gen_Proj_Load(ir_node *node)
4504 ir_node *block = be_transform_node(get_nodes_block(node));
4505 ir_node *pred = get_Proj_pred(node);
4506 dbg_info *dbgi = get_irn_dbg_info(node);
4507 long proj = get_Proj_proj(node);
4509 /* loads might be part of source address mode matches, so we don't
4510 * transform the ProjMs yet (with the exception of loads whose result is
4513 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4516 /* this is needed, because sometimes we have loops that are only
4517 reachable through the ProjM */
4518 be_enqueue_preds(node);
4519 /* do it in 2 steps, to silence firm verifier */
4520 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4521 set_Proj_proj(res, pn_ia32_mem);
4525 /* renumber the proj */
4526 new_pred = be_transform_node(pred);
4527 if (is_ia32_Load(new_pred)) {
4530 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4532 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4533 case pn_Load_X_regular:
4534 return new_rd_Jmp(dbgi, block);
4535 case pn_Load_X_except:
4536 /* This Load might raise an exception. Mark it. */
4537 set_ia32_exc_label(new_pred, 1);
4538 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
4542 } else if (is_ia32_Conv_I2I(new_pred) ||
4543 is_ia32_Conv_I2I8Bit(new_pred)) {
4544 set_irn_mode(new_pred, mode_T);
4545 if (proj == pn_Load_res) {
4546 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4547 } else if (proj == pn_Load_M) {
4548 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4550 } else if (is_ia32_xLoad(new_pred)) {
4553 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4555 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4556 case pn_Load_X_regular:
4557 return new_rd_Jmp(dbgi, block);
4558 case pn_Load_X_except:
4559 /* This Load might raise an exception. Mark it. */
4560 set_ia32_exc_label(new_pred, 1);
4561 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4565 } else if (is_ia32_vfld(new_pred)) {
4568 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4570 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4571 case pn_Load_X_regular:
4572 return new_rd_Jmp(dbgi, block);
4573 case pn_Load_X_except:
4574 /* This Load might raise an exception. Mark it. */
4575 set_ia32_exc_label(new_pred, 1);
4576 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4581 /* can happen for ProJMs when source address mode happened for the
4584 /* however it should not be the result proj, as that would mean the
4585 load had multiple users and should not have been used for
4587 if (proj != pn_Load_M) {
4588 panic("internal error: transformed node not a Load");
4590 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4593 panic("No idea how to transform proj");
/**
 * Transform and renumber the Projs from a Div/Mod/DivMod-like instruction.
 * All three map onto the same ia32 Div/IDiv node, which produces both the
 * quotient (div_res) and remainder (mod_res); the Proj numbers are chosen
 * by the opcode of the ORIGINAL predecessor.
 */
4599 static ir_node *gen_Proj_DivMod(ir_node *node)
4601 ir_node *block = be_transform_node(get_nodes_block(node));
4602 ir_node *pred = get_Proj_pred(node);
4603 ir_node *new_pred = be_transform_node(pred);
4604 dbg_info *dbgi = get_irn_dbg_info(node);
4605 long proj = get_Proj_proj(node);
4607 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4609 switch (get_irn_opcode(pred)) {
4613 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4615 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4616 case pn_Div_X_regular:
4617 return new_rd_Jmp(dbgi, block);
4618 case pn_Div_X_except:
4619 set_ia32_exc_label(new_pred, 1);
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4628 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4630 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4631 case pn_Mod_X_except:
4632 set_ia32_exc_label(new_pred, 1);
4633 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4641 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4642 case pn_DivMod_res_div:
4643 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4644 case pn_DivMod_res_mod:
4645 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4646 case pn_DivMod_X_regular:
4647 return new_rd_Jmp(dbgi, block);
4648 case pn_DivMod_X_except:
4649 set_ia32_exc_label(new_pred, 1);
4650 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4659 panic("No idea how to transform proj->DivMod");
/**
 * Transform and renumber the Projs from a CopyB.  Only the memory Proj is
 * handled; the number depends on whether the CopyB became the
 * immediate-count (CopyB_i) or the general variant.
 */
4665 static ir_node *gen_Proj_CopyB(ir_node *node)
4667 ir_node *pred = get_Proj_pred(node);
4668 ir_node *new_pred = be_transform_node(pred);
4669 dbg_info *dbgi = get_irn_dbg_info(node);
4670 long proj = get_Proj_proj(node);
4674 if (is_ia32_CopyB_i(new_pred)) {
4675 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4676 } else if (is_ia32_CopyB(new_pred)) {
4677 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4684 panic("No idea how to transform proj->CopyB");
/**
 * Transform and renumber the Projs from a Quot (float division).  Proj
 * numbers and result mode depend on whether the division became an SSE
 * xDiv or an x87 vfdiv.
 */
4690 static ir_node *gen_Proj_Quot(ir_node *node)
4692 ir_node *pred = get_Proj_pred(node);
4693 ir_node *new_pred = be_transform_node(pred);
4694 dbg_info *dbgi = get_irn_dbg_info(node);
4695 long proj = get_Proj_proj(node);
4699 if (is_ia32_xDiv(new_pred)) {
4700 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4701 } else if (is_ia32_vfdiv(new_pred)) {
4702 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4706 if (is_ia32_xDiv(new_pred)) {
4707 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4708 } else if (is_ia32_vfdiv(new_pred)) {
4709 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4712 case pn_Quot_X_regular:
4713 case pn_Quot_X_except:
4718 panic("No idea how to transform proj->Quot");
/**
 * Transforms a be_Call into an ia32 Call.
 *
 * The call target may be folded into an address mode or immediate.
 * Register parameters (limited to EAX/ECX/EDX by the lowering) are wired
 * to the corresponding Call inputs.  A float return value triggers the
 * x87 simulator, and under SSE2 the call is recorded for post-processing
 * (the XMM0->st0 result fixup).
 */
4723 dbg_info *const dbgi = get_irn_dbg_info(node);
4724 ir_node *const src_block = get_nodes_block(node);
4725 ir_node *const block = be_transform_node(src_block);
4726 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4727 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4728 ir_node *const sp = be_transform_node(src_sp);
4729 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4730 ia32_address_mode_t am;
4731 ia32_address_t *const addr = &am.addr;
4736 ir_node * eax = noreg_GP;
4737 ir_node * ecx = noreg_GP;
4738 ir_node * edx = noreg_GP;
4739 unsigned const pop = be_Call_get_pop(node);
4740 ir_type *const call_tp = be_Call_get_type(node);
4741 int old_no_pic_adjust;
4743 /* Run the x87 simulator if the call returns a float value */
4744 if (get_method_n_ress(call_tp) > 0) {
4745 ir_type *const res_type = get_method_res_type(call_tp, 0);
4746 ir_mode *const res_mode = get_type_mode(res_type);
4748 if (res_mode != NULL && mode_is_float(res_mode)) {
4749 env_cg->do_x87_sim = 1;
4753 /* We do not want be_Call direct calls */
4754 assert(be_Call_get_entity(node) == NULL);
4756 /* special case for PIC trampoline calls */
4757 old_no_pic_adjust = no_pic_adjust;
4758 no_pic_adjust = be_get_irg_options(env_cg->irg)->pic;
4760 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4761 match_am | match_immediate);
4763 no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; register parameters precede it */
4765 i = get_irn_arity(node) - 1;
4766 fpcw = be_transform_node(get_irn_n(node, i--));
4767 for (; i >= be_pos_Call_first_arg; --i) {
4768 arch_register_req_t const *const req = arch_get_register_req(node, i);
4769 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4771 assert(req->type == arch_register_req_type_limited);
4772 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4774 switch (*req->limited) {
4775 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4776 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4777 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4778 default: panic("Invalid GP register for register parameter");
4782 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4783 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4784 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4785 set_am_attributes(call, &am);
4786 call = fix_mem_proj(call, &am);
4788 if (get_irn_pinned(node) == op_pin_state_pinned)
4789 set_irn_pinned(call, op_pin_state_pinned);
4791 SET_IA32_ORIG_NODE(call, node);
4793 if (ia32_cg_config.use_sse2) {
4794 /* remember this call for post-processing */
4795 ARR_APP1(ir_node *, call_list, call);
4796 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/**
 * Transform a Builtin trap into an ia32 UD2 (undefined instruction)
 * node threaded on the memory chain.
 */
4805 static ir_node *gen_trap(ir_node *node)
4807 dbg_info *dbgi = get_irn_dbg_info(node);
4808 ir_node *block = be_transform_node(get_nodes_block(node));
4809 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4811 return new_bd_ia32_UD2(dbgi, block, mem);
/**
 * Transform a Builtin debugbreak into an ia32 Breakpoint (int3) node
 * threaded on the memory chain.
 */
4817 static ir_node *gen_debugbreak(ir_node *node)
4819 dbg_info *dbgi = get_irn_dbg_info(node);
4820 ir_node *block = be_transform_node(get_nodes_block(node));
4821 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4823 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Transform a Builtin return_address: for a non-zero frame level, first
 * walk up the frame chain with a ClimbFrame node, then load the return
 * address from the resulting frame (via a dedicated frame entity).
 * Floating (unpinned) loads are marked rematerializable.
 */
4829 static ir_node *gen_return_address(ir_node *node)
4831 ir_node *param = get_Builtin_param(node, 0);
4832 ir_node *frame = get_Builtin_param(node, 1);
4833 dbg_info *dbgi = get_irn_dbg_info(node);
4834 tarval *tv = get_Const_tarval(param);
4835 unsigned long value = get_tarval_long(tv);
4837 ir_node *block = be_transform_node(get_nodes_block(node));
4838 ir_node *ptr = be_transform_node(frame);
/* climb up 'value' frames before reading the return address */
4842 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4843 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4844 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4847 /* load the return address from this frame */
4848 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4850 set_irn_pinned(load, get_irn_pinned(node));
4851 set_ia32_op_type(load, ia32_AddrModeS);
4852 set_ia32_ls_mode(load, mode_Iu);
4854 set_ia32_am_offs_int(load, 0);
4855 set_ia32_use_frame(load);
4856 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4858 if (get_irn_pinned(node) == op_pin_state_floats) {
/* the res proj numbers must agree for the remat flag to be safe on
 * whatever load variant this ends up as */
4859 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4860 && pn_ia32_vfld_res == pn_ia32_Load_res
4861 && pn_ia32_Load_res == pn_ia32_res);
4862 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4865 SET_IA32_ORIG_NODE(load, node);
4866 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform a Builtin frame_address: analogous to gen_return_address, but
 * loads the saved frame pointer instead of the return address.  For a
 * non-zero level the frame chain is first walked with ClimbFrame.
 */
4872 static ir_node *gen_frame_address(ir_node *node)
4874 ir_node *param = get_Builtin_param(node, 0);
4875 ir_node *frame = get_Builtin_param(node, 1);
4876 dbg_info *dbgi = get_irn_dbg_info(node);
4877 tarval *tv = get_Const_tarval(param);
4878 unsigned long value = get_tarval_long(tv);
4880 ir_node *block = be_transform_node(get_nodes_block(node));
4881 ir_node *ptr = be_transform_node(frame);
4886 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4887 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4888 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4891 /* load the frame address from this frame */
4892 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4894 set_irn_pinned(load, get_irn_pinned(node));
4895 set_ia32_op_type(load, ia32_AddrModeS);
4896 set_ia32_ls_mode(load, mode_Iu);
4898 ent = ia32_get_frame_address_entity();
4900 set_ia32_am_offs_int(load, 0);
4901 set_ia32_use_frame(load);
4902 set_ia32_frame_ent(load, ent);
4904 /* will fail anyway, but gcc does this: */
4905 set_ia32_am_offs_int(load, 0);
4908 if (get_irn_pinned(node) == op_pin_state_floats) {
4909 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4910 && pn_ia32_vfld_res == pn_ia32_Load_res
4911 && pn_ia32_Load_res == pn_ia32_res);
4912 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4915 SET_IA32_ORIG_NODE(load, node);
4916 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform a Builtin prefetch.
 *
 * When neither SSE nor 3DNow! prefetch is available the builtin degrades
 * to a plain memory pass-through.  Otherwise a prefetch address mode is
 * built from the pointer parameter and the instruction variant is chosen
 * from the rw flag (param 1: 3DNow! PrefetchW for writes) and the SSE
 * locality hint (param 2: PrefetchNTA/2/1/0).
 */
4922 static ir_node *gen_prefetch(ir_node *node)
4925 ir_node *ptr, *block, *mem, *base, *index;
4926 ir_node *param, *new_node;
4929 ia32_address_t addr;
4931 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4932 /* no prefetch at all, route memory */
4933 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write flag */
4936 param = get_Builtin_param(node, 1);
4937 tv = get_Const_tarval(param);
4938 rw = get_tarval_long(tv);
4940 /* construct load address */
4941 memset(&addr, 0, sizeof(addr));
4942 ptr = get_Builtin_param(node, 0);
4943 ia32_create_address_mode(&addr, ptr, 0);
4950 base = be_transform_node(base);
4953 if (index == NULL) {
4956 index = be_transform_node(index);
4959 dbgi = get_irn_dbg_info(node);
4960 block = be_transform_node(get_nodes_block(node));
4961 mem = be_transform_node(get_Builtin_mem(node));
4963 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4964 /* we have 3DNow!, this was already checked above */
4965 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4966 } else if (ia32_cg_config.use_sse_prefetch) {
4967 /* note: rw == 1 is IGNORED in that case */
4968 param = get_Builtin_param(node, 2);
4969 tv = get_Const_tarval(param);
4970 locality = get_tarval_long(tv);
4972 /* SSE style prefetch */
4975 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4978 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4981 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4984 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4988 assert(ia32_cg_config.use_3dnow_prefetch);
4989 /* 3DNow! style prefetch */
4990 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4993 set_irn_pinned(new_node, get_irn_pinned(node));
4994 set_ia32_op_type(new_node, ia32_AddrModeS);
4995 set_ia32_ls_mode(new_node, mode_Bu);
4996 set_address(new_node, &addr);
4998 SET_IA32_ORIG_NODE(new_node, node);
5000 be_dep_on_frame(new_node);
5001 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5005 * Transform bsf like node
/* Shared helper for unary bit-scan style builtins (Bsf/Bsr/Popcnt):
 * constructs the ia32 node via @p func, allowing the operand to be matched
 * as a memory (address-mode) source.  Returns the result, with a possible
 * memory Proj fixed up by fix_mem_proj(). */
5007 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
5009 ir_node *param = get_Builtin_param(node, 0);
5010 dbg_info *dbgi = get_irn_dbg_info(node);
5012 ir_node *block = get_nodes_block(node);
5013 ir_node *new_block = be_transform_node(block);
5015 ia32_address_mode_t am;
5016 ia32_address_t *addr = &am.addr;
/* match param as register or memory operand (match_am) */
5019 match_arguments(&am, block, NULL, param, NULL, match_am);
5021 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5022 set_am_attributes(cnt, &am);
/* load/store mode follows the operand's mode */
5023 set_ia32_ls_mode(cnt, get_irn_mode(param));
5025 SET_IA32_ORIG_NODE(cnt, node);
5026 return fix_mem_proj(cnt, &am);
5030 * Transform builtin ffs.
/* ffs(x) = index of least significant set bit + 1, or 0 if x == 0.
 * Implemented as bsf(x), then OR-ing in -(x == 0) (all-ones when the input
 * was zero, via Setcc of the Bsf zero flag + Neg), and finally adding 1. */
5032 static ir_node *gen_ffs(ir_node *node)
5034 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5035 ir_node *real = skip_Proj(bsf);
5036 dbg_info *dbgi = get_irn_dbg_info(real);
5037 ir_node *block = get_nodes_block(real);
5038 ir_node *flag, *set, *conv, *neg, *or;
/* ensure the Bsf is mode_T so both the result and flags Projs exist */
5041 if (get_irn_mode(real) != mode_T) {
5042 set_irn_mode(real, mode_T);
5043 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
/* flag is the ZF set by Bsf when the input was zero */
5046 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (input == 0) as a byte value */
5049 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5050 SET_IA32_ORIG_NODE(set, node);
/* widen the Setcc byte to a full register */
5053 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5054 SET_IA32_ORIG_NODE(conv, node);
/* neg = 0 or 0xFFFFFFFF (= -1) depending on the zero test */
5057 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* or forces the result to -1 when the input was zero */
5060 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5061 set_ia32_commutative(or);
/* +1 turns the 0-based bit index (or -1) into ffs semantics */
5064 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5068 * Transform builtin clz.
/* clz(x) = 31 - bsr(x), computed as bsr(x) XOR 31 (equivalent for the
 * 5-bit result range of Bsr on 32-bit values).
 * NOTE(review): Bsr is architecturally undefined for x == 0; presumably the
 * builtin shares that precondition — TODO confirm. */
5070 static ir_node *gen_clz(ir_node *node)
5072 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5073 ir_node *real = skip_Proj(bsr);
5074 dbg_info *dbgi = get_irn_dbg_info(real);
5075 ir_node *block = get_nodes_block(real);
5076 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5078 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5082 * Transform builtin ctz.
/* ctz maps directly to Bsf (index of least significant set bit). */
5084 static ir_node *gen_ctz(ir_node *node)
5086 return gen_unop_AM(node, new_bd_ia32_Bsf);
5090 * Transform builtin parity.
/* Compute parity via Cmp(param, 0) followed by Setcc on the parity flag.
 * NOTE(review): the x86 parity flag reflects only the low byte of the
 * result — whether that matches the builtin's intended semantics for full
 * 32-bit inputs cannot be verified from this excerpt; confirm upstream. */
5092 static ir_node *gen_parity(ir_node *node)
5094 ir_node *param = get_Builtin_param(node, 0);
5095 dbg_info *dbgi = get_irn_dbg_info(node);
5097 ir_node *block = get_nodes_block(node);
5099 ir_node *new_block = be_transform_node(block);
5100 ir_node *imm, *cmp, *new_node;
5102 ia32_address_mode_t am;
5103 ia32_address_t *addr = &am.addr;
/* allow the operand to come from memory */
5107 match_arguments(&am, block, NULL, param, NULL, match_am);
5108 imm = ia32_create_Immediate(NULL, 0, 0);
5109 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5110 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5111 set_am_attributes(cmp, &am);
5112 set_ia32_ls_mode(cmp, mode_Iu);
5114 SET_IA32_ORIG_NODE(cmp, node);
5116 cmp = fix_mem_proj(cmp, &am);
/* materialize the parity flag as a byte value */
5119 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5120 SET_IA32_ORIG_NODE(new_node, node);
/* widen the Setcc byte result to a full register */
5123 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5124 nomem, new_node, mode_Bu);
5125 SET_IA32_ORIG_NODE(new_node, node);
5130 * Transform builtin popcount
/* Lower ir_bk_popcount: use the POPCNT instruction when available
 * (SSE4.2/SSE4a), otherwise expand the classic parallel bit-count
 * (pairs -> nibbles -> bytes -> words), using Lea nodes for the adds. */
5132 static ir_node *gen_popcount(ir_node *node)
5134 ir_node *param = get_Builtin_param(node, 0);
5135 dbg_info *dbgi = get_irn_dbg_info(node);
5137 ir_node *block = get_nodes_block(node);
5138 ir_node *new_block = be_transform_node(block);
5141 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5143 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5144 if (ia32_cg_config.use_popcnt) {
5145 ia32_address_mode_t am;
5146 ia32_address_t *addr = &am.addr;
/* POPCNT also has a 16-bit form, hence match_16bit_am */
5149 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5151 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5152 set_am_attributes(cnt, &am);
5153 set_ia32_ls_mode(cnt, get_irn_mode(param));
5155 SET_IA32_ORIG_NODE(cnt, node);
5156 return fix_mem_proj(cnt, &am);
5159 new_param = be_transform_node(param);
5161 /* do the standard popcount algo */
5163 /* m1 = x & 0x55555555 */
5164 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5165 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* NOTE(review): the shifts below are all Shl, while the usual expansion
 * (and the elided/visible comments, e.g. "m12 >> 16") use right shifts.
 * Whether this Shl-based variant is intentional or a bug cannot be decided
 * from this excerpt — TODO verify against upstream history. */
5168 simm = ia32_create_Immediate(NULL, 0, 1);
5169 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5171 /* m2 = s1 & 0x55555555 */
5172 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (Lea used as a 2-operand add) */
5175 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5177 /* m4 = m3 & 0x33333333 */
5178 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5179 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5182 simm = ia32_create_Immediate(NULL, 0, 2);
5183 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5185 /* m5 = s2 & 0x33333333 */
5186 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 */
5189 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5191 /* m7 = m6 & 0x0F0F0F0F */
5192 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5193 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5196 simm = ia32_create_Immediate(NULL, 0, 4);
5197 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5199 /* m8 = s3 & 0x0F0F0F0F */
5200 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 */
5203 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5205 /* m10 = m9 & 0x00FF00FF */
5206 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5207 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5210 simm = ia32_create_Immediate(NULL, 0, 8);
5211 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5213 /* m11 = s4 & 0x00FF00FF */
5214 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5216 /* m12 = m10 + m11 */
5217 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5219 /* m13 = m12 & 0x0000FFFF */
5220 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5221 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5223 /* s5 = m12 >> 16 */
/* NOTE(review): comment says ">> 16" but the node built is a Shl — see the
 * note above; flagged for verification, left unchanged here. */
5224 simm = ia32_create_Immediate(NULL, 0, 16);
5225 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5227 /* res = m13 + s5 */
5228 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5232 * Transform builtin byte swap.
/* Lower ir_bk_bswap.  32-bit values use the BSWAP instruction on i486+,
 * otherwise a shift/and/Lea expansion; 16-bit values use Bswap16 (xchg of
 * the low byte pair, always available).  The size switch labels and the
 * final panic for other sizes are partially elided in this excerpt. */
5234 static ir_node *gen_bswap(ir_node *node)
5236 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5237 dbg_info *dbgi = get_irn_dbg_info(node);
5239 ir_node *block = get_nodes_block(node);
5240 ir_node *new_block = be_transform_node(block);
5241 ir_mode *mode = get_irn_mode(param);
5242 unsigned size = get_mode_size_bits(mode);
5243 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
/* 32-bit case */
5247 if (ia32_cg_config.use_i486) {
5248 /* swap available */
5249 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* pre-i486 expansion:
 * (x << 24) | ((x << 8) & 0xFF0000... see masks) | ((x >> 8) & 0xFF00...)
 * | (x >> 24), with Lea nodes performing the adds/ors (disjoint bits). */
5251 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5252 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5254 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5255 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5257 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5259 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5260 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5262 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5263 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
/* 16-bit case */
5266 /* swap16 always available */
5267 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5270 panic("Invalid bswap size (%d)", size);
5275 * Transform builtin outport.
/* Lower ir_bk_outport to an ia32 Outport node (OUT instruction).
 * Param 0 is the port number (immediate where possible), param 1 the value;
 * the store mode is taken from the value's mode.  The return of the memory
 * result is elided in this excerpt. */
5277 static ir_node *gen_outport(ir_node *node)
5279 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5280 ir_node *oldv = get_Builtin_param(node, 1);
5281 ir_mode *mode = get_irn_mode(oldv);
5282 ir_node *value = be_transform_node(oldv);
5283 ir_node *block = be_transform_node(get_nodes_block(node));
5284 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5285 dbg_info *dbgi = get_irn_dbg_info(node);
5287 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5288 set_ia32_ls_mode(res, mode);
5293 * Transform builtin inport.
/* Lower ir_bk_inport to an ia32 Inport node (IN instruction).  The load
 * mode comes from the builtin's declared result type; param 0 is the port
 * number.  Result-Proj handling and the return are elided in this excerpt
 * (see gen_Proj_Builtin for the Proj side). */
5295 static ir_node *gen_inport(ir_node *node)
5297 ir_type *tp = get_Builtin_type(node);
5298 ir_type *rstp = get_method_res_type(tp, 0);
5299 ir_mode *mode = get_type_mode(rstp);
5300 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5301 ir_node *block = be_transform_node(get_nodes_block(node));
5302 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5303 dbg_info *dbgi = get_irn_dbg_info(node);
5305 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5306 set_ia32_ls_mode(res, mode);
5308 /* check for missing Result Proj */
5313 * Transform a builtin inner trampoline
/* Materialize an x86 trampoline at *ptr:
 *   B9 <env>      mov ecx, env
 *   E9 <rel32>    jmp rel32 to callee
 * by emitting a chain of byte/word stores.  The address-offset increments
 * between the stores are elided in this excerpt.  Returns a Tuple of
 * (memory, trampoline address). */
5315 static ir_node *gen_inner_trampoline(ir_node *node)
5317 ir_node *ptr = get_Builtin_param(node, 0);
5318 ir_node *callee = get_Builtin_param(node, 1);
5319 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5320 ir_node *mem = get_Builtin_mem(node);
5321 ir_node *block = get_nodes_block(node);
5322 ir_node *new_block = be_transform_node(block);
5326 ir_node *trampoline;
5328 dbg_info *dbgi = get_irn_dbg_info(node);
5329 ia32_address_t addr;
5331 /* construct store address */
5332 memset(&addr, 0, sizeof(addr));
5333 ia32_create_address_mode(&addr, ptr, 0);
5335 if (addr.base == NULL) {
5336 addr.base = noreg_GP;
5338 addr.base = be_transform_node(addr.base);
5341 if (addr.index == NULL) {
5342 addr.index = noreg_GP;
5344 addr.index = be_transform_node(addr.index);
5346 addr.mem = be_transform_node(mem);
5348 /* mov ecx, <env> */
/* opcode byte 0xB9 = "mov ecx, imm32" */
5349 val = ia32_create_Immediate(NULL, 0, 0xB9);
5350 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5351 addr.index, addr.mem, val);
5352 set_irn_pinned(store, get_irn_pinned(node));
5353 set_ia32_op_type(store, ia32_AddrModeD);
5354 set_ia32_ls_mode(store, mode_Bu);
5355 set_address(store, &addr);
/* store the 32-bit env value as the mov's immediate operand */
5359 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5360 addr.index, addr.mem, env);
5361 set_irn_pinned(store, get_irn_pinned(node));
5362 set_ia32_op_type(store, ia32_AddrModeD);
5363 set_ia32_ls_mode(store, mode_Iu);
5364 set_address(store, &addr);
5368 /* jmp rel <callee> */
/* opcode byte 0xE9 = "jmp rel32" */
5369 val = ia32_create_Immediate(NULL, 0, 0xE9);
5370 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5371 addr.index, addr.mem, val);
5372 set_irn_pinned(store, get_irn_pinned(node));
5373 set_ia32_op_type(store, ia32_AddrModeD);
5374 set_ia32_ls_mode(store, mode_Bu);
5375 set_address(store, &addr);
5379 trampoline = be_transform_node(ptr);
5381 /* the callee is typically an immediate */
/* rel32 = callee - (trampoline + 10); the -10 accounts for the bytes of
 * the two instructions preceding the jump target field. */
5382 if (is_SymConst(callee)) {
5383 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5385 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5387 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5389 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5390 addr.index, addr.mem, rel);
5391 set_irn_pinned(store, get_irn_pinned(node));
5392 set_ia32_op_type(store, ia32_AddrModeD);
5393 set_ia32_ls_mode(store, mode_Iu);
5394 set_address(store, &addr);
/* Tuple[0] = memory, Tuple[1] = trampoline address (cf. gen_Proj_Builtin) */
5399 return new_r_Tuple(new_block, 2, in);
5403 * Transform Builtin node.
/* Dispatch a Builtin node to its kind-specific transformer; unsupported
 * kinds panic.  Several case labels are elided in this excerpt. */
5405 static ir_node *gen_Builtin(ir_node *node)
5407 ir_builtin_kind kind = get_Builtin_kind(node);
5411 return gen_trap(node);
5412 case ir_bk_debugbreak:
5413 return gen_debugbreak(node);
5414 case ir_bk_return_address:
5415 return gen_return_address(node);
5416 case ir_bk_frame_address:
5417 return gen_frame_address(node);
5418 case ir_bk_prefetch:
5419 return gen_prefetch(node);
5421 return gen_ffs(node);
5423 return gen_clz(node);
5425 return gen_ctz(node);
5427 return gen_parity(node);
5428 case ir_bk_popcount:
5429 return gen_popcount(node);
5431 return gen_bswap(node);
5433 return gen_outport(node);
5435 return gen_inport(node);
5436 case ir_bk_inner_trampoline:
5437 return gen_inner_trampoline(node);
5439 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5443 * Transform Proj(Builtin) node.
/* Map a Proj on a Builtin to the corresponding Proj/Tuple element of the
 * already-transformed ia32 node.  Kinds whose transformer returned a plain
 * value or memory pass the new node through directly; Inport and
 * inner_trampoline need explicit result/memory selection.  Some case
 * labels and return statements are elided in this excerpt. */
5445 static ir_node *gen_Proj_Builtin(ir_node *proj)
5447 ir_node *node = get_Proj_pred(proj);
5448 ir_node *new_node = be_transform_node(node);
5449 ir_builtin_kind kind = get_Builtin_kind(node);
5452 case ir_bk_return_address:
5453 case ir_bk_frame_address:
5458 case ir_bk_popcount:
/* these builtins have exactly one value result */
5460 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5463 case ir_bk_debugbreak:
5464 case ir_bk_prefetch:
/* these builtins only produce memory */
5466 assert(get_Proj_proj(proj) == pn_Builtin_M);
5469 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5470 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5472 assert(get_Proj_proj(proj) == pn_Builtin_M);
5473 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5475 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned Tuple(mem, trampoline address) */
5476 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5477 return get_Tuple_pred(new_node, 1);
5479 assert(get_Proj_proj(proj) == pn_Builtin_M);
5480 return get_Tuple_pred(new_node, 0);
5483 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Duplicate a be_IncSP node; the stack adjustment is implemented with
 * add/sub, so the duplicate must be marked as clobbering the flags.
 * The return of res is elided in this excerpt. */
5486 static ir_node *gen_be_IncSP(ir_node *node)
5488 ir_node *res = be_duplicate_node(node);
5489 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5495 * Transform the Projs from a be_Call.
/* Map be_Call Proj numbers onto the transformed ia32_Call: memory and the
 * well-known projs (stack pointer, fpcw) map directly; other register
 * results are located by scanning the new call's output register
 * requirements for a matching limited register.  Parts of the scan loop
 * and the switch over the mapped proj number are elided in this excerpt. */
5497 static ir_node *gen_Proj_be_Call(ir_node *node)
5499 ir_node *call = get_Proj_pred(node);
5500 ir_node *new_call = be_transform_node(call);
5501 dbg_info *dbgi = get_irn_dbg_info(node);
5502 long proj = get_Proj_proj(node);
5503 ir_mode *mode = get_irn_mode(node);
5506 if (proj == pn_be_Call_M_regular) {
5507 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5509 /* transform call modes */
5510 if (mode_is_data(mode)) {
5511 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5515 /* Map from be_Call to ia32_Call proj number */
5516 if (proj == pn_be_Call_sp) {
5517 proj = pn_ia32_Call_stack;
5518 } else if (proj == pn_be_Call_M_regular) {
5519 proj = pn_ia32_Call_M;
/* generic register result: find the output whose limited register
 * requirement matches this Proj's requirement */
5521 arch_register_req_t const *const req = arch_get_register_req_out(node);
5522 int const n_outs = arch_irn_get_n_outs(new_call);
5525 assert(proj >= pn_be_Call_first_res);
5526 assert(req->type & arch_register_req_type_limited);
5528 for (i = 0; i < n_outs; ++i) {
5529 arch_register_req_t const *const new_req
5530 = arch_get_out_register_req(new_call, i);
5532 if (!(new_req->type & arch_register_req_type_limited) ||
5533 new_req->cls != req->cls ||
5534 *new_req->limited != *req->limited)
5543 res = new_rd_Proj(dbgi, new_call, mode, proj);
5545 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5547 case pn_ia32_Call_stack:
5548 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5551 case pn_ia32_Call_fpcw:
5552 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5560 * Transform the Projs from a Cmp.
/* Direct Proj(Cmp) transformation is invalid here: mode_b values must have
 * been lowered before the backend transform, so this always panics. */
5562 static ir_node *gen_Proj_Cmp(ir_node *node)
5564 /* this probably means not all mode_b nodes were lowered... */
5565 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5570 * Transform the Projs from a Bound.
/* Bound is lowered to a Jcc: the regular exit maps to the Jcc true Proj,
 * the exception exit to the false Proj; memory and the checked index pass
 * through to their transformed predecessors. */
5572 static ir_node *gen_Proj_Bound(ir_node *node)
5575 ir_node *pred = get_Proj_pred(node);
5577 switch (get_Proj_proj(node)) {
5579 return be_transform_node(get_Bound_mem(pred));
5580 case pn_Bound_X_regular:
5581 new_node = be_transform_node(pred);
5582 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5583 case pn_Bound_X_except:
5584 new_node = be_transform_node(pred);
5585 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
5587 return be_transform_node(get_Bound_index(pred));
5589 panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node.  The memory result is mapped to the
 * last output of the transformed ASM node; int/reference and float
 * results keep/adjust their position (the adjustment statements are elided
 * in this excerpt). */
5593 static ir_node *gen_Proj_ASM(ir_node *node)
5595 ir_mode *mode = get_irn_mode(node);
5596 ir_node *pred = get_Proj_pred(node);
5597 ir_node *new_pred = be_transform_node(pred);
5598 long pos = get_Proj_proj(node);
5600 if (mode == mode_M) {
/* memory is always the last output of the transformed ASM */
5601 pos = arch_irn_get_n_outs(new_pred)-1;
5602 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5604 } else if (mode_is_float(mode)) {
5607 panic("unexpected proj mode at ASM");
5610 return new_r_Proj(new_pred, mode, pos);
5614 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes each Proj to the handler matching its
 * predecessor's opcode.  Several case labels are elided in this excerpt. */
5616 static ir_node *gen_Proj(ir_node *node)
5618 ir_node *pred = get_Proj_pred(node);
5621 switch (get_irn_opcode(pred)) {
5623 proj = get_Proj_proj(node);
5624 if (proj == pn_Store_M) {
5625 return be_transform_node(pred);
5627 panic("No idea how to transform proj->Store");
5630 return gen_Proj_Load(node);
5632 return gen_Proj_ASM(node);
5634 return gen_Proj_Builtin(node);
5638 return gen_Proj_DivMod(node);
5640 return gen_Proj_CopyB(node);
5642 return gen_Proj_Quot(node);
5644 return gen_Proj_be_SubSP(node);
5646 return gen_Proj_be_AddSP(node);
5648 return gen_Proj_be_Call(node);
5650 return gen_Proj_Cmp(node);
5652 return gen_Proj_Bound(node);
5654 proj = get_Proj_proj(node);
5656 case pn_Start_X_initial_exec: {
5657 ir_node *block = get_nodes_block(pred);
5658 ir_node *new_block = be_transform_node(block);
5659 dbg_info *dbgi = get_irn_dbg_info(node);
5660 /* we exchange the ProjX with a jump */
5661 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5666 case pn_Start_P_tls:
5667 return gen_Proj_tls(node);
5672 if (is_ia32_l_FloattoLL(pred)) {
5673 return gen_Proj_l_FloattoLL(node);
5675 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5679 ir_mode *mode = get_irn_mode(node);
5680 if (ia32_mode_needs_gp_reg(mode)) {
/* force GP-register Projs to mode_Iu; keep the original node number
 * for debugging purposes */
5681 ir_node *new_pred = be_transform_node(pred);
5682 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5683 get_Proj_proj(node));
5684 new_proj->node_nr = node->node_nr;
5689 return be_duplicate_node(node);
5693 * Enters all transform functions into the generic pointer
/* Register the per-opcode transform callbacks used by be_transform_graph().
 * Opcodes without a specific handler use be_duplicate_node (plain copy). */
5695 static void register_transformers(void)
5697 /* first clear the generic function pointer for all ops */
5698 be_start_transform_setup();
5700 be_set_transform_function(op_Abs, gen_Abs);
5701 be_set_transform_function(op_Add, gen_Add);
5702 be_set_transform_function(op_And, gen_And);
5703 be_set_transform_function(op_ASM, gen_ASM);
5704 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5705 be_set_transform_function(op_be_Call, gen_be_Call);
5706 be_set_transform_function(op_be_Copy, gen_be_Copy);
5707 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5708 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5709 be_set_transform_function(op_be_Return, gen_be_Return);
5710 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5711 be_set_transform_function(op_Bound, gen_Bound);
5712 be_set_transform_function(op_Builtin, gen_Builtin);
5713 be_set_transform_function(op_Cmp, gen_Cmp);
5714 be_set_transform_function(op_Cond, gen_Cond);
5715 be_set_transform_function(op_Const, gen_Const);
5716 be_set_transform_function(op_Conv, gen_Conv);
5717 be_set_transform_function(op_CopyB, gen_CopyB);
5718 be_set_transform_function(op_Div, gen_Div);
5719 be_set_transform_function(op_DivMod, gen_DivMod);
5720 be_set_transform_function(op_Eor, gen_Eor);
/* ia32 "lowered" (l_*) pseudo ops produced by 64-bit lowering etc. */
5721 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5722 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5723 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5724 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5725 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5726 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5727 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5728 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5729 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5730 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5731 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5732 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5733 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5734 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
5735 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5736 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5737 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5738 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5739 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5740 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5741 be_set_transform_function(op_IJmp, gen_IJmp);
5742 be_set_transform_function(op_Jmp, gen_Jmp);
5743 be_set_transform_function(op_Load, gen_Load);
5744 be_set_transform_function(op_Minus, gen_Minus);
5745 be_set_transform_function(op_Mod, gen_Mod);
5746 be_set_transform_function(op_Mul, gen_Mul);
5747 be_set_transform_function(op_Mulh, gen_Mulh);
5748 be_set_transform_function(op_Mux, gen_Mux);
5749 be_set_transform_function(op_Not, gen_Not);
5750 be_set_transform_function(op_Or, gen_Or);
5751 be_set_transform_function(op_Phi, gen_Phi);
5752 be_set_transform_function(op_Proj, gen_Proj);
5753 be_set_transform_function(op_Quot, gen_Quot);
5754 be_set_transform_function(op_Rotl, gen_Rotl);
5755 be_set_transform_function(op_Shl, gen_Shl);
5756 be_set_transform_function(op_Shr, gen_Shr);
5757 be_set_transform_function(op_Shrs, gen_Shrs);
5758 be_set_transform_function(op_Store, gen_Store);
5759 be_set_transform_function(op_Sub, gen_Sub);
5760 be_set_transform_function(op_SymConst, gen_SymConst);
5761 be_set_transform_function(op_Unknown, gen_Unknown);
5765 * Pre-transform all unknown and noreg nodes.
/* Callback run by be_transform_graph() before the main walk: pre-transforms
 * the NoReg placeholder nodes and caches the NoMem/noreg_GP nodes used all
 * over this file.  Further cached nodes are elided in this excerpt. */
5767 static void ia32_pretransform_node(void)
5769 ia32_code_gen_t *cg = env_cg;
5771 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5772 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5773 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5775 nomem = get_irg_no_mem(current_ir_graph);
5776 noreg_GP = ia32_new_NoReg_gp(cg);
5782 * Post-process all calls if we are in SSE mode.
5783 * The ABI requires that the results are in st0, copy them
5784 * to a xmm register.
/* For every recorded call (call_list/call_types), locate float results:
 * users that are xStores are rewritten into x87 vfst stores directly;
 * all other users get the value routed through a vfst-to-stack /
 * xLoad-from-stack pair so it ends up in an XMM register.  Several
 * loop/branch lines are elided in this excerpt. */
5786 static void postprocess_fp_call_results(void)
5790 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5791 ir_node *call = call_list[i];
5792 ir_type *mtp = call_types[i];
5795 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5796 ir_type *res_tp = get_method_res_type(mtp, j);
5797 ir_node *res, *new_res;
5798 const ir_edge_t *edge, *next;
5801 if (! is_atomic_type(res_tp)) {
5802 /* no floating point return */
5805 mode = get_type_mode(res_tp);
5806 if (! mode_is_float(mode)) {
5807 /* no floating point return */
/* the x87 result Proj of the transformed call */
5811 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5814 /* now patch the users */
5815 foreach_out_edge_safe(res, edge, next) {
5816 ir_node *succ = get_edge_src_irn(edge);
/* Keeps just keep the value alive; nothing to patch */
5819 if (be_is_Keep(succ))
5822 if (is_ia32_xStore(succ)) {
5823 /* an xStore can be patched into an vfst */
5824 dbg_info *db = get_irn_dbg_info(succ);
5825 ir_node *block = get_nodes_block(succ);
5826 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5827 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5828 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5829 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5830 ir_mode *mode = get_ia32_ls_mode(succ);
5832 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
/* carry over the xStore's addressing attributes */
5833 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5834 if (is_ia32_use_frame(succ))
5835 set_ia32_use_frame(st);
5836 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5837 set_irn_pinned(st, get_irn_pinned(succ));
5838 set_ia32_op_type(st, ia32_AddrModeD);
/* build the spill/reload pair lazily, once per result */
5842 if (new_res == NULL) {
5843 dbg_info *db = get_irn_dbg_info(call);
5844 ir_node *block = get_nodes_block(call);
5845 ir_node *frame = get_irg_frame(current_ir_graph);
5846 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5847 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5848 ir_node *vfst, *xld, *new_mem;
5850 /* store st(0) on stack */
5851 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5852 set_ia32_op_type(vfst, ia32_AddrModeD);
5853 set_ia32_use_frame(vfst);
5855 /* load into SSE register */
5856 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5857 set_ia32_op_type(xld, ia32_AddrModeS);
5858 set_ia32_use_frame(xld);
5860 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5861 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* route all memory users of the call through the new load */
5863 if (old_mem != NULL) {
5864 edges_reroute(old_mem, new_mem, current_ir_graph);
5868 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5875 /* do the transformation */
/* Entry point: transform the whole graph from firm IR into ia32 nodes.
 * Sets up the transformer table, heights info and address-mode analysis,
 * disables CSE during the transform (node attributes are set after node
 * creation), runs the walk, and post-processes float call results when
 * compiling for SSE2. */
5876 void ia32_transform_graph(ia32_code_gen_t *cg)
5880 register_transformers();
5882 initial_fpcw = NULL;
/* heights are consulted by address-mode matching (dependency checks) */
5885 be_timer_push(T_HEIGHTS);
5886 heights = heights_new(cg->irg);
5887 be_timer_pop(T_HEIGHTS);
5888 ia32_calculate_non_address_mode_nodes(cg->irg);
5890 /* the transform phase is not safe for CSE (yet) because several nodes get
5891 * attributes set after their creation */
5892 cse_last = get_opt_cse();
/* call_list/call_types record calls for postprocess_fp_call_results() */
5895 call_list = NEW_ARR_F(ir_node *, 0);
5896 call_types = NEW_ARR_F(ir_type *, 0);
5897 be_transform_graph(cg->irg, ia32_pretransform_node);
5899 if (ia32_cg_config.use_sse2)
5900 postprocess_fp_call_results();
5901 DEL_ARR_F(call_types);
5902 DEL_ARR_F(call_list);
/* restore the caller's CSE setting and free analysis data */
5904 set_opt_cse(cse_last);
5906 ia32_free_non_address_mode_nodes();
5907 heights_free(heights);
5911 void ia32_init_transform(void)
5913 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");