2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the IR transformation from firm into ia32-Firm.
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
81 #define ULL_BIAS "18446744073709551616"
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
94 static ir_node *initial_fpcw = NULL;
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* it's enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
166 * returns true if constant can be created with a simple float command
/* NOTE(review): truncated extraction -- the gaps in the embedded line
 * numbering (172-175, 178-181, 188, 190-191, 194-195) show that interior
 * lines (the rest of the null-check condition, the #endif partner of the
 * first #ifdef, the returns) are missing here. Do not edit logic without
 * the complete source. */
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* assemble the low 32 bits of the double constant byte-wise */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
201 static ir_node *get_symconst_base(void)
203 if (be_get_irg_options(env_cg->irg)->pic) {
204 return arch_code_generator_get_pic_base(env_cg);
211 * Transforms a Const.
/* NOTE(review): truncated extraction -- many interior lines are missing
 * (local declarations, closing braces, else-branches); keep logic edits
 * for the complete source. The visible structure: float constants become
 * SSE loads/zeros (use_sse2) or x87 fldz/fld1/vfld, integer constants
 * become an ia32_Const with the tarval converted to mode_Iu. */
213 static ir_node *gen_Const(ir_node *node)
215 ir_node *old_block = get_nodes_block(node);
216 ir_node *block = be_transform_node(old_block);
217 dbg_info *dbgi = get_irn_dbg_info(node);
218 ir_mode *mode = get_irn_mode(node);
220 assert(is_Const(node));
222 if (mode_is_float(mode)) {
228 if (ia32_cg_config.use_sse2) {
229 tarval *tv = get_Const_tarval(node);
230 if (tarval_is_null(tv)) {
/* 0.0 has a dedicated zeroing instruction */
231 load = new_bd_ia32_xZero(dbgi, block);
232 set_ia32_ls_mode(load, mode);
234 #ifdef CONSTRUCT_SSE_CONST
235 } else if (tarval_is_one(tv)) {
/* construct 1.0 by shifting an all-ones register: shift counts differ
 * for single (26) vs. double (55) precision */
236 int cnst = mode == mode_F ? 26 : 55;
237 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
238 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
239 ir_node *pslld, *psrld;
241 load = new_bd_ia32_xAllOnes(dbgi, block);
242 set_ia32_ls_mode(load, mode);
243 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
244 set_ia32_ls_mode(pslld, mode);
245 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
246 set_ia32_ls_mode(psrld, mode);
248 #endif /* CONSTRUCT_SSE_CONST */
249 } else if (mode == mode_F) {
250 /* we can place any 32bit constant by using a movd gp, sse */
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
255 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
256 load = new_bd_ia32_xMovd(dbgi, block, cnst);
257 set_ia32_ls_mode(load, mode);
260 #ifdef CONSTRUCT_SSE_CONST
261 if (mode == mode_D) {
262 unsigned val = get_tarval_sub_bits(tv, 0) |
263 (get_tarval_sub_bits(tv, 1) << 8) |
264 (get_tarval_sub_bits(tv, 2) << 16) |
265 (get_tarval_sub_bits(tv, 3) << 24);
267 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
268 ir_node *cnst, *psllq;
270 /* fine, lower 32bit are zero, produce 32bit value */
271 val = get_tarval_sub_bits(tv, 4) |
272 (get_tarval_sub_bits(tv, 5) << 8) |
273 (get_tarval_sub_bits(tv, 6) << 16) |
274 (get_tarval_sub_bits(tv, 7) << 24);
275 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
276 load = new_bd_ia32_xMovd(dbgi, block, cnst);
277 set_ia32_ls_mode(load, mode);
/* shift the high 32 bits into place */
278 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
279 set_ia32_ls_mode(psllq, mode);
284 #endif /* CONSTRUCT_SSE_CONST */
/* general case: materialize the constant in memory and load it */
285 floatent = create_float_const_entity(node);
287 base = get_symconst_base();
288 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
290 set_ia32_op_type(load, ia32_AddrModeS);
291 set_ia32_am_sc(load, floatent);
292 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
293 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: 0.0 and 1.0 have dedicated load instructions */
296 if (is_Const_null(node)) {
297 load = new_bd_ia32_vfldz(dbgi, block);
299 set_ia32_ls_mode(load, mode);
300 } else if (is_Const_one(node)) {
301 load = new_bd_ia32_vfld1(dbgi, block);
303 set_ia32_ls_mode(load, mode);
308 floatent = create_float_const_entity(node);
309 /* create_float_const_ent is smart and sometimes creates
311 ls_mode = get_type_mode(get_entity_type(floatent));
312 base = get_symconst_base();
313 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
315 set_ia32_op_type(load, ia32_AddrModeS);
316 set_ia32_am_sc(load, floatent);
317 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
318 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
321 #ifdef CONSTRUCT_SSE_CONST
323 #endif /* CONSTRUCT_SSE_CONST */
324 SET_IA32_ORIG_NODE(load, node);
326 be_dep_on_frame(load);
328 } else { /* non-float mode */
330 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned before extracting the value */
333 tv = tarval_convert_to(tv, mode_Iu);
335 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
337 panic("couldn't convert constant tarval (%+F)", node);
339 val = get_tarval_long(tv);
341 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
342 SET_IA32_ORIG_NODE(cnst, node);
344 be_dep_on_frame(cnst);
350 * Transforms a SymConst.
/* NOTE(review): truncated extraction -- the else/closing-brace lines and
 * local declarations are missing (gaps in embedded numbering). Float
 * SymConsts become loads (SSE xLoad or x87 vfld, both with mode_E here);
 * integer SymConsts must be symconst_addr_ent and become an ia32_Const
 * carrying the entity. */
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
377 SET_IA32_ORIG_NODE(cnst, node);
379 be_dep_on_frame(cnst);
384 * Create a float type for the given mode and cache it.
386 * @param mode the mode for the float type (might be integer mode for SSE2 types)
387 * @param align alignment
/* NOTE(review): truncated extraction -- the local "tp" declaration, an
 * assert on align, and the closing braces are missing. One static cache
 * array per mode, indexed by alignment (arrays sized 16, so align is
 * presumably < 16 -- TODO confirm the assert in the full source). */
389 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
395 if (mode == mode_Iu) {
396 static ir_type *int_Iu[16] = {NULL, };
398 if (int_Iu[align] == NULL) {
399 int_Iu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Iu[align];
404 } else if (mode == mode_Lu) {
405 static ir_type *int_Lu[16] = {NULL, };
407 if (int_Lu[align] == NULL) {
408 int_Lu[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return int_Lu[align];
413 } else if (mode == mode_F) {
414 static ir_type *float_F[16] = {NULL, };
416 if (float_F[align] == NULL) {
417 float_F[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_F[align];
422 } else if (mode == mode_D) {
423 static ir_type *float_D[16] = {NULL, };
425 if (float_D[align] == NULL) {
426 float_D[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_D[align];
/* fallback cache for the remaining (extended precision) modes */
432 static ir_type *float_E[16] = {NULL, };
434 if (float_E[align] == NULL) {
435 float_E[align] = tp = new_type_primitive(mode);
436 /* set the specified alignment */
437 set_type_alignment_bytes(tp, align);
439 return float_E[align];
444 * Create a float[2] array type for the given atomic type.
446 * @param tp the atomic type
/* NOTE(review): truncated extraction -- local declarations ("arr"),
 * array-bounds setup lines and closing braces are missing. Caches one
 * 2-element array type per (mode, alignment) pair; size is fixed to
 * twice the element size. */
448 static ir_type *ia32_create_float_array(ir_type *tp)
450 ir_mode *mode = get_type_mode(tp);
451 unsigned align = get_type_alignment_bytes(tp);
456 if (mode == mode_F) {
457 static ir_type *float_F[16] = {NULL, };
459 if (float_F[align] != NULL)
460 return float_F[align];
461 arr = float_F[align] = new_type_array(1, tp);
462 } else if (mode == mode_D) {
463 static ir_type *float_D[16] = {NULL, };
465 if (float_D[align] != NULL)
466 return float_D[align];
467 arr = float_D[align] = new_type_array(1, tp);
469 static ir_type *float_E[16] = {NULL, };
471 if (float_E[align] != NULL)
472 return float_E[align];
473 arr = float_E[align] = new_type_array(1, tp);
475 set_type_alignment_bytes(arr, align);
476 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
477 set_type_state(arr, layout_fixed);
481 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* NOTE(review): truncated extraction -- struct field declarations (mode
 * and align members), local declarations (ent, tv, tp, mode) and closing
 * braces are missing. Lazily creates and caches one global constant
 * entity per ia32_known_const_t; ULL_BIAS gets a 2-element compound
 * initializer {0, bias}, the rest a single tarval initializer. */
482 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
484 static const struct {
485 const char *ent_name;
486 const char *cnst_str;
489 } names [ia32_known_const_max] = {
490 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
491 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
492 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
493 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
494 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
496 static ir_entity *ent_cache[ia32_known_const_max];
498 const char *ent_name, *cnst_str;
504 ent_name = names[kct].ent_name;
505 if (! ent_cache[kct]) {
506 cnst_str = names[kct].cnst_str;
/* mode tag: 0 = 32-bit unsigned, 1 = 64-bit unsigned, else float */
508 switch (names[kct].mode) {
509 case 0: mode = mode_Iu; break;
510 case 1: mode = mode_Lu; break;
511 default: mode = mode_F; break;
513 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
514 tp = ia32_create_float_type(mode, names[kct].align);
516 if (kct == ia32_ULLBIAS)
517 tp = ia32_create_float_array(tp);
518 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
520 set_entity_ld_ident(ent, get_entity_ident(ent));
521 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
522 set_entity_visibility(ent, ir_visibility_private);
524 if (kct == ia32_ULLBIAS) {
525 ir_initializer_t *initializer = create_initializer_compound(2);
527 set_initializer_compound_value(initializer, 0,
528 create_initializer_tarval(get_mode_null(mode)));
529 set_initializer_compound_value(initializer, 1,
530 create_initializer_tarval(tv));
532 set_entity_initializer(ent, initializer);
534 set_entity_initializer(ent, create_initializer_tarval(tv));
537 /* cache the entry */
538 ent_cache[kct] = ent;
541 return ent_cache[kct];
545 * return true if the node is a Proj(Load) and could be used in source address
546 * mode for another node. Will return only true if the @p other node is not
547 * dependent on the memory of the Load (for binary operations use the other
548 * input here, for unary operations use NULL).
/* NOTE(review): truncated extraction -- return statements and several
 * guard lines are missing (gaps in embedded numbering). The visible
 * checks: simple float constants qualify; otherwise the node must be the
 * Proj-res of a Load in the same block, be the load's only user (or one
 * of two with match_two_users), not already be transformed, and the
 * other operands must not depend on the load's memory. */
550 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
551 ir_node *other, ir_node *other2, match_flags_t flags)
556 /* float constants are always available */
557 if (is_Const(node)) {
558 ir_mode *mode = get_irn_mode(node);
559 if (mode_is_float(mode)) {
560 if (ia32_cg_config.use_sse2) {
561 if (is_simple_sse_Const(node))
564 if (is_simple_x87_Const(node))
567 if (get_irn_n_edges(node) > 1)
575 load = get_Proj_pred(node);
576 pn = get_Proj_proj(node);
577 if (!is_Load(load) || pn != pn_Load_res)
579 if (get_nodes_block(load) != block)
581 /* we only use address mode if we're the only user of the load */
582 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
584 /* in some edge cases with address mode we might reach the load normally
585 * and through some AM sequence, if it is already materialized then we
586 * can't create an AM node from it */
587 if (be_is_transformed(node))
590 /* don't do AM if other node inputs depend on the load (via mem-proj) */
591 if (other != NULL && prevents_AM(block, load, other))
594 if (other2 != NULL && prevents_AM(block, load, other2))
600 typedef struct ia32_address_mode_t ia32_address_mode_t;
/* NOTE(review): truncated struct -- fields between the embedded line
 * numbers (601..611) are missing from this extraction (e.g. addr,
 * ls_mode, mem_proj, pinned, new_op1/new_op2 are referenced elsewhere
 * in this file). */
601 struct ia32_address_mode_t {
606 ia32_op_type_t op_type;
610 unsigned commutative : 1;
611 unsigned ins_permuted : 1;
614 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
616 /* construct load address */
617 memset(addr, 0, sizeof(addr[0]));
618 ia32_create_address_mode(addr, ptr, 0);
620 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
621 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
622 addr->mem = be_transform_node(mem);
/* Fill an ia32_address_mode_t from a node that is either a float Const
 * (materialized as an entity load) or a Proj(Load) (source address mode).
 * NOTE(review): truncated extraction -- assert/early-return lines and
 * some field assignments are missing (gaps in embedded numbering). */
625 static void build_address(ia32_address_mode_t *am, ir_node *node,
626 ia32_create_am_flags_t flags)
628 ia32_address_t *addr = &am->addr;
634 /* floating point immediates */
635 if (is_Const(node)) {
636 ir_entity *entity = create_float_const_entity(node);
637 addr->base = get_symconst_base();
638 addr->index = noreg_GP;
640 addr->symconst_ent = entity;
642 am->ls_mode = get_type_mode(get_entity_type(entity));
643 am->pinned = op_pin_state_floats;
/* otherwise: node is the result Proj of a Load */
647 load = get_Proj_pred(node);
648 ptr = get_Load_ptr(load);
649 mem = get_Load_mem(load);
650 new_mem = be_transform_node(mem);
651 am->pinned = get_irn_pinned(load);
652 am->ls_mode = get_Load_mode(load);
653 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
656 /* construct load address */
657 ia32_create_address_mode(addr, ptr, flags);
659 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
660 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copy an ia32_address_t's fields (scale, symconst, offset, frame info)
 * onto an ia32 node's attributes. */
664 static void set_address(ir_node *node, const ia32_address_t *addr)
666 set_ia32_am_scale(node, addr->scale);
667 set_ia32_am_sc(node, addr->symconst_ent);
668 set_ia32_am_offs_int(node, addr->offset);
669 if (addr->symconst_sign)
670 set_ia32_am_sc_sign(node);
/* NOTE(review): embedded line 671 is missing from this extraction --
 * presumably an "if (addr->use_frame)" guard for the next statement;
 * confirm against the full source. */
672 set_ia32_use_frame(node);
673 set_ia32_frame_ent(node, addr->frame_entity);
677 * Apply attributes of a given address mode to a node.
/* NOTE(review): truncated extraction -- embedded lines 689-690 are
 * missing; presumably an "if (am->commutative)" guard precedes
 * set_ia32_commutative (confirm against full source). */
679 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
681 set_address(node, &am->addr);
683 set_ia32_op_type(node, am->op_type);
684 set_ia32_ls_mode(node, am->ls_mode);
685 if (am->pinned == op_pin_state_pinned) {
686 /* beware: some nodes are already pinned and did not allow to change the state */
687 if (get_irn_pinned(node) != op_pin_state_pinned)
688 set_irn_pinned(node, op_pin_state_pinned);
691 set_ia32_commutative(node);
695 * Check, if a given node is a Down-Conv, ie. a integer Conv
696 * from a mode with a mode with more bits to a mode with lesser bits.
697 * Moreover, we return only true if the node has not more than 1 user.
699 * @param node the node
700 * @return non-zero if node is a Down-Conv
/* NOTE(review): truncated extraction -- the is_Conv() guard, local mode
 * declarations and the "return" keyword of the final expression are
 * missing (gaps in embedded numbering). */
702 static int is_downconv(const ir_node *node)
710 /* we only want to skip the conv when we're the only user
711 * (because this test is used in the context of address-mode selection
712 * and we don't want to use address mode for multiple users) */
713 if (get_irn_n_edges(node) > 1)
716 src_mode = get_irn_mode(get_Conv_op(node));
717 dest_mode = get_irn_mode(node);
719 ia32_mode_needs_gp_reg(src_mode) &&
720 ia32_mode_needs_gp_reg(dest_mode) &&
721 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
724 /** Skip all Down-Conv's on a given node and return the resulting node. */
725 ir_node *ia32_skip_downconv(ir_node *node)
727 while (is_downconv(node))
728 node = get_Conv_op(node);
/* Check whether a node is a same-size integer Conv (e.g. signedness
 * change) with at most one user -- analogous to is_downconv above.
 * NOTE(review): truncated extraction -- the is_Conv() guard, local mode
 * declarations and the "return" keyword are missing. */
733 static bool is_sameconv(ir_node *node)
741 /* we only want to skip the conv when we're the only user
742 * (because this test is used in the context of address-mode selection
743 * and we don't want to use address mode for multiple users) */
744 if (get_irn_n_edges(node) > 1)
747 src_mode = get_irn_mode(get_Conv_op(node));
748 dest_mode = get_irn_mode(node);
750 ia32_mode_needs_gp_reg(src_mode) &&
751 ia32_mode_needs_gp_reg(dest_mode) &&
752 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
755 /** Skip all signedness convs */
756 static ir_node *ia32_skip_sameconv(ir_node *node)
758 while (is_sameconv(node))
759 node = get_Conv_op(node);
/* Widen a node to 32 bit via an I2I Conv; target mode depends on the
 * signedness of the source mode.
 * NOTE(review): truncated extraction -- the tgt_mode assignments and
 * the else-branch are missing (gaps 767-775, 778). */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if (mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): truncated extraction -- many interior lines (local
 * declarations, else-branches, closing braces, several assignments) are
 * missing; gaps in the embedded numbering mark them. Do not edit logic
 * here without the complete source. */
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit AM only when the corresponding flag allows it */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
/* use op2 as the source-address-mode operand */
847 build_address(am, op2, 0);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if (mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
/* operands are swapped: op1 goes into memory, so mark ins_permuted */
859 build_address(am, op1, 0);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if (new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
877 am->op_type = ia32_Normal;
879 if (flags & match_try_am) {
/* plain (non-AM) path: widen to 32 bit if requested */
885 mode = get_irn_mode(op2);
886 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
887 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
889 new_op2 = create_upconv(op2, NULL);
890 am->ls_mode = mode_Iu;
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
895 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* NOTE(review): truncated extraction -- local declarations, the early
 * "return node" paths and the closing brace are missing. */
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
/* redirect the old Load's users to the new AM node */
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
/* NOTE(review): truncated extraction -- dbgi declaration, the final
 * "return new_node;" and the closing brace are missing. */
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
973 new_node = fix_mem_proj(new_node, &am);
979 * Generic names for the inputs of an ia32 binary op.
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
986 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
/* NOTE(review): truncated extraction -- dbgi declaration, the final
 * return and the closing brace are missing. Mirrors gen_binop but feeds
 * the transformed eflags input to the constructor. */
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
1032 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(env_cg->irg),
1039 &ia32_fp_cw_regs[REG_FPCW]);
1040 initial_fpcw = be_transform_node(fpcw);
1042 return initial_fpcw;
1046 * Construct a standard binary operation, set AM and immediate if required.
1048 * @param op1 The first operand
1049 * @param op2 The second operand
1050 * @param func The node constructor function
1051 * @return The constructed ia32 node.
/* NOTE(review): truncated extraction -- dbgi declaration, the
 * "if (is_Div/..)" guard before get_divop_resmod, the "flags |= match_am"
 * consequence of the 64-bit check, the final return and closing brace
 * are missing (gaps in embedded numbering). */
1053 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1054 construct_binop_float_func *func)
1056 ir_mode *mode = get_irn_mode(node);
1058 ir_node *block, *new_block, *new_node;
1059 ia32_address_mode_t am;
1060 ia32_address_t *addr = &am.addr;
1061 ia32_x87_attr_t *attr;
1062 /* All operations are considered commutative, because there are reverse
1064 match_flags_t flags = match_commutative;
1066 /* happens for div nodes... */
1068 mode = get_divop_resmod(node);
1070 /* cannot use address mode with long double on x87 */
1071 if (get_mode_size_bits(mode) <= 64)
1074 block = get_nodes_block(node);
1075 match_arguments(&am, block, op1, op2, NULL, flags);
1077 dbgi = get_irn_dbg_info(node);
1078 new_block = be_transform_node(block);
/* x87 binops additionally take the FPU control word */
1079 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1080 am.new_op1, am.new_op2, get_fpcw());
1081 set_am_attributes(new_node, &am);
1083 attr = get_ia32_x87_attr(new_node);
1084 attr->attr.data.ins_permuted = am.ins_permuted;
1086 SET_IA32_ORIG_NODE(new_node, node);
1088 new_node = fix_mem_proj(new_node, &am);
1094 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1096 * @param op1 The first operand
1097 * @param op2 The second operand
1098 * @param func The node constructor function
1099 * @return The constructed ia32 node.
/* NOTE(review): truncated extraction -- dbgi declaration, loop-body
 * lines of the Conv-skipping while (break + op2 assignment), the final
 * return and closing brace are missing (gaps in embedded numbering). */
1101 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1102 construct_shift_func *func,
1103 match_flags_t flags)
1106 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1108 assert(! mode_is_float(get_irn_mode(node)));
1109 assert(flags & match_immediate);
1110 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1112 if (flags & match_mode_neutral) {
1113 op1 = ia32_skip_downconv(op1);
1114 new_op1 = be_transform_node(op1);
1115 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1116 new_op1 = create_upconv(op1, node);
1118 new_op1 = be_transform_node(op1);
1121 /* the shift amount can be any mode that is bigger than 5 bits, since all
1122 * other bits are ignored anyway */
1123 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1124 ir_node *const op = get_Conv_op(op2);
1125 if (mode_is_float(get_irn_mode(op)))
1128 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1130 new_op2 = create_immediate_or_transform(op2, 0);
1132 dbgi = get_irn_dbg_info(node);
1133 block = get_nodes_block(node);
1134 new_block = be_transform_node(block);
1135 new_node = func(dbgi, new_block, new_op1, new_op2);
1136 SET_IA32_ORIG_NODE(new_node, node);
1138 /* lowered shift instruction may have a dependency operand, handle it here */
1139 if (get_irn_arity(node) == 3) {
1140 /* we have a dependency */
1141 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1142 add_irn_dep(new_node, new_dep);
1150 * Construct a standard unary operation, set AM and immediate if required.
1152 * @param op The operand
1153 * @param func The node constructor function
1154 * @return The constructed ia32 node.
1156 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1157 match_flags_t flags)
1160 ir_node *block, *new_block, *new_op, *new_node;
1162 assert(flags == 0 || flags == match_mode_neutral);
1163 if (flags & match_mode_neutral) {
1164 op = ia32_skip_downconv(op);
1167 new_op = be_transform_node(op);
1168 dbgi = get_irn_dbg_info(node);
1169 block = get_nodes_block(node);
1170 new_block = be_transform_node(block);
1171 new_node = func(dbgi, new_block, new_op);
1173 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Lea node from an ia32_address_t, substituting noreg_GP
 * for absent base/index operands.
 * NOTE(review): truncated extraction -- the base-NULL branch, the
 * index-NULL assignment, the final return and closing brace are missing
 * (gaps in embedded numbering). */
1178 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1179 ia32_address_t *addr)
1181 ir_node *base, *index, *res;
1187 base = be_transform_node(base);
1190 index = addr->index;
1191 if (index == NULL) {
1194 index = be_transform_node(index);
1197 res = new_bd_ia32_Lea(dbgi, block, base, index);
1198 set_address(res, addr);
1204 * Returns non-zero if a given address mode has a symbolic or
1205 * numerical offset != 0.
1207 static int am_has_immediates(const ia32_address_t *addr)
1209 return addr->offset != 0 || addr->symconst_ent != NULL
1210 || addr->frame_entity || addr->use_frame;
1214 * Creates an ia32 Add.
1216 * @return the created ia32 Add node
/* NOTE(review): truncated extraction -- several interior lines are
 * missing (the mode_neutral guard around the downconv-skips, returns,
 * closing braces); gaps in the embedded numbering mark them. Strategy
 * (per the comment at 1243-1247): fold immediate trees to Const, prefer
 * Lea for add-with-immediate, use source AM Add where possible,
 * otherwise fall back to Lea. */
1218 static ir_node *gen_Add(ir_node *node)
1220 ir_mode *mode = get_irn_mode(node);
1221 ir_node *op1 = get_Add_left(node);
1222 ir_node *op2 = get_Add_right(node);
1224 ir_node *block, *new_block, *new_node, *add_immediate_op;
1225 ia32_address_t addr;
1226 ia32_address_mode_t am;
1228 if (mode_is_float(mode)) {
1229 if (ia32_cg_config.use_sse2)
1230 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1231 match_commutative | match_am);
1233 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1236 ia32_mark_non_am(node);
1238 op2 = ia32_skip_downconv(op2);
1239 op1 = ia32_skip_downconv(op1);
1243 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1244 * 1. Add with immediate -> Lea
1245 * 2. Add with possible source address mode -> Add
1246 * 3. Otherwise -> Lea
1248 memset(&addr, 0, sizeof(addr));
1249 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1250 add_immediate_op = NULL;
1252 dbgi = get_irn_dbg_info(node);
1253 block = get_nodes_block(node);
1254 new_block = be_transform_node(block);
/* case 0: everything folded into immediates -> plain Const */
1257 if (addr.base == NULL && addr.index == NULL) {
1258 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1259 addr.symconst_sign, 0, addr.offset);
1260 be_dep_on_frame(new_node);
1261 SET_IA32_ORIG_NODE(new_node, node);
1264 /* add with immediate? */
1265 if (addr.index == NULL) {
1266 add_immediate_op = addr.base;
1267 } else if (addr.base == NULL && addr.scale == 0) {
1268 add_immediate_op = addr.index;
1271 if (add_immediate_op != NULL) {
1272 if (!am_has_immediates(&addr)) {
1273 #ifdef DEBUG_libfirm
1274 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1277 return be_transform_node(add_immediate_op);
1280 new_node = create_lea_from_address(dbgi, new_block, &addr);
1281 SET_IA32_ORIG_NODE(new_node, node);
1285 /* test if we can use source address mode */
1286 match_arguments(&am, block, op1, op2, NULL, match_commutative
1287 | match_mode_neutral | match_am | match_immediate | match_try_am);
1289 /* construct an Add with source address mode */
1290 if (am.op_type == ia32_AddrModeS) {
1291 ia32_address_t *am_addr = &am.addr;
1292 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1293 am_addr->index, am_addr->mem, am.new_op1,
1295 set_am_attributes(new_node, &am);
1296 SET_IA32_ORIG_NODE(new_node, node);
1298 new_node = fix_mem_proj(new_node, &am);
1303 /* otherwise construct a lea */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1310 * Creates an ia32 Mul.
1312 * @return the created ia32 Mul node
1314 static ir_node *gen_Mul(ir_node *node)
1316 ir_node *op1 = get_Mul_left(node);
1317 ir_node *op2 = get_Mul_right(node);
1318 ir_mode *mode = get_irn_mode(node);
/* floating-point multiply: SSE2 xMul if available, else x87 vfmul */
1320 if (mode_is_float(mode)) {
1321 if (ia32_cg_config.use_sse2)
1322 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1323 match_commutative | match_am);
1325 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul, which may fold a memory operand or an
 * immediate on either side (match_am_and_immediates) */
1327 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1328 match_commutative | match_am | match_mode_neutral |
1329 match_immediate | match_am_and_immediates);
1333 * Creates an ia32 Mulh.
1334 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1335 * this result while Mul returns the lower 32 bit.
1337 * @return the created ia32 Mulh node
1339 static ir_node *gen_Mulh(ir_node *node)
1341 dbg_info *dbgi = get_irn_dbg_info(node);
1342 ir_node *op1 = get_Mulh_left(node);
1343 ir_node *op2 = get_Mulh_right(node);
1344 ir_mode *mode = get_irn_mode(node);
1346 ir_node *proj_res_high;
/* ia32 one-operand mul/imul only produce a 64bit result for 32bit
 * inputs; smaller/larger modes are not handled here */
1348 if (get_mode_size_bits(mode) != 32) {
1349 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> IMul1OP (one-operand imul), unsigned -> Mul; in both cases
 * the high 32 bits of the product are extracted via a Proj */
1352 if (mode_is_signed(mode)) {
1353 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1354 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1359 return proj_res_high;
1363 * Creates an ia32 And.
1365 * @return The created ia32 And node
1367 static ir_node *gen_And(ir_node *node)
1369 ir_node *op1 = get_And_left(node);
1370 ir_node *op2 = get_And_right(node);
1371 assert(! mode_is_float(get_irn_mode(node)));
1373 /* is it a zero extension? */
1374 if (is_Const(op2)) {
1375 tarval *tv = get_Const_tarval(op2);
1376 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit; emit a
 * movzx-style Conv instead of an and instruction */
1378 if (v == 0xFF || v == 0xFFFF) {
1379 dbg_info *dbgi = get_irn_dbg_info(node);
1380 ir_node *block = get_nodes_block(node);
1387 assert(v == 0xFFFF);
1390 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And with AM/immediate matching */
1395 return gen_binop(node, op1, op2, new_bd_ia32_And,
1396 match_commutative | match_mode_neutral | match_am | match_immediate);
1402 * Creates an ia32 Or.
1404 * @return The created ia32 Or node
1406 static ir_node *gen_Or(ir_node *node)
1408 ir_node *op1 = get_Or_left(node);
1409 ir_node *op2 = get_Or_right(node);
/* bitwise Or is integer-only at this point */
1411 assert (! mode_is_float(get_irn_mode(node)));
1412 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1413 | match_mode_neutral | match_am | match_immediate);
1419 * Creates an ia32 Eor.
1421 * @return The created ia32 Eor node
1423 static ir_node *gen_Eor(ir_node *node)
1425 ir_node *op1 = get_Eor_left(node);
1426 ir_node *op2 = get_Eor_right(node);
/* firm Eor maps onto the ia32 Xor instruction */
1428 assert(! mode_is_float(get_irn_mode(node)));
1429 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1430 | match_mode_neutral | match_am | match_immediate);
1435 * Creates an ia32 Sub.
1437 * @return The created ia32 Sub node
1439 static ir_node *gen_Sub(ir_node *node)
1441 ir_node *op1 = get_Sub_left(node);
1442 ir_node *op2 = get_Sub_right(node);
1443 ir_mode *mode = get_irn_mode(node);
/* floating-point subtract: SSE2 xSub or x87 vfsub; not commutative,
 * so no match_commutative here */
1445 if (mode_is_float(mode)) {
1446 if (ia32_cg_config.use_sse2)
1447 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1449 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middleend */
1452 if (is_Const(op2)) {
1453 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1457 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1458 | match_am | match_immediate);
/* Combine the original memory input of a node with the memory consumed by
 * its address mode, avoiding memory self-loops (the AM'd Load appearing in
 * its own memory chain).  Returns the memory input for the new AM node. */
1461 static ir_node *transform_AM_mem(ir_node *const block,
1462 ir_node *const src_val,
1463 ir_node *const src_mem,
1464 ir_node *const am_mem)
1466 if (is_NoMem(am_mem)) {
/* no memory consumed by the address mode: keep the original chain */
1467 return be_transform_node(src_mem);
1468 } else if (is_Proj(src_val) &&
1470 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1471 /* avoid memory loop */
1473 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the Proj of the folded Load */
1474 ir_node *const ptr_pred = get_Proj_pred(src_val);
1475 int const arity = get_Sync_n_preds(src_mem);
1480 NEW_ARR_A(ir_node*, ins, arity + 1);
1482 /* NOTE: This sometimes produces dead-code because the old sync in
1483 * src_mem might not be used anymore, we should detect this case
1484 * and kill the sync... */
1485 for (i = arity - 1; i >= 0; --i) {
1486 ir_node *const pred = get_Sync_pred(src_mem, i);
1488 /* avoid memory loop */
1489 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1492 ins[n++] = be_transform_node(pred);
1497 return new_r_Sync(block, n, ins);
/* fallback: merge the two independent memory chains with a 2-ary Sync */
1501 ins[0] = be_transform_node(src_mem);
1503 return new_r_Sync(block, 2, ins);
1508 * Create a 32bit to 64bit signed extension.
1510 * @param dbgi debug info
1511 * @param block the block where node nodes should be placed
1512 * @param val the value to extend
1513 * @param orig the original node
1515 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1516 ir_node *val, const ir_node *orig)
/* two strategies: the short cltd/cdq form (needs val in eax, modelled by
 * the ProduceVal pseudo-input) or an explicit sar val,31 */
1521 if (ia32_cg_config.use_short_sex_eax) {
1522 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1523 be_dep_on_frame(pval);
1524 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* arithmetic shift right by 31 replicates the sign bit */
1526 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1527 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1529 SET_IA32_ORIG_NODE(res, orig);
1534 * Generates an ia32 DivMod with additional infrastructure for the
1535 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod: all three map onto the
 * ia32 div/idiv instruction, which yields quotient and remainder at once. */
1537 static ir_node *create_Div(ir_node *node)
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1541 ir_node *new_block = be_transform_node(block);
1548 ir_node *sign_extension;
1549 ia32_address_mode_t am;
1550 ia32_address_t *addr = &am.addr;
1552 /* the upper bits have random contents for smaller modes */
/* fetch operands/mem/mode from whichever of the three node kinds this is */
1553 switch (get_irn_opcode(node)) {
1555 op1 = get_Div_left(node);
1556 op2 = get_Div_right(node);
1557 mem = get_Div_mem(node);
1558 mode = get_Div_resmode(node);
1561 op1 = get_Mod_left(node);
1562 op2 = get_Mod_right(node);
1563 mem = get_Mod_mem(node);
1564 mode = get_Mod_resmode(node);
1567 op1 = get_DivMod_left(node);
1568 op2 = get_DivMod_right(node);
1569 mem = get_DivMod_mem(node);
1570 mode = get_DivMod_resmode(node);
1573 panic("invalid divmod node %+F", node);
/* match_upconv_32: operands must be widened to full 32 bit first */
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* idiv needs edx:eax sign-extended (cltd/sar); div needs edx zeroed */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
/* Mod shares the div/idiv transformation; callers select the remainder Proj */
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
/* Div shares the div/idiv transformation; callers select the quotient Proj */
1619 return create_Div(node);
1623 * Generates an ia32 DivMod.
1625 static ir_node *gen_DivMod(ir_node *node)
/* DivMod shares the div/idiv transformation; both result Projs are used */
1627 return create_Div(node);
1633 * Creates an ia32 floating Div.
1635 * @return The created ia32 xDiv node
1637 static ir_node *gen_Quot(ir_node *node)
1639 ir_node *op1 = get_Quot_left(node);
1640 ir_node *op2 = get_Quot_right(node);
/* floating-point division: SSE2 divss/divsd via xDiv, else x87 vfdiv */
1642 if (ia32_cg_config.use_sse2) {
1643 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1645 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1651 * Creates an ia32 Shl.
1653 * @return The created ia32 Shl node
1655 static ir_node *gen_Shl(ir_node *node)
1657 ir_node *left = get_Shl_left(node);
1658 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: discarded high bits never matter */
1660 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1661 match_mode_neutral | match_immediate);
1665 * Creates an ia32 Shr.
1667 * @return The created ia32 Shr node
1669 static ir_node *gen_Shr(ir_node *node)
1671 ir_node *left = get_Shr_left(node);
1672 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode-neutral, the high bits shifted in matter */
1674 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1680 * Creates an ia32 Sar.
1682 * @return The created ia32 Shrs node
1684 static ir_node *gen_Shrs(ir_node *node)
1686 ir_node *left = get_Shrs_left(node);
1687 ir_node *right = get_Shrs_right(node);
/* Shrs x,31 extracts the sign bit of a 32bit value: use the cheaper
 * 32->64 sign-extension helper (elided guard presumably checks val==31
 * and 32bit mode — TODO confirm) */
1689 if (is_Const(right)) {
1690 tarval *tv = get_Const_tarval(right);
1691 long val = get_tarval_long(tv);
1693 /* this is a sign extension */
1694 dbg_info *dbgi = get_irn_dbg_info(node);
1695 ir_node *block = be_transform_node(get_nodes_block(node));
1696 ir_node *new_op = be_transform_node(left);
1698 return create_sex_32_64(dbgi, block, new_op, node);
1702 /* 8 or 16 bit sign extension? */
1703 if (is_Const(right) && is_Shl(left)) {
/* Shrs(Shl(x, C), C) with C in {16,24} is a sign-extension from
 * 16/8 bit: emit a movsx-style Conv instead of two shifts */
1704 ir_node *shl_left = get_Shl_left(left);
1705 ir_node *shl_right = get_Shl_right(left);
1706 if (is_Const(shl_right)) {
1707 tarval *tv1 = get_Const_tarval(right);
1708 tarval *tv2 = get_Const_tarval(shl_right);
1709 if (tv1 == tv2 && tarval_is_long(tv1)) {
1710 long val = get_tarval_long(tv1);
1711 if (val == 16 || val == 24) {
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_node *block = get_nodes_block(node);
1723 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain sar; like Shr this is not mode-neutral */
1732 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1738 * Creates an ia32 Rol.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotL node
1744 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
/* rotate left: rol supports an immediate rotate count */
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1752 * Creates an ia32 Ror.
1753 * NOTE: There is no RotR with immediate because this would always be a RotL
1754 * "imm-mode_size_bits" which can be pre-calculated.
1756 * @param op1 The first operator
1757 * @param op2 The second operator
1758 * @return The created ia32 RotR node
1760 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
/* rotate right; immediate counts should already be canonicalized to Rol
 * (see comment above), but match_immediate keeps this tolerant */
1762 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1768 * Creates an ia32 RotR or RotL (depending on the found pattern).
1770 * @return The created ia32 RotL or RotR node
1772 static ir_node *gen_Rotl(ir_node *node)
1774 ir_node *op1 = get_Rotl_left(node);
1775 ir_node *op2 = get_Rotl_right(node);
/* Rotl(x, -n) is the same as Ror(x, n): fold the Minus into the rotate
 * direction (valid because rotate counts are taken mod the word size) */
1777 if (is_Minus(op2)) {
1778 return gen_Ror(node, op1, get_Minus_op(op2));
1781 return gen_Rol(node, op1, op2);
1787 * Transforms a Minus node.
1789 * @return The created ia32 Minus node
1791 static ir_node *gen_Minus(ir_node *node)
1793 ir_node *op = get_Minus_op(node);
1794 ir_node *block = be_transform_node(get_nodes_block(node));
1795 dbg_info *dbgi = get_irn_dbg_info(node);
1796 ir_mode *mode = get_irn_mode(node);
1801 if (mode_is_float(mode)) {
1802 ir_node *new_op = be_transform_node(op);
1803 if (ia32_cg_config.use_sse2) {
1804 /* TODO: non-optimal... if we have many xXors, then we should
1805 * rather create a load for the const and use that instead of
1806 * several AM nodes... */
/* SSE negation: xor the sign bit with a constant mask loaded from
 * a known fp constant (SSIGN/DSIGN) via source address mode */
1807 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1809 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1810 noreg_GP, nomem, new_op, noreg_xmm);
1812 size = get_mode_size_bits(mode);
1813 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1815 set_ia32_am_sc(new_node, ent);
1816 set_ia32_op_type(new_node, ia32_AddrModeS);
1817 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction (fchs) */
1819 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negation: plain neg, mode-neutral */
1822 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1825 SET_IA32_ORIG_NODE(new_node, node);
1831 * Transforms a Not node.
1833 * @return The created ia32 Not node
1835 static ir_node *gen_Not(ir_node *node)
1837 ir_node *op = get_Not_op(node);
/* boolean and float Not must have been lowered before this phase */
1839 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1840 assert (! mode_is_float(get_irn_mode(node)));
1842 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Build abs(op) (or -abs(op) when negate is set).  Floats: mask/clear the
 * sign bit (SSE and-mask or x87 fabs); integers: the branch-free
 * sign-extension/xor/sub idiom. */
1845 static ir_node *create_abs(dbg_info *dbgi, ir_node *block, ir_node *op,
1846 bool negate, ir_node *node)
1848 ir_node *new_block = be_transform_node(block);
1849 ir_mode *mode = get_irn_mode(op);
1855 if (mode_is_float(mode)) {
1856 new_op = be_transform_node(op);
1858 if (ia32_cg_config.use_sse2) {
/* SSE abs: and with a mask constant (SABS/DABS) that clears the
 * sign bit, referenced via source address mode */
1859 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1860 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1861 noreg_GP, nomem, new_op, noreg_fp);
1863 size = get_mode_size_bits(mode);
1864 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1866 set_ia32_am_sc(new_node, ent);
1868 SET_IA32_ORIG_NODE(new_node, node);
1870 set_ia32_op_type(new_node, ia32_AddrModeS);
1871 set_ia32_ls_mode(new_node, mode);
1873 /* TODO, implement -Abs case */
/* x87: fabs, then fchs on top when the negated variant is requested */
1876 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1877 SET_IA32_ORIG_NODE(new_node, node);
1879 new_node = new_bd_ia32_vfchs(dbgi, new_block, new_node);
1880 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs: widen to 32 bit if needed, then
 * sex = val >> 31; abs = (val ^ sex) - sex  (negated form swaps the sub) */
1885 ir_node *sign_extension;
1887 if (get_mode_size_bits(mode) == 32) {
1888 new_op = be_transform_node(op);
1890 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1893 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1895 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1896 nomem, new_op, sign_extension);
1897 SET_IA32_ORIG_NODE(xor, node);
1900 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1901 nomem, sign_extension, xor);
1903 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1904 nomem, xor, sign_extension);
1906 SET_IA32_ORIG_NODE(new_node, node);
1913 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1915 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1917 dbg_info *dbgi = get_irn_dbg_info(cmp);
1918 ir_node *block = get_nodes_block(cmp);
1919 ir_node *new_block = be_transform_node(block);
1920 ir_node *op1 = be_transform_node(x);
1921 ir_node *op2 = be_transform_node(n);
/* bt copies bit n of x into the carry flag; callers test via Jc/Jnc */
1923 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1927 * Transform a node returning a "flag" result.
1929 * @param node the node to transform
1930 * @param pnc_out the compare mode to use
1932 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1939 /* we have a Cmp as input */
1940 if (is_Proj(node)) {
1941 ir_node *pred = get_Proj_pred(node);
1943 pn_Cmp pnc = get_Proj_proj(node);
/* try to recognise bit-test patterns ((1<<n)&x ==/!= 0) and emit a
 * bt instruction; the carry flag then carries the result */
1944 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1945 ir_node *l = get_Cmp_left(pred);
1946 ir_node *r = get_Cmp_right(pred);
1948 ir_node *la = get_And_left(l);
1949 ir_node *ra = get_And_right(l);
1951 ir_node *c = get_Shl_left(la);
1952 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1953 /* (1 << n) & ra) */
1954 ir_node *n = get_Shl_right(la);
1955 flags = gen_bt(pred, ra, n);
1956 /* we must generate a Jc/Jnc jump */
/* bt sets carry; map !=/== to below/above-or-equal tests */
1957 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1960 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: the shifted 1 is the right And operand */
1965 ir_node *c = get_Shl_left(ra);
1966 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1967 /* la & (1 << n)) */
1968 ir_node *n = get_Shl_right(ra);
1969 flags = gen_bt(pred, la, n);
1970 /* we must generate a Jc/Jnc jump */
1971 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1974 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1980 /* add ia32 compare flags */
1982 ir_node *l = get_Cmp_left(pred);
1983 ir_mode *mode = get_irn_mode(l);
1984 if (mode_is_float(mode))
1985 pnc |= ia32_pn_Cmp_float;
1986 else if (! mode_is_signed(mode))
1987 pnc |= ia32_pn_Cmp_unsigned;
/* generic case: transform the Cmp itself into a flags-producing node */
1990 flags = be_transform_node(pred);
1995 /* a mode_b value, we have to compare it against 0 */
1996 dbgi = get_irn_dbg_info(node);
1997 new_block = be_transform_node(get_nodes_block(node));
1998 new_op = be_transform_node(node);
/* test reg,reg sets ZF; "value != 0" corresponds to pn_Cmp_Lg */
1999 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2000 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2001 *pnc_out = pn_Cmp_Lg;
2006 * Transforms a Load.
2008 * @return the created ia32 Load node
2010 static ir_node *gen_Load(ir_node *node)
2012 ir_node *old_block = get_nodes_block(node);
2013 ir_node *block = be_transform_node(old_block);
2014 ir_node *ptr = get_Load_ptr(node);
2015 ir_node *mem = get_Load_mem(node);
2016 ir_node *new_mem = be_transform_node(mem);
2019 dbg_info *dbgi = get_irn_dbg_info(node);
2020 ir_mode *mode = get_Load_mode(node);
2022 ia32_address_t addr;
2024 /* construct load address */
2025 memset(&addr, 0, sizeof(addr));
2026 ia32_create_address_mode(&addr, ptr, 0);
2033 base = be_transform_node(base);
2036 if (index == NULL) {
2039 index = be_transform_node(index);
/* select the load flavour: SSE xLoad / x87 vfld for floats, Conv_I2I
 * (movsx/movzx with AM) for sub-32bit ints, plain mov otherwise */
2042 if (mode_is_float(mode)) {
2043 if (ia32_cg_config.use_sse2) {
2044 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2047 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2051 assert(mode != mode_b);
2053 /* create a conv node with address mode for smaller modes */
2054 if (get_mode_size_bits(mode) < 32) {
2055 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2056 new_mem, noreg_GP, mode);
2058 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2062 set_irn_pinned(new_node, get_irn_pinned(node));
2063 set_ia32_op_type(new_node, ia32_AddrModeS);
2064 set_ia32_ls_mode(new_node, mode);
2065 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller; the
 * assert checks all load variants share the same result Proj number */
2067 if (get_irn_pinned(node) == op_pin_state_floats) {
2068 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2069 && pn_ia32_vfld_res == pn_ia32_Load_res
2070 && pn_ia32_Load_res == pn_ia32_res);
2071 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2074 SET_IA32_ORIG_NODE(new_node, node);
2076 be_dep_on_frame(new_node);
/* Decide whether node (a Load result Proj) can be folded as the destination
 * address mode of a Store in the same block.  Conservative: bails out on
 * extra users, differing pointers, or dependency cycles. */
2080 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2081 ir_node *ptr, ir_node *other)
2088 /* we only use address mode if we're the only user of the load */
2089 if (get_irn_n_edges(node) > 1)
2092 load = get_Proj_pred(node);
2095 if (get_nodes_block(load) != block)
2098 /* store should have the same pointer as the load */
2099 if (get_Load_ptr(load) != ptr)
2102 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2103 if (other != NULL &&
2104 get_nodes_block(other) == block &&
2105 heights_reachable_in_block(heights, other, load)) {
/* folding would also require the Load's memory state to be compatible */
2109 if (prevents_AM(block, load, mem))
2111 /* Store should be attached to the load via mem */
2112 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binop (e.g. add [mem], reg) for a
 * Store(val op x) pattern, when the other side of the binop loads from the
 * same address.  Returns NULL (in the elided branch) if no operand
 * qualifies — TODO confirm. */
2117 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2118 ir_node *mem, ir_node *ptr, ir_mode *mode,
2119 construct_binop_dest_func *func,
2120 construct_binop_dest_func *func8bit,
2121 match_flags_t flags)
2123 ir_node *src_block = get_nodes_block(node);
2131 ia32_address_mode_t am;
2132 ia32_address_t *addr = &am.addr;
2133 memset(&am, 0, sizeof(am));
2135 assert(flags & match_immediate); /* there is no destam node without... */
2136 commutative = (flags & match_commutative) != 0;
/* the loading operand supplies the address; the other becomes the
 * (possibly immediate) source operand */
2138 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2139 build_address(&am, op1, ia32_create_am_double_use);
2140 new_op = create_immediate_or_transform(op2, 0);
2141 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2142 build_address(&am, op2, ia32_create_am_double_use);
2143 new_op = create_immediate_or_transform(op1, 0);
/* fill in defaults for unused address components */
2148 if (addr->base == NULL)
2149 addr->base = noreg_GP;
2150 if (addr->index == NULL)
2151 addr->index = noreg_GP;
2152 if (addr->mem == NULL)
2155 dbgi = get_irn_dbg_info(node);
2156 block = be_transform_node(src_block);
2157 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8bit operations need the dedicated 8bit constructor (byte registers) */
2159 if (get_mode_size_bits(mode) == 8) {
2160 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2162 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2164 set_address(new_node, addr);
2165 set_ia32_op_type(new_node, ia32_AddrModeD);
2166 set_ia32_ls_mode(new_node, mode);
2167 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed Load's memory users to the combined node */
2169 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2170 mem_proj = be_transform_node(am.mem_proj);
2171 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Destination-address-mode unop (e.g. not [mem], neg [mem]) for a
 * Store(unop(Load(ptr))) pattern; NULL when folding is not possible. */
2176 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2177 ir_node *ptr, ir_mode *mode,
2178 construct_unop_dest_func *func)
2180 ir_node *src_block = get_nodes_block(node);
2186 ia32_address_mode_t am;
2187 ia32_address_t *addr = &am.addr;
2189 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2192 memset(&am, 0, sizeof(am));
2193 build_address(&am, op, ia32_create_am_double_use);
2195 dbgi = get_irn_dbg_info(node);
2196 block = be_transform_node(src_block);
2197 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2198 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2199 set_address(new_node, addr);
2200 set_ia32_op_type(new_node, ia32_AddrModeD);
2201 set_ia32_ls_mode(new_node, mode);
2202 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the consumed Load's memory users to the combined node */
2204 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2205 mem_proj = be_transform_node(am.mem_proj);
2206 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2211 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
/* negation differs for float compares (unordered handling), so pick the
 * mode accordingly before delegating to get_negated_pnc */
2213 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2214 return get_negated_pnc(pnc, mode);
/* Try to turn Store(Mux(flags, 0/1)) into a setcc-to-memory (SetccMem).
 * Returns NULL (elided paths) when the pattern does not apply. */
2217 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2219 ir_mode *mode = get_irn_mode(node);
2220 ir_node *mux_true = get_Mux_true(node);
2221 ir_node *mux_false = get_Mux_false(node);
2230 ia32_address_t addr;
/* setcc only writes a single byte */
2232 if (get_mode_size_bits(mode) != 8)
/* accept Mux(c, 1, 0) directly and Mux(c, 0, 1) by negating the pnc */
2235 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2237 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2243 cond = get_Mux_sel(node);
2244 flags = get_flags_node(cond, &pnc);
2245 /* we can't handle the float special cases with SetM */
2246 if (pnc & ia32_pn_Cmp_float)
2249 pnc = ia32_get_negated_pnc(pnc);
2251 build_address_ptr(&addr, ptr, mem);
2253 dbgi = get_irn_dbg_info(node);
2254 block = get_nodes_block(node);
2255 new_block = be_transform_node(block);
2256 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2257 addr.index, addr.mem, flags, pnc);
2258 set_address(new_node, &addr);
2259 set_ia32_op_type(new_node, ia32_AddrModeD);
2260 set_ia32_ls_mode(new_node, mode);
2261 SET_IA32_ORIG_NODE(new_node, node);
/* Try to fold a Store's value computation into a destination-address-mode
 * instruction (op [mem], x).  Returns the new node or NULL if no pattern
 * matched. */
2266 static ir_node *try_create_dest_am(ir_node *node)
2268 ir_node *val = get_Store_value(node);
2269 ir_node *mem = get_Store_mem(node);
2270 ir_node *ptr = get_Store_ptr(node);
2271 ir_mode *mode = get_irn_mode(val);
2272 unsigned bits = get_mode_size_bits(mode);
2277 /* handle only GP modes for now... */
2278 if (!ia32_mode_needs_gp_reg(mode))
2282 /* store must be the only user of the val node */
2283 if (get_irn_n_edges(val) > 1)
2285 /* skip pointless convs */
2287 ir_node *conv_op = get_Conv_op(val);
2288 ir_mode *pred_mode = get_irn_mode(conv_op);
2289 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that doesn't narrow below the stored width is a no-op here */
2291 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2299 /* value must be in the same block */
2300 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2303 switch (get_irn_opcode(val)) {
2305 op1 = get_Add_left(val);
2306 op2 = get_Add_right(val);
/* Add/Sub by +-1 become inc/dec on memory when enabled */
2307 if (ia32_cg_config.use_incdec) {
2308 if (is_Const_1(op2)) {
2309 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2311 } else if (is_Const_Minus_1(op2)) {
2312 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2316 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2317 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2318 match_commutative | match_immediate);
2321 op1 = get_Sub_left(val);
2322 op2 = get_Sub_right(val);
2323 if (is_Const(op2)) {
2324 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2326 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2327 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2331 op1 = get_And_left(val);
2332 op2 = get_And_right(val);
2333 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2334 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2335 match_commutative | match_immediate);
2338 op1 = get_Or_left(val);
2339 op2 = get_Or_right(val);
2340 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2341 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2342 match_commutative | match_immediate);
2345 op1 = get_Eor_left(val);
2346 op2 = get_Eor_right(val);
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2349 match_commutative | match_immediate);
/* shifts/rotates have no separate 8bit constructor: the same one is
 * passed for both slots */
2352 op1 = get_Shl_left(val);
2353 op2 = get_Shl_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2359 op1 = get_Shr_left(val);
2360 op2 = get_Shr_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2366 op1 = get_Shrs_left(val);
2367 op2 = get_Shrs_right(val);
2368 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2369 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2373 op1 = get_Rotl_left(val);
2374 op2 = get_Rotl_right(val);
2375 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2376 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2379 /* TODO: match ROR patterns... */
2381 new_node = try_create_SetMem(val, ptr, mem);
2385 op1 = get_Minus_op(val);
2386 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2389 /* should be lowered already */
2390 assert(mode != mode_b);
2391 op1 = get_Not_op(val);
2392 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* keep the Store's pinned state on the replacement node */
2398 if (new_node != NULL) {
2399 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2400 get_irn_pinned(node) == op_pin_state_pinned) {
2401 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if mode is a signed 16 or 32 bit integer mode — the only integer
 * widths the x87 fist/fisttp store instructions support here. */
2408 static bool possible_int_mode_for_fp(ir_mode *mode)
2412 if (!mode_is_signed(mode))
2414 size = get_mode_size_bits(mode);
2415 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to a fist-compatible int mode. */
2420 static int is_float_to_int_conv(const ir_node *node)
2422 ir_mode *mode = get_irn_mode(node);
2426 if (!possible_int_mode_for_fp(mode))
2431 conv_op = get_Conv_op(node);
2432 conv_mode = get_irn_mode(conv_op);
2434 if (!mode_is_float(conv_mode))
2441 * Transform a Store(floatConst) into a sequence of
2444 * @return the created ia32 Store node
2446 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2448 ir_mode *mode = get_irn_mode(cns);
2449 unsigned size = get_mode_size_bytes(mode);
2450 tarval *tv = get_Const_tarval(cns);
2451 ir_node *block = get_nodes_block(node);
2452 ir_node *new_block = be_transform_node(block);
2453 ir_node *ptr = get_Store_ptr(node);
2454 ir_node *mem = get_Store_mem(node);
2455 dbg_info *dbgi = get_irn_dbg_info(node);
2459 ia32_address_t addr;
/* the constant is emitted as one or more 32bit integer stores */
2461 assert(size % 4 == 0);
2464 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32bit little-endian chunk of the constant */
2468 get_tarval_sub_bits(tv, ofs) |
2469 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2470 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2471 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2472 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2474 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2475 addr.index, addr.mem, imm);
2477 set_irn_pinned(new_node, get_irn_pinned(node));
2478 set_ia32_op_type(new_node, ia32_AddrModeD);
2479 set_ia32_ls_mode(new_node, mode_Iu);
2480 set_address(new_node, &addr);
2481 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; loop advances offset until size exhausted */
2484 ins[i++] = new_node;
2489 } while (size != 0);
/* the memory results of all partial stores are merged with a Sync */
2492 return new_rd_Sync(dbgi, new_block, i, ins);
2499 * Generate a vfist or vfisttp instruction.
2501 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2502 ir_node *mem, ir_node *val, ir_node **fist)
2506 if (ia32_cg_config.use_fisttp) {
2507 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2508 if other users exists */
2509 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2510 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* the Keep forces the popped value to be materialised for other users */
2511 be_new_Keep(block, 1, &value);
2513 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic fist truncates only with the FPU rounding mode switched;
 * trunc_mode models the required control-word change */
2516 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2519 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2525 * Transforms a general (no special case) Store.
2527 * @return the created ia32 Store node
2529 static ir_node *gen_general_Store(ir_node *node)
2531 ir_node *val = get_Store_value(node);
2532 ir_mode *mode = get_irn_mode(val);
2533 ir_node *block = get_nodes_block(node);
2534 ir_node *new_block = be_transform_node(block);
2535 ir_node *ptr = get_Store_ptr(node);
2536 ir_node *mem = get_Store_mem(node);
2537 dbg_info *dbgi = get_irn_dbg_info(node);
2538 ir_node *new_val, *new_node, *store;
2539 ia32_address_t addr;
2541 /* check for destination address mode */
2542 new_node = try_create_dest_am(node);
2543 if (new_node != NULL)
2546 /* construct store address */
2547 memset(&addr, 0, sizeof(addr));
2548 ia32_create_address_mode(&addr, ptr, 0);
2550 if (addr.base == NULL) {
2551 addr.base = noreg_GP;
2553 addr.base = be_transform_node(addr.base);
2556 if (addr.index == NULL) {
2557 addr.index = noreg_GP;
2559 addr.index = be_transform_node(addr.index);
2561 addr.mem = be_transform_node(mem);
2563 if (mode_is_float(mode)) {
2564 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2566 while (is_Conv(val) && mode == get_irn_mode(val)) {
2567 ir_node *op = get_Conv_op(val);
2568 if (!mode_is_float(get_irn_mode(op)))
2572 new_val = be_transform_node(val);
/* float store: SSE movss/movsd (xStore) or x87 fst (vfst) */
2573 if (ia32_cg_config.use_sse2) {
2574 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2575 addr.index, addr.mem, new_val);
2577 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2578 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float->int)) on x87 maps directly onto fist(tp) */
2581 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2582 val = get_Conv_op(val);
2584 /* TODO: is this optimisation still necessary at all (middleend)? */
2585 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2586 while (is_Conv(val)) {
2587 ir_node *op = get_Conv_op(val);
2588 if (!mode_is_float(get_irn_mode(op)))
2590 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2594 new_val = be_transform_node(val);
2595 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: value may become an immediate; 8bit needs the
 * dedicated Store8Bit (byte-addressable register constraint) */
2597 new_val = create_immediate_or_transform(val, 0);
2598 assert(mode != mode_b);
2600 if (get_mode_size_bits(mode) == 8) {
2601 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2602 addr.index, addr.mem, new_val);
2604 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2605 addr.index, addr.mem, new_val);
/* common attribute setup on the actual store node */
2610 set_irn_pinned(store, get_irn_pinned(node));
2611 set_ia32_op_type(store, ia32_AddrModeD);
2612 set_ia32_ls_mode(store, mode);
2614 set_address(store, &addr);
2615 SET_IA32_ORIG_NODE(store, node);
2621 * Transforms a Store.
2623 * @return the created ia32 Store node
2625 static ir_node *gen_Store(ir_node *node)
2627 ir_node *val = get_Store_value(node);
2628 ir_mode *mode = get_irn_mode(val);
2630 if (mode_is_float(mode) && is_Const(val)) {
2631 /* We can transform every floating const store
2632 into a sequence of integer stores.
2633 If the constant is already in a register,
2634 it would be better to use it, but we don't
2635 have this information here. */
2636 return gen_float_const_Store(node, val);
2638 return gen_general_Store(node);
2642  * Transforms a Switch.
2644  * @return the created ia32 SwitchJmp node
/* NOTE(review): lines are missing from this listing (numbering gaps), so
 * the loop/if bodies below are incomplete — verify against the full file. */
2646 static ir_node *create_Switch(ir_node *node)
2648 dbg_info *dbgi = get_irn_dbg_info(node);
2649 ir_node *block = be_transform_node(get_nodes_block(node));
2650 ir_node *sel = get_Cond_selector(node);
2651 ir_node *new_sel = be_transform_node(sel);
/* Running min/max over all non-default case numbers; start values are
 * the extremes so any real case updates them. */
2652 long switch_min = LONG_MAX;
2653 long switch_max = LONG_MIN;
2654 long default_pn = get_Cond_default_proj(node);
2656 const ir_edge_t *edge;
/* SwitchJmp expects a 32-bit selector. */
2658 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2660 /* determine the smallest switch case value */
2661 foreach_out_edge(node, edge) {
2662 ir_node *proj = get_edge_src_irn(edge);
2663 long pn = get_Proj_proj(proj);
2664 if (pn == default_pn)
2667 if (pn < switch_min)
2669 if (pn > switch_max)
/* Refuse to build a jump table over an excessively wide case range. */
2673 if ((unsigned long) (switch_max - switch_min) > 128000) {
2674 panic("Size of switch %+F bigger than 128000", node);
/* Normalise the selector so the smallest case becomes index 0: a Lea
 * with a negative offset subtracts switch_min. */
2677 if (switch_min != 0) {
2678 /* if smallest switch case is not 0 we need an additional sub */
2679 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2680 add_ia32_am_offs_int(new_sel, -switch_min);
2681 set_ia32_op_type(new_sel, ia32_AddrModeS);
2683 SET_IA32_ORIG_NODE(new_sel, node);
2686 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2687 SET_IA32_ORIG_NODE(new_node, node);
2693  * Transform a Cond node.
/* A Cond with a non-boolean selector is a Switch; otherwise the selector
 * comes from a compare and we emit a conditional jump on its flags. */
2695 static ir_node *gen_Cond(ir_node *node)
2697 ir_node *block = get_nodes_block(node);
2698 ir_node *new_block = be_transform_node(block);
2699 dbg_info *dbgi = get_irn_dbg_info(node);
2700 ir_node *sel = get_Cond_selector(node);
2701 ir_mode *sel_mode = get_irn_mode(sel);
2702 ir_node *flags = NULL;
2706 if (sel_mode != mode_b) {
2707 return create_Switch(node);
2710 /* we get flags from a Cmp */
/* get_flags_node also yields the pn_Cmp condition code used by the Jcc. */
2711 flags = get_flags_node(sel, &pnc);
2713 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2714 SET_IA32_ORIG_NODE(new_node, node);
2720 * Transform a be_Copy.
2722 static ir_node *gen_be_Copy(ir_node *node)
2724 ir_node *new_node = be_duplicate_node(node);
2725 ir_mode *mode = get_irn_mode(new_node);
2727 if (ia32_mode_needs_gp_reg(mode)) {
2728 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. With fucomi the flags are produced
 * directly; otherwise the FPU status word is fetched (ftst/fucom +
 * fnstsw) and copied into EFLAGS via sahf. */
2734 static ir_node *create_Fucom(ir_node *node)
2736 dbg_info *dbgi = get_irn_dbg_info(node);
2737 ir_node *block = get_nodes_block(node);
2738 ir_node *new_block = be_transform_node(block);
2739 ir_node *left = get_Cmp_left(node);
2740 ir_node *new_left = be_transform_node(left);
2741 ir_node *right = get_Cmp_right(node);
2745 if (ia32_cg_config.use_fucomi) {
2746 new_right = be_transform_node(right);
2747 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2749 set_ia32_commutative(new_node);
2750 SET_IA32_ORIG_NODE(new_node, node);
/* Compare against zero can use the shorter ftst when enabled. */
2752 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2753 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2755 new_right = be_transform_node(right);
2756 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2759 set_ia32_commutative(new_node);
2761 SET_IA32_ORIG_NODE(new_node, node);
/* sahf moves AH (the fetched status word) into the CPU flags. */
2763 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2764 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE ucomi(ss/sd) compare producing flags. The operands may
 * be matched as a (commutative) memory operand via address mode. */
2770 static ir_node *create_Ucomi(ir_node *node)
2772 dbg_info *dbgi = get_irn_dbg_info(node);
2773 ir_node *src_block = get_nodes_block(node);
2774 ir_node *new_block = be_transform_node(src_block);
2775 ir_node *left = get_Cmp_left(node);
2776 ir_node *right = get_Cmp_right(node);
2778 ia32_address_mode_t am;
2779 ia32_address_t *addr = &am.addr;
2781 match_arguments(&am, src_block, left, right, NULL,
2782 match_commutative | match_am);
2784 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2785 addr->mem, am.new_op1, am.new_op2,
2787 set_am_attributes(new_node, &am);
2789 SET_IA32_ORIG_NODE(new_node, node);
/* If a memory operand was folded, reroute the memory Proj. */
2791 new_node = fix_mem_proj(new_node, &am);
2797  * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2798  * to fold an and into a test node
/* Returns false as soon as one consumer tests a relation other than
 * equal/unequal, since Test only provides correct flags for those. */
2800 static bool can_fold_test_and(ir_node *node)
2802 const ir_edge_t *edge;
2804 /** we can only have eq and lg projs */
2805 foreach_out_edge(node, edge) {
2806 ir_node *proj = get_edge_src_irn(edge);
2807 pn_Cmp pnc = get_Proj_proj(proj);
2808 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2816  * returns true if it is assured, that the upper bits of a node are "clean"
2817  * which means for a 16 or 8 bit value, that the upper bits in the register
2818  * are 0 for unsigned and a copy of the last significant bit for signed
/* NOTE(review): several case labels/breaks are missing from this listing
 * (numbering gaps); the visible arms handle Conv, Shr, Sar, And, other
 * binary ops and constants/immediates. */
2821 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2823 assert(ia32_mode_needs_gp_reg(mode));
/* For full-register (>= 32 bit) values there are no "upper bits". */
2824 if (get_mode_size_bits(mode) >= 32)
/* Look through Projs to the producing instruction. */
2827 if (is_Proj(transformed_node))
2828 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2830 switch (get_ia32_irn_opcode(transformed_node)) {
/* A conversion is clean if it has the same signedness and converts to a
 * mode at most as wide as the one we care about. */
2831 case iro_ia32_Conv_I2I:
2832 case iro_ia32_Conv_I2I8Bit: {
2833 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2834 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2836 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shr: an unsigned shift by a constant >= (32 - bits) leaves the upper
 * bits zero; otherwise fall back to inspecting the shifted value. */
2843 if (mode_is_signed(mode)) {
2844 return false; /* TODO handle signed modes */
2846 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2847 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2848 const ia32_immediate_attr_t *attr
2849 = get_ia32_immediate_attr_const(right);
2850 if (attr->symconst == 0 &&
2851 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2855 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* Sar: sign-replicating shift keeps cleanliness of its input. */
2859 /* TODO too conservative if shift amount is constant */
2860 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand suffices (zeros dominate). */
2863 if (!mode_is_signed(mode)) {
2865 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2866 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2868 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary ops: both operands must be clean. */
2873 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2874 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants: clean iff the value fits the mode (sign- resp. zero-
 * extension of the low bits reproduces the full value). */
2876 case iro_ia32_Const:
2877 case iro_ia32_Immediate: {
2878 const ia32_immediate_attr_t *attr =
2879 get_ia32_immediate_attr_const(transformed_node);
2880 if (mode_is_signed(mode)) {
2881 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2882 return shifted == 0 || shifted == -1;
2884 unsigned long shifted = (unsigned long)attr->offset;
2885 shifted >>= get_mode_size_bits(mode);
2886 return shifted == 0;
2896  * Generate code for a Cmp.
/* Float compares are dispatched to SSE (ucomi) or x87 (fucom); integer
 * compares become Test (for "(x & y) ==/!= 0") or Cmp, both with full
 * address mode matching. */
2898 static ir_node *gen_Cmp(ir_node *node)
2900 dbg_info *dbgi = get_irn_dbg_info(node);
2901 ir_node *block = get_nodes_block(node);
2902 ir_node *new_block = be_transform_node(block);
2903 ir_node *left = get_Cmp_left(node);
2904 ir_node *right = get_Cmp_right(node);
2905 ir_mode *cmp_mode = get_irn_mode(left);
2907 ia32_address_mode_t am;
2908 ia32_address_t *addr = &am.addr;
2911 if (mode_is_float(cmp_mode)) {
2912 if (ia32_cg_config.use_sse2) {
2913 return create_Ucomi(node);
2915 return create_Fucom(node);
2919 assert(ia32_mode_needs_gp_reg(cmp_mode));
2921 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2922 cmp_unsigned = !mode_is_signed(cmp_mode);
/* The And may only be folded when it has a single user and all projs of
 * the Cmp test eq/lg (Test does not set the other flags usefully). */
2923 if (is_Const_0(right) &&
2925 get_irn_n_edges(left) == 1 &&
2926 can_fold_test_and(node)) {
2927 /* Test(and_left, and_right) */
2928 ir_node *and_left = get_And_left(left);
2929 ir_node *and_right = get_And_right(left);
2931 /* matze: code here used mode instead of cmd_mode, I think it is always
2932 * the same as cmp_mode, but I leave this here to see if this is really
2935 assert(get_irn_mode(and_left) == cmp_mode);
2937 match_arguments(&am, block, and_left, and_right, NULL,
2939 match_am | match_8bit_am | match_16bit_am |
2940 match_am_and_immediates | match_immediate);
2942 /* use 32bit compare mode if possible since the opcode is smaller */
2943 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2944 upper_bits_clean(am.new_op2, cmp_mode)) {
2945 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2948 if (get_mode_size_bits(cmp_mode) == 8) {
2949 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2950 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2953 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2954 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2957 /* Cmp(left, right) */
2958 match_arguments(&am, block, left, right, NULL,
2959 match_commutative | match_am | match_8bit_am |
2960 match_16bit_am | match_am_and_immediates |
2962 /* use 32bit compare mode if possible since the opcode is smaller */
2963 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2964 upper_bits_clean(am.new_op2, cmp_mode)) {
2965 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2968 if (get_mode_size_bits(cmp_mode) == 8) {
2969 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2970 addr->index, addr->mem, am.new_op1,
2971 am.new_op2, am.ins_permuted,
2974 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2975 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
/* Finalise: AM attributes, compare width, origin, memory Proj rerouting. */
2978 set_am_attributes(new_node, &am);
2979 set_ia32_ls_mode(new_node, cmp_mode);
2981 SET_IA32_ORIG_NODE(new_node, node);
2983 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc for a Mux: selects val_false/val_true based on the
 * given flags and condition code. Requires the cmov ISA extension and a
 * GP-register mode. */
2988 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2991 dbg_info *dbgi = get_irn_dbg_info(node);
2992 ir_node *block = get_nodes_block(node);
2993 ir_node *new_block = be_transform_node(block);
2994 ir_node *val_true = get_Mux_true(node);
2995 ir_node *val_false = get_Mux_false(node);
2997 ia32_address_mode_t am;
2998 ia32_address_t *addr;
3000 assert(ia32_cg_config.use_cmov);
3001 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3005 match_arguments(&am, block, val_false, val_true, flags,
3006 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* If matching swapped the operands, the condition must be negated to
 * preserve the selection. */
3008 if (am.ins_permuted)
3009 pnc = ia32_get_negated_pnc(pnc);
3011 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3012 addr->mem, am.new_op1, am.new_op2, new_flags,
3014 set_am_attributes(new_node, &am);
3016 SET_IA32_ORIG_NODE(new_node, node);
3018 new_node = fix_mem_proj(new_node, &am);
3024  * Creates a ia32 Setcc instruction.
/* Produces a 0/1 value from flags; since setcc only writes 8 bits the
 * result is zero-extended with a Conv_I2I8Bit when the requested mode is
 * wider. NOTE(review): the parameter list is cut short in this listing —
 * presumably an orig_node parameter follows; verify in the full file. */
3026 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3027 ir_node *flags, pn_Cmp pnc,
3030 ir_mode *mode = get_irn_mode(orig_node);
3033 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3034 SET_IA32_ORIG_NODE(new_node, orig_node);
3036 /* we might need to conv the result up */
3037 if (get_mode_size_bits(mode) > 8) {
3038 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3039 nomem, new_node, mode_Bu);
3040 SET_IA32_ORIG_NODE(new_node, orig_node);
3047  * Create instruction for an unsigned Difference or Zero.
/* Computes doz(a, b) = a > b ? a - b : 0 branch-free: sub sets the carry
 * flag on borrow, sbb0 materialises -carry, not inverts it, and the and
 * masks the difference to zero exactly when a borrow occurred. */
3049 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3051 ir_mode *mode = get_irn_mode(psi);
3061 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3062 match_mode_neutral | match_am | match_immediate | match_two_users);
3064 block = get_nodes_block(new_node);
/* gen_binop may return the Sub directly or a Proj of it; in either case
 * we need the Sub itself to take its flags output. */
3066 if (is_Proj(new_node)) {
3067 sub = get_Proj_pred(new_node);
3068 assert(is_ia32_Sub(sub));
3071 set_irn_mode(sub, mode_T);
3072 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3074 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3076 dbgi = get_irn_dbg_info(psi);
3077 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3078 not = new_bd_ia32_Not(dbgi, block, sbb);
3080 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3081 set_ia32_commutative(new_node);
3086  * Create an const array of two float consts.
3088  * @param c0 the first constant
3089  * @param c1 the second constant
3090  * @param new_mode IN/OUT for the mode of the constants, if NULL
3091  * smallest possible mode will be used
/* Emits a private, constant two-element float array into the global type
 * and returns its entity; used by gen_Mux to load a selected constant. */
3093 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3096 ir_mode *mode = *new_mode;
3098 ir_initializer_t *initializer;
3099 tarval *tv0 = get_Const_tarval(c0);
3100 tarval *tv1 = get_Const_tarval(c1);
/* Pick the narrowest IEEE mode (float before double) that can represent
 * both constants losslessly, to keep the array small. */
3103 /* detect the best mode for the constants */
3104 mode = get_tarval_mode(tv0);
3106 if (mode != mode_F) {
3107 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3108 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3110 tv0 = tarval_convert_to(tv0, mode);
3111 tv1 = tarval_convert_to(tv1, mode);
3112 } else if (mode != mode_D) {
3113 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3114 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3116 tv0 = tarval_convert_to(tv0, mode);
3117 tv1 = tarval_convert_to(tv1, mode);
/* Build the array type and a uniquely named, linker-private constant
 * entity holding both values. */
3124 tp = ia32_create_float_type(mode, 4);
3125 tp = ia32_create_float_array(tp);
3127 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3129 set_entity_ld_ident(ent, get_entity_ident(ent));
3130 set_entity_visibility(ent, ir_visibility_private);
3131 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3133 initializer = create_initializer_compound(2);
3135 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3136 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3138 set_entity_initializer(ent, initializer);
3145  * Possible transformations for creating a Setcc.
/* NOTE(review): the enumerator list (SETCC_TR_ADD, _LEA, _SHL, _NEG,
 * _NOT, _AND, _SET, _SBB, ...) is not visible in this listing. */
3147 enum setcc_transform_insn {
/* Recipe computed by find_const_transform and executed by gen_Mux: a
 * short sequence of steps that turns a setcc 0/1 result into the two
 * requested constant values. */
3160 typedef struct setcc_transform {
3164 enum setcc_transform_insn transform;
3168 } setcc_transform_t;
3171  * Setcc can only handle 0 and 1 result.
3172  * Find a transformation that creates 0 and 1 from
/* Plans an instruction sequence (stored in res->steps, executed in
 * reverse order by gen_Mux) mapping the setcc 0/1 result onto the
 * constants t (true value) and f (false value).
 * NOTE(review): many case labels/returns are missing from this listing
 * (numbering gaps); treat the visible structure as a sketch. */
3175 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3176 setcc_transform_t *res)
/* Normalise so that t is non-zero and the bigger of the two, negating
 * the condition when true/false are swapped. */
3182 if (tarval_is_null(t)) {
3186 pnc = ia32_get_negated_pnc(pnc);
3187 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3188 // now, t is the bigger one
3192 pnc = ia32_get_negated_pnc(pnc);
/* Non-zero false value: plan a trailing ADD of f and reduce the problem
 * to producing t-f versus 0. */
3196 if (! tarval_is_null(f)) {
3197 tarval *t_sub = tarval_sub(t, f, NULL);
3200 res->steps[step].transform = SETCC_TR_ADD;
3202 if (t == tarval_bad)
3203 panic("constant subtract failed");
3204 if (! tarval_is_long(f))
3205 panic("tarval is not long");
3207 res->steps[step].val = get_tarval_long(f);
3209 f = tarval_sub(f, f, NULL);
3210 assert(tarval_is_null(f));
/* t == 1: the plain setcc result already matches. */
3213 if (tarval_is_one(t)) {
3214 res->steps[step].transform = SETCC_TR_SET;
3215 res->num_steps = ++step;
/* t == -1: negate the 0/1 value. */
3219 if (tarval_is_minus_one(t)) {
3220 res->steps[step].transform = SETCC_TR_NEG;
3222 res->steps[step].transform = SETCC_TR_SET;
3223 res->num_steps = ++step;
/* Small factors 9/8/5/4/3/2 are expressed as scaled Lea / Shl patterns;
 * an immediately preceding ADD step can be merged into the Lea offset. */
3226 if (tarval_is_long(t)) {
3227 long v = get_tarval_long(t);
3229 res->steps[step].val = 0;
3232 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3234 res->steps[step].transform = SETCC_TR_LEAxx;
3235 res->steps[step].scale = 3; /* (a << 3) + a */
3238 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3240 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3241 res->steps[step].scale = 3; /* (a << 3) */
3244 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3246 res->steps[step].transform = SETCC_TR_LEAxx;
3247 res->steps[step].scale = 2; /* (a << 2) + a */
3250 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3252 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3253 res->steps[step].scale = 2; /* (a << 2) */
3256 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3258 res->steps[step].transform = SETCC_TR_LEAxx;
3259 res->steps[step].scale = 1; /* (a << 1) + a */
3262 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3264 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3265 res->steps[step].scale = 1; /* (a << 1) */
3268 res->num_steps = step;
/* General case: mask with AND (or negate for single-bit values), then
 * shift the bit into place and finally emit the setcc itself. */
3271 if (! tarval_is_single_bit(t)) {
3272 res->steps[step].transform = SETCC_TR_AND;
3273 res->steps[step].val = v;
3275 res->steps[step].transform = SETCC_TR_NEG;
3277 int v = get_tarval_lowest_bit(t);
3280 res->steps[step].transform = SETCC_TR_SHL;
3281 res->steps[step].scale = v;
3285 res->steps[step].transform = SETCC_TR_SET;
3286 res->num_steps = ++step;
3289 panic("tarval is not long");
3293  * Transforms a Mux node into some code sequence.
3295  * @return The transformed node.
/* Strategy overview:
 *  - abs patterns  -> create_abs
 *  - float + SSE   -> min/max where the pattern matches, or a setcc-
 *                     indexed load from a two-element constant array
 *  - float + x87   -> same constant-array trick, otherwise unsupported
 *  - integer       -> doz pattern, setcc arithmetic on two constants
 *                     (via find_const_transform), or CMov.
 * NOTE(review): this listing is missing lines (numbering gaps) — closing
 * braces, some declarations and case labels are not visible. */
3297 static ir_node *gen_Mux(ir_node *node)
3299 dbg_info *dbgi = get_irn_dbg_info(node);
3300 ir_node *block = get_nodes_block(node);
3301 ir_node *new_block = be_transform_node(block);
3302 ir_node *mux_true = get_Mux_true(node);
3303 ir_node *mux_false = get_Mux_false(node);
3304 ir_node *cond = get_Mux_sel(node);
3305 ir_mode *mode = get_irn_mode(node);
3311 assert(get_irn_mode(cond) == mode_b);
/* abs/-abs detection: is_abs < 0 encodes the negated variant. */
3313 is_abs = be_mux_is_abs(cond, mux_true, mux_false);
3315 return create_abs(dbgi, block, be_get_abs_op(cond), is_abs < 0, node);
3318 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3319 if (mode_is_float(mode)) {
3320 ir_node *cmp = get_Proj_pred(cond);
3321 ir_node *cmp_left = get_Cmp_left(cmp);
3322 ir_node *cmp_right = get_Cmp_right(cmp);
3323 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE min/max: Mux over a compare of its own operands maps directly to
 * xMin/xMax. */
3325 if (ia32_cg_config.use_sse2) {
3326 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3327 if (cmp_left == mux_true && cmp_right == mux_false) {
3328 /* Mux(a <= b, a, b) => MIN */
3329 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3330 match_commutative | match_am | match_two_users);
3331 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3332 /* Mux(a <= b, b, a) => MAX */
3333 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3334 match_commutative | match_am | match_two_users);
3336 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3337 if (cmp_left == mux_true && cmp_right == mux_false) {
3338 /* Mux(a >= b, a, b) => MAX */
3339 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3340 match_commutative | match_am | match_two_users);
3341 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3342 /* Mux(a >= b, b, a) => MIN */
3343 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3344 match_commutative | match_am | match_two_users);
/* Two float constants: emit them as a constant array and load element
 * [setcc-result], i.e. index 0 = false value, index 1 = true value. */
3349 if (is_Const(mux_true) && is_Const(mux_false)) {
3350 ia32_address_mode_t am;
3355 flags = get_flags_node(cond, &pnc);
3356 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3358 if (ia32_cg_config.use_sse2) {
3359 /* cannot load from different mode on SSE */
3362 /* x87 can load any mode */
3366 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 index by the element size; 16-byte elements need an
 * extra Lea/Add because AM scale maxes out at 8. */
3368 switch (get_mode_size_bytes(new_mode)) {
3378 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3379 set_ia32_am_scale(new_node, 2);
3384 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3385 set_ia32_am_scale(new_node, 1);
3388 /* arg, shift 16 NOT supported */
3390 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3393 panic("Unsupported constant size");
/* Assemble the source address mode for the constant-array load. */
3396 am.ls_mode = new_mode;
3397 am.addr.base = get_symconst_base();
3398 am.addr.index = new_node;
3399 am.addr.mem = nomem;
3401 am.addr.scale = scale;
3402 am.addr.use_frame = 0;
3403 am.addr.frame_entity = NULL;
3404 am.addr.symconst_sign = 0;
3405 am.mem_proj = am.addr.mem;
3406 am.op_type = ia32_AddrModeS;
3409 am.pinned = op_pin_state_floats;
3411 am.ins_permuted = 0;
3413 if (ia32_cg_config.use_sse2)
3414 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3416 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3417 set_am_attributes(load, &am);
3419 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3421 panic("cannot transform floating point Mux");
/* ---- integer Mux from here on ---- */
3424 assert(ia32_mode_needs_gp_reg(mode));
3426 if (is_Proj(cond)) {
3427 ir_node *cmp = get_Proj_pred(cond);
3429 ir_node *cmp_left = get_Cmp_left(cmp);
3430 ir_node *cmp_right = get_Cmp_right(cmp);
3431 ir_node *val_true = mux_true;
3432 ir_node *val_false = mux_false;
3433 pn_Cmp pnc = get_Proj_proj(cond);
/* Canonicalise so the zero constant (if any) is the false value. */
3435 if (is_Const(val_true) && is_Const_null(val_true)) {
3436 ir_node *tmp = val_false;
3437 val_false = val_true;
3439 pnc = ia32_get_negated_pnc(pnc);
/* Mux(a >/>= b, a - b, 0) and its mirrored form become a branch-free
 * "difference or zero" (create_doz). */
3441 if (is_Const_0(val_false) && is_Sub(val_true)) {
3442 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3443 && get_Sub_left(val_true) == cmp_left
3444 && get_Sub_right(val_true) == cmp_right) {
3445 return create_doz(node, cmp_left, cmp_right);
3447 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3448 && get_Sub_left(val_true) == cmp_right
3449 && get_Sub_right(val_true) == cmp_left) {
3450 return create_doz(node, cmp_right, cmp_left);
3456 flags = get_flags_node(cond, &pnc);
/* Both values constant: run find_const_transform and materialise its
 * step plan back-to-front (last planned step is emitted first). */
3458 if (is_Const(mux_true) && is_Const(mux_false)) {
3459 /* both are const, good */
3460 tarval *tv_true = get_Const_tarval(mux_true);
3461 tarval *tv_false = get_Const_tarval(mux_false);
3462 setcc_transform_t res;
3465 find_const_transform(pnc, tv_true, tv_false, &res);
3467 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3470 switch (res.steps[step].transform) {
3472 imm = ia32_immediate_from_long(res.steps[step].val);
3473 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3475 case SETCC_TR_ADDxx:
3476 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3479 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3480 set_ia32_am_scale(new_node, res.steps[step].scale);
3481 set_ia32_am_offs_int(new_node, res.steps[step].val);
3483 case SETCC_TR_LEAxx:
3484 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3485 set_ia32_am_scale(new_node, res.steps[step].scale);
3486 set_ia32_am_offs_int(new_node, res.steps[step].val);
3489 imm = ia32_immediate_from_long(res.steps[step].scale);
3490 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3493 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3496 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3499 imm = ia32_immediate_from_long(res.steps[step].val);
3500 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3503 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3506 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3509 panic("unknown setcc transform");
/* General case: conditional move. */
3513 new_node = create_CMov(node, cond, flags, pnc);
3521  * Create a conversion from x87 state register to general purpose.
/* Implemented as a spill/reload through the stack frame: fist(tp) the
 * x87 value to the frame, then Load the integer back. */
3523 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3525 ir_node *block = be_transform_node(get_nodes_block(node));
3526 ir_node *op = get_Conv_op(node);
3527 ir_node *new_op = be_transform_node(op);
3528 ir_graph *irg = current_ir_graph;
3529 dbg_info *dbgi = get_irn_dbg_info(node);
3530 ir_mode *mode = get_irn_mode(node);
3531 ir_node *fist, *load, *mem;
3533 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3534 set_irn_pinned(fist, op_pin_state_floats);
3535 set_ia32_use_frame(fist);
3536 set_ia32_op_type(fist, ia32_AddrModeD);
3538 assert(get_mode_size_bits(mode) <= 32);
3539 /* exception we can only store signed 32 bit integers, so for unsigned
3540 we store a 64bit (signed) integer and load the lower bits */
3541 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3542 set_ia32_ls_mode(fist, mode_Ls);
3544 set_ia32_ls_mode(fist, mode_Is);
3546 SET_IA32_ORIG_NODE(fist, node);
/* Reload the (low 32 bits of the) stored integer from the same slot. */
3549 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3551 set_irn_pinned(load, op_pin_state_floats);
3552 set_ia32_use_frame(load);
3553 set_ia32_op_type(load, ia32_AddrModeS);
3554 set_ia32_ls_mode(load, mode_Is);
/* Tell the frame-entity assignment how big the spill slot must be. */
3555 if (get_ia32_ls_mode(fist) == mode_Ls) {
3556 ia32_attr_t *attr = get_ia32_attr(load);
3557 attr->data.need_64bit_stackent = 1;
3559 ia32_attr_t *attr = get_ia32_attr(load);
3560 attr->data.need_32bit_stackent = 1;
3562 SET_IA32_ORIG_NODE(load, node);
3564 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3568  * Creates a x87 strict Conv by placing a Store and a Load
/* The round trip through memory in tgt_mode forces the value out of the
 * 80-bit x87 register format, giving correct strict-conversion rounding. */
3570 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3572 ir_node *block = get_nodes_block(node);
3573 ir_graph *irg = get_Block_irg(block);
3574 dbg_info *dbgi = get_irn_dbg_info(node);
3575 ir_node *frame = get_irg_frame(irg);
3576 ir_node *store, *load;
3579 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3580 set_ia32_use_frame(store);
3581 set_ia32_op_type(store, ia32_AddrModeD);
3582 SET_IA32_ORIG_NODE(store, node);
/* Load the value back; the store is the load's memory dependency. */
3584 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3585 set_ia32_use_frame(load);
3586 set_ia32_op_type(load, ia32_AddrModeS);
3587 SET_IA32_ORIG_NODE(load, node);
3589 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
3593 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3594 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3596 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3598 func = get_mode_size_bits(mode) == 8 ?
3599 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3600 return func(dbgi, block, base, index, mem, val, mode);
3604  * Create a conversion from general purpose to x87 register
/* Either folds the integer operand as a fild source address mode, or
 * spills it to the frame and fild-loads it. 32-bit unsigned values are
 * widened to a 64-bit spill (high word zeroed) since fild is signed. */
3606 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3608 ir_node *src_block = get_nodes_block(node);
3609 ir_node *block = be_transform_node(src_block);
3610 ir_graph *irg = get_Block_irg(block);
3611 dbg_info *dbgi = get_irn_dbg_info(node);
3612 ir_node *op = get_Conv_op(node);
3613 ir_node *new_op = NULL;
3615 ir_mode *store_mode;
3620 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3621 if (possible_int_mode_for_fp(src_mode)) {
3622 ia32_address_mode_t am;
3624 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3625 if (am.op_type == ia32_AddrModeS) {
3626 ia32_address_t *addr = &am.addr;
3628 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3629 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3631 set_am_attributes(fild, &am);
3632 SET_IA32_ORIG_NODE(fild, node);
3634 fix_mem_proj(fild, &am);
/* AM matching failed: transform the operand normally. */
3639 if (new_op == NULL) {
3640 new_op = be_transform_node(op);
3643 mode = get_irn_mode(op);
3645 /* first convert to 32 bit signed if necessary */
3646 if (get_mode_size_bits(src_mode) < 32) {
3647 if (!upper_bits_clean(new_op, src_mode)) {
3648 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3649 SET_IA32_ORIG_NODE(new_op, node);
3654 assert(get_mode_size_bits(mode) == 32);
/* Spill the integer to the stack frame for the fild. */
3657 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3659 set_ia32_use_frame(store);
3660 set_ia32_op_type(store, ia32_AddrModeD);
3661 set_ia32_ls_mode(store, mode_Iu);
3663 /* exception for 32bit unsigned, do a 64bit spill+load */
3664 if (!mode_is_signed(mode)) {
/* Write a zero high word at offset 4 so the 64-bit value is the
 * unsigned 32-bit value; both stores are joined by a Sync. */
3667 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3669 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3670 noreg_GP, nomem, zero_const);
3672 set_ia32_use_frame(zero_store);
3673 set_ia32_op_type(zero_store, ia32_AddrModeD);
3674 add_ia32_am_offs_int(zero_store, 4);
3675 set_ia32_ls_mode(zero_store, mode_Iu);
3680 store = new_rd_Sync(dbgi, block, 2, in);
3681 store_mode = mode_Ls;
3683 store_mode = mode_Is;
/* fild reads the spilled integer and produces the x87 value. */
3687 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3689 set_ia32_use_frame(fild);
3690 set_ia32_op_type(fild, ia32_AddrModeS);
3691 set_ia32_ls_mode(fild, store_mode);
3693 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3699  * Create a conversion from one integer mode into another one
/* Only the smaller of the two modes matters for the generated Conv_I2I;
 * if the operand's upper bits are already clean the conversion is
 * dropped entirely. */
3701 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3702 dbg_info *dbgi, ir_node *block, ir_node *op,
3705 ir_node *new_block = be_transform_node(block);
3707 ir_mode *smaller_mode;
3708 ia32_address_mode_t am;
3709 ia32_address_t *addr = &am.addr;
3712 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3713 smaller_mode = src_mode;
3715 smaller_mode = tgt_mode;
/* Debug aid: a Conv directly after a constant should have been folded. */
3718 #ifdef DEBUG_libfirm
3720 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3725 match_arguments(&am, block, NULL, op, NULL,
3726 match_am | match_8bit_am | match_16bit_am);
3728 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3729 /* unnecessary conv. in theory it shouldn't have been AM */
3730 assert(is_ia32_NoReg_GP(addr->base));
3731 assert(is_ia32_NoReg_GP(addr->index));
3732 assert(is_NoMem(addr->mem));
3733 assert(am.addr.offset == 0);
3734 assert(am.addr.symconst_ent == NULL);
3738 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3739 addr->mem, am.new_op2, smaller_mode);
3740 set_am_attributes(new_node, &am);
3741 /* match_arguments assume that out-mode = in-mode, this isn't true here
3743 set_ia32_ls_mode(new_node, smaller_mode);
3744 SET_IA32_ORIG_NODE(new_node, node);
3745 new_node = fix_mem_proj(new_node, &am);
3750  * Transforms a Conv node.
3752  * @return The created ia32 Conv node
/* Dispatches on (src float?, tgt float?) to the float/float, float/int,
 * int/float and int/int conversion helpers, eliminating no-op Convs on
 * the way. NOTE(review): lines are missing from this listing (numbering
 * gaps) — some branches/returns are not visible. */
3754 static ir_node *gen_Conv(ir_node *node)
3756 ir_node *block = get_nodes_block(node);
3757 ir_node *new_block = be_transform_node(block);
3758 ir_node *op = get_Conv_op(node);
3759 ir_node *new_op = NULL;
3760 dbg_info *dbgi = get_irn_dbg_info(node);
3761 ir_mode *src_mode = get_irn_mode(op);
3762 ir_mode *tgt_mode = get_irn_mode(node);
3763 int src_bits = get_mode_size_bits(src_mode);
3764 int tgt_bits = get_mode_size_bits(tgt_mode);
3765 ir_node *res = NULL;
/* The ia32 backend only supports integers up to 32 bit here. */
3767 assert(!mode_is_int(src_mode) || src_bits <= 32);
3768 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3770 /* modeB -> X should already be lowered by the lower_mode_b pass */
3771 if (src_mode == mode_b) {
3772 panic("ConvB not lowered %+F", node);
/* Same-mode Conv: a no-op; strict variants only matter on x87. */
3775 if (src_mode == tgt_mode) {
3776 if (get_Conv_strict(node)) {
3777 if (ia32_cg_config.use_sse2) {
3778 /* when we are in SSE mode, we can kill all strict no-op conversion */
3779 return be_transform_node(op);
3782 /* this should be optimized already, but who knows... */
3783 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3784 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3785 return be_transform_node(op);
3789 if (mode_is_float(src_mode)) {
3790 new_op = be_transform_node(op);
3791 /* we convert from float ... */
3792 if (mode_is_float(tgt_mode)) {
/* float -> float: SSE has a real instruction; on x87 only strict
 * down-conversions need a store/load round trip. */
3794 if (ia32_cg_config.use_sse2) {
3795 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3796 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3798 set_ia32_ls_mode(res, tgt_mode);
3800 if (get_Conv_strict(node)) {
3801 /* if fp_no_float_fold is not set then we assume that we
3802 * don't have any float operations in a non
3803 * mode_float_arithmetic mode and can skip strict upconvs */
3804 if (src_bits < tgt_bits
3805 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3806 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3809 res = gen_x87_strict_conv(tgt_mode, new_op);
3810 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3814 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* float -> int: SSE Conv_FP2I or the x87 fist-based helper. */
3819 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3820 if (ia32_cg_config.use_sse2) {
3821 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3823 set_ia32_ls_mode(res, src_mode);
3825 return gen_x87_fp_to_gp(node);
3829 /* we convert from int ... */
3830 if (mode_is_float(tgt_mode)) {
/* int -> float: SSE Conv_I2FP, or x87 fild helper plus a strict conv
 * when the float mantissa cannot hold all integer bits. */
3832 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3833 if (ia32_cg_config.use_sse2) {
3834 new_op = be_transform_node(op);
3835 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3837 set_ia32_ls_mode(res, tgt_mode);
3839 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3840 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3841 res = gen_x87_gp_to_fp(node, src_mode);
3843 /* we need a strict-Conv, if the int mode has more bits than the
3845 if (float_mantissa < int_mantissa) {
3846 res = gen_x87_strict_conv(tgt_mode, res);
3847 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3851 } else if (tgt_mode == mode_b) {
3852 /* mode_b lowering already took care that we only have 0/1 values */
3853 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3854 src_mode, tgt_mode));
3855 return be_transform_node(op);
/* int -> int: same width is a no-op, otherwise create_I2I_Conv. */
3858 if (src_bits == tgt_bits) {
3859 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3860 src_mode, tgt_mode));
3861 return be_transform_node(op);
3864 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to encode @p node as an ia32 Immediate (subject to the given
 * constraint type); if that is not possible, fall back to the normal
 * be_transform of the node.
 */
3872 static ir_node *create_immediate_or_transform(ir_node *node,
3873 char immediate_constraint_type)
3875 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3876 if (new_node == NULL) {
3877 new_node = be_transform_node(node);
3883 * Transforms a FrameAddr into an ia32 Add.
 * Emits a Lea over the transformed frame pointer and attaches the frame
 * entity; the real offset is filled in later when the stack frame is laid out.
3885 static ir_node *gen_be_FrameAddr(ir_node *node)
3887 ir_node *block = be_transform_node(get_nodes_block(node));
3888 ir_node *op = be_get_FrameAddr_frame(node);
3889 ir_node *new_op = be_transform_node(op);
3890 dbg_info *dbgi = get_irn_dbg_info(node);
3893 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3894 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3895 set_ia32_use_frame(new_node);
3897 SET_IA32_ORIG_NODE(new_node, node);
3903 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 * Implemented by storing the SSE value to the frame, reloading it with an x87
 * vfld, and rebuilding the Barrier so its value/memory inputs use the new
 * nodes. Non-float or non-SSE2 returns are simply duplicated.
3905 static ir_node *gen_be_Return(ir_node *node)
3907 ir_graph *irg = current_ir_graph;
3908 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3909 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3910 ir_entity *ent = get_irg_entity(irg);
3911 ir_type *tp = get_entity_type(ent);
3916 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3917 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3919 int pn_ret_val, pn_ret_mem, arity, i;
/* fast path: nothing to fix up unless we return a float value under SSE2 */
3921 assert(ret_val != NULL);
3922 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3923 return be_duplicate_node(node);
3926 res_type = get_method_res_type(tp, 0);
3928 if (! is_Primitive_type(res_type)) {
3929 return be_duplicate_node(node);
3932 mode = get_type_mode(res_type);
3933 if (! mode_is_float(mode)) {
3934 return be_duplicate_node(node);
3937 assert(get_method_n_ress(tp) == 1);
3939 pn_ret_val = get_Proj_proj(ret_val);
3940 pn_ret_mem = get_Proj_proj(ret_mem);
3942 /* get the Barrier */
3943 barrier = get_Proj_pred(ret_val);
3945 /* get result input of the Barrier */
3946 ret_val = get_irn_n(barrier, pn_ret_val);
3947 new_ret_val = be_transform_node(ret_val);
3949 /* get memory input of the Barrier */
3950 ret_mem = get_irn_n(barrier, pn_ret_mem);
3951 new_ret_mem = be_transform_node(ret_mem);
3953 frame = get_irg_frame(irg);
3955 dbgi = get_irn_dbg_info(barrier);
3956 block = be_transform_node(get_nodes_block(barrier));
3958 /* store xmm0 onto stack */
3959 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3960 new_ret_mem, new_ret_val);
3961 set_ia32_ls_mode(sse_store, mode);
3962 set_ia32_op_type(sse_store, ia32_AddrModeD);
3963 set_ia32_use_frame(sse_store);
3965 /* load into x87 register */
3966 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3967 set_ia32_op_type(fld, ia32_AddrModeS);
3968 set_ia32_use_frame(fld);
3970 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3971 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3973 /* create a new barrier */
3974 arity = get_irn_arity(barrier);
3975 in = ALLOCAN(ir_node*, arity);
3976 for (i = 0; i < arity; ++i) {
/* value and memory inputs are replaced by the x87 reload, all others
 * are transformed normally */
3979 if (i == pn_ret_val) {
3981 } else if (i == pn_ret_mem) {
3984 ir_node *in = get_irn_n(barrier, i);
3985 new_in = be_transform_node(in);
3990 new_barrier = new_ir_node(dbgi, irg, block,
3991 get_irn_op(barrier), get_irn_mode(barrier),
3993 copy_node_attr(irg, barrier, new_barrier);
3994 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return below picks up the new Barrier */
3995 be_set_transformed_node(barrier, new_barrier);
3997 /* transform normally */
3998 return be_duplicate_node(node);
4002 * Transform a be_AddSP into an ia32_SubSP.
 * (The stack grows downwards on ia32, so adding stack space means
 * subtracting from %esp.)
4004 static ir_node *gen_be_AddSP(ir_node *node)
4006 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4007 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4009 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4010 match_am | match_immediate);
4014 * Transform a be_SubSP into an ia32_AddSP
 * (mirror of gen_be_AddSP: releasing stack space adds to %esp).
4016 static ir_node *gen_be_SubSP(ir_node *node)
4018 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4019 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4021 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4022 match_am | match_immediate);
4026 * Change some phi modes
 * Copies the Phi and attaches the register requirement matching its mode
 * (gp for integers/pointers, xmm or vfp for floats, none otherwise).
4028 static ir_node *gen_Phi(ir_node *node)
4030 const arch_register_req_t *req;
4031 ir_node *block = be_transform_node(get_nodes_block(node));
4032 ir_graph *irg = current_ir_graph;
4033 dbg_info *dbgi = get_irn_dbg_info(node);
4034 ir_mode *mode = get_irn_mode(node);
4037 if (ia32_mode_needs_gp_reg(mode)) {
4038 /* we shouldn't have any 64bit stuff around anymore */
4039 assert(get_mode_size_bits(mode) <= 32);
4040 /* all integer operations are on 32bit registers now */
4042 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4043 } else if (mode_is_float(mode)) {
4044 if (ia32_cg_config.use_sse2) {
4046 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4049 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4052 req = arch_no_register_req;
4055 /* phi nodes allow loops, so we use the old arguments for now
4056 * and fix this later */
4057 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4058 get_irn_in(node) + 1);
4059 copy_node_attr(irg, node, phi);
4060 be_duplicate_deps(node, phi);
4062 arch_set_out_register_req(phi, 0, req);
/* predecessors are transformed later; queue them now */
4064 be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32 Jmp in the transformed block.
 */
4069 static ir_node *gen_Jmp(ir_node *node)
4071 ir_node *block = get_nodes_block(node);
4072 ir_node *new_block = be_transform_node(block);
4073 dbg_info *dbgi = get_irn_dbg_info(node);
4076 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4077 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp). The jump target may be folded into an
 * address-mode operand or an immediate via match_arguments.
 */
4085 static ir_node *gen_IJmp(ir_node *node)
4087 ir_node *block = get_nodes_block(node);
4088 ir_node *new_block = be_transform_node(block);
4089 dbg_info *dbgi = get_irn_dbg_info(node);
4090 ir_node *op = get_IJmp_target(node);
4092 ia32_address_mode_t am;
4093 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4095 assert(get_irn_mode(op) == mode_P);
4097 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4099 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4100 addr->mem, am.new_op2);
4101 set_am_attributes(new_node, &am);
4102 SET_IA32_ORIG_NODE(new_node, node);
4104 new_node = fix_mem_proj(new_node, &am);
4110 * Transform a Bound node.
 * Only the common lower==0 case is supported: index < upper is checked with
 * an unsigned compare (Sub + Jcc with unsigned-less), which also catches
 * negative indices. General bounds are rejected with a panic.
4112 static ir_node *gen_Bound(ir_node *node)
4115 ir_node *lower = get_Bound_lower(node);
4116 dbg_info *dbgi = get_irn_dbg_info(node);
4118 if (is_Const_0(lower)) {
4119 /* typical case for Java */
4120 ir_node *sub, *res, *flags, *block;
4122 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4123 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4125 block = get_nodes_block(res);
4126 if (! is_Proj(res)) {
4128 set_irn_mode(sub, mode_T);
4129 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4131 sub = get_Proj_pred(res);
/* use the Sub's flags output for the unsigned-less conditional jump */
4133 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4134 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4135 SET_IA32_ORIG_NODE(new_node, node);
4137 panic("generic Bound not supported in ia32 Backend");
/**
 * Transform a lowered ShlDep (shift-left with extra scheduling dependency)
 * into a real ia32 Shl.
 */
4143 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4145 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4146 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4148 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4149 match_immediate | match_mode_neutral);
/** Transform a lowered ShrDep into a real ia32 Shr. */
4152 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4154 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4155 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4156 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/** Transform a lowered SarDep into a real ia32 Sar. */
4160 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4162 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4163 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4164 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered l_Add into a real ia32 Add. The node is forced into
 * mode_T because 64bit lowering needs both the result and the flags
 * (carry) output of the Add.
 */
4168 static ir_node *gen_ia32_l_Add(ir_node *node)
4170 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4171 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4172 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4173 match_commutative | match_am | match_immediate |
4174 match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Add itself */
4176 if (is_Proj(lowered)) {
4177 lowered = get_Proj_pred(lowered);
4179 assert(is_ia32_Add(lowered));
4180 set_irn_mode(lowered, mode_T);
/** Transform a lowered add-with-carry (l_Adc) into a real ia32 Adc. */
4186 static ir_node *gen_ia32_l_Adc(ir_node *node)
4188 return gen_binop_flags(node, new_bd_ia32_Adc,
4189 match_commutative | match_am | match_immediate |
4190 match_mode_neutral);
4194 * Transforms a l_MulS into a "real" MulS node.
 * (Unsigned widening multiply; used by the 64bit lowering.)
4196 * @return the created ia32 Mul node
4198 static ir_node *gen_ia32_l_Mul(ir_node *node)
4200 ir_node *left = get_binop_left(node);
4201 ir_node *right = get_binop_right(node);
4203 return gen_binop(node, left, right, new_bd_ia32_Mul,
4204 match_commutative | match_am | match_mode_neutral);
4208 * Transforms a l_IMulS into a "real" IMul1OPS node.
 * (Signed widening multiply, one-operand form; used by the 64bit lowering.)
4210 * @return the created ia32 IMul1OP node
4212 static ir_node *gen_ia32_l_IMul(ir_node *node)
4214 ir_node *left = get_binop_left(node);
4215 ir_node *right = get_binop_right(node);
4217 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4218 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered l_Sub into a real ia32 Sub, forced into mode_T so the
 * 64bit lowering can use both the result and the flags (borrow) output.
 * Note: Sub is not commutative, hence no match_commutative here.
 */
4221 static ir_node *gen_ia32_l_Sub(ir_node *node)
4223 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4224 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4225 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4226 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Sub itself */
4228 if (is_Proj(lowered)) {
4229 lowered = get_Proj_pred(lowered);
4231 assert(is_ia32_Sub(lowered));
4232 set_irn_mode(lowered, mode_T);
/** Transform a lowered subtract-with-borrow (l_Sbb) into a real ia32 Sbb. */
4238 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4240 return gen_binop_flags(node, new_bd_ia32_Sbb,
4241 match_am | match_immediate | match_mode_neutral);
4245 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4246 * op1 - target to be shifted
4247 * op2 - contains bits to be shifted into target
 * op3 - shift count
4249 * Only op3 can be an immediate.
4251 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4252 ir_node *low, ir_node *count)
4254 ir_node *block = get_nodes_block(node);
4255 ir_node *new_block = be_transform_node(block);
4256 dbg_info *dbgi = get_irn_dbg_info(node);
4257 ir_node *new_high = be_transform_node(high);
4258 ir_node *new_low = be_transform_node(low);
4262 /* the shift amount can be any mode that is bigger than 5 bits, since all
4263 * other bits are ignored anyway */
4264 while (is_Conv(count) &&
4265 get_irn_n_edges(count) == 1 &&
4266 mode_is_int(get_irn_mode(count))) {
4267 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4268 count = get_Conv_op(count);
4270 new_count = create_immediate_or_transform(count, 0);
4272 if (is_ia32_l_ShlD(node)) {
4273 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4276 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4279 SET_IA32_ORIG_NODE(new_node, node);
/** Transform a lowered 64bit shift-left (l_ShlD) via gen_lowered_64bit_shifts. */
4284 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4286 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4287 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4288 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4289 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered 64bit shift-right (l_ShrD) via gen_lowered_64bit_shifts. */
4292 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4294 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4295 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4296 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4297 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered 64bit-int-to-float conversion (l_LLtoFloat) for the
 * x87 path: store the two 32bit halves to the frame, fild the 64bit value,
 * and — for unsigned sources — add the 2^64 bias constant when the (as
 * signed interpreted) high word was negative.
 */
4300 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4302 ir_node *src_block = get_nodes_block(node);
4303 ir_node *block = be_transform_node(src_block);
4304 ir_graph *irg = current_ir_graph;
4305 dbg_info *dbgi = get_irn_dbg_info(node);
4306 ir_node *frame = get_irg_frame(irg);
4307 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4308 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4309 ir_node *new_val_low = be_transform_node(val_low);
4310 ir_node *new_val_high = be_transform_node(val_high);
4312 ir_node *sync, *fild, *res;
4313 ir_node *store_low, *store_high;
4315 if (ia32_cg_config.use_sse2) {
4316 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves to a 64bit frame slot: low word at offset 0,
 * high word at offset 4 */
4320 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4322 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4324 SET_IA32_ORIG_NODE(store_low, node);
4325 SET_IA32_ORIG_NODE(store_high, node);
4327 set_ia32_use_frame(store_low);
4328 set_ia32_use_frame(store_high);
4329 set_ia32_op_type(store_low, ia32_AddrModeD);
4330 set_ia32_op_type(store_high, ia32_AddrModeD);
4331 set_ia32_ls_mode(store_low, mode_Iu);
4332 set_ia32_ls_mode(store_high, mode_Is);
4333 add_ia32_am_offs_int(store_high, 4);
4337 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the combined 64bit value into an x87 register */
4340 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4342 set_ia32_use_frame(fild);
4343 set_ia32_op_type(fild, ia32_AddrModeS);
4344 set_ia32_ls_mode(fild, mode_Ls);
4346 SET_IA32_ORIG_NODE(fild, node);
4348 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
4350 if (! mode_is_signed(get_irn_mode(val_high))) {
/* unsigned fixup: fild treated the value as signed; if the sign bit of
 * the high word was set, add the ULL bias constant. The bias is fetched
 * through an address mode indexed by (high >> 31), so the addend is 0
 * or the bias without a branch. */
4351 ia32_address_mode_t am;
4353 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4356 am.addr.base = get_symconst_base();
4357 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4358 am.addr.mem = nomem;
4361 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4362 am.addr.use_frame = 0;
4363 am.addr.frame_entity = NULL;
4364 am.addr.symconst_sign = 0;
4365 am.ls_mode = mode_F;
4366 am.mem_proj = nomem;
4367 am.op_type = ia32_AddrModeS;
4369 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4370 am.pinned = op_pin_state_floats;
4372 am.ins_permuted = 0;
4374 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4375 am.new_op1, am.new_op2, get_fpcw());
4376 set_am_attributes(fadd, &am);
4378 set_irn_mode(fadd, mode_T);
4379 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered float-to-64bit-int conversion (l_FloattoLL): fist the
 * x87 value into a 64bit frame slot; the two 32bit halves are read back by
 * gen_Proj_l_FloattoLL.
 */
4384 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4386 ir_node *src_block = get_nodes_block(node);
4387 ir_node *block = be_transform_node(src_block);
4388 ir_graph *irg = get_Block_irg(block);
4389 dbg_info *dbgi = get_irn_dbg_info(node);
4390 ir_node *frame = get_irg_frame(irg);
4391 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4392 ir_node *new_val = be_transform_node(val);
4393 ir_node *fist, *mem;
4395 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4396 SET_IA32_ORIG_NODE(fist, node);
4397 set_ia32_use_frame(fist);
4398 set_ia32_op_type(fist, ia32_AddrModeD);
/* store the full 64bit integer */
4399 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Transform a Proj of an l_FloattoLL: load the requested 32bit half
 * (low at offset 0, high at offset 4) from the 64bit frame slot written
 * by the fist.
 */
4404 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4406 ir_node *block = be_transform_node(get_nodes_block(node));
4407 ir_graph *irg = get_Block_irg(block);
4408 ir_node *pred = get_Proj_pred(node);
4409 ir_node *new_pred = be_transform_node(pred);
4410 ir_node *frame = get_irg_frame(irg);
4411 dbg_info *dbgi = get_irn_dbg_info(node);
4412 long pn = get_Proj_proj(node);
4417 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4418 SET_IA32_ORIG_NODE(load, node);
4419 set_ia32_use_frame(load);
4420 set_ia32_op_type(load, ia32_AddrModeS);
4421 set_ia32_ls_mode(load, mode_Iu);
4422 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4423 * 32 bit from it with this particular load */
4424 attr = get_ia32_attr(load);
4425 attr->data.need_64bit_stackent = 1;
4427 if (pn == pn_ia32_l_FloattoLL_res_high) {
4428 add_ia32_am_offs_int(load, 4);
4430 assert(pn == pn_ia32_l_FloattoLL_res_low);
4433 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4439 * Transform the Projs of an AddSP.
 * The be_AddSP was turned into an ia32 SubSP (stack grows downwards), so
 * the proj numbers are renumbered to the SubSP outputs; the stack-pointer
 * result is pinned to %esp.
4441 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4443 ir_node *pred = get_Proj_pred(node);
4444 ir_node *new_pred = be_transform_node(pred);
4445 dbg_info *dbgi = get_irn_dbg_info(node);
4446 long proj = get_Proj_proj(node);
4448 if (proj == pn_be_AddSP_sp) {
4449 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4450 pn_ia32_SubSP_stack);
4451 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4453 } else if (proj == pn_be_AddSP_res) {
4454 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4455 pn_ia32_SubSP_addr);
4456 } else if (proj == pn_be_AddSP_M) {
4457 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4460 panic("No idea how to transform proj->AddSP");
4464 * Transform the Projs of a SubSP.
 * Mirror of gen_Proj_be_AddSP: the be_SubSP became an ia32 AddSP, so projs
 * are renumbered to the AddSP outputs; the stack pointer is pinned to %esp.
4466 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4468 ir_node *pred = get_Proj_pred(node);
4469 ir_node *new_pred = be_transform_node(pred);
4470 dbg_info *dbgi = get_irn_dbg_info(node);
4471 long proj = get_Proj_proj(node);
4473 if (proj == pn_be_SubSP_sp) {
4474 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4475 pn_ia32_AddSP_stack);
4476 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4478 } else if (proj == pn_be_SubSP_M) {
4479 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4482 panic("No idea how to transform proj->SubSP");
4486 * Transform and renumber the Projs from a Load.
 * Depending on what the Load was transformed into (Load, Conv_I2I with
 * folded load, xLoad, vfld, or a node that absorbed the load via source
 * address mode) the proj numbers and result modes differ.
4488 static ir_node *gen_Proj_Load(ir_node *node)
4491 ir_node *block = be_transform_node(get_nodes_block(node));
4492 ir_node *pred = get_Proj_pred(node);
4493 dbg_info *dbgi = get_irn_dbg_info(node);
4494 long proj = get_Proj_proj(node);
4496 /* loads might be part of source address mode matches, so we don't
4497 * transform the ProjMs yet (with the exception of loads whose result is
4500 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4503 /* this is needed, because sometimes we have loops that are only
4504 reachable through the ProjM */
4505 be_enqueue_preds(node);
4506 /* do it in 2 steps, to silence firm verifier */
4507 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4508 set_Proj_proj(res, pn_ia32_mem);
4512 /* renumber the proj */
4513 new_pred = be_transform_node(pred);
4514 if (is_ia32_Load(new_pred)) {
4517 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4519 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4520 case pn_Load_X_regular:
4521 return new_rd_Jmp(dbgi, block);
4522 case pn_Load_X_except:
4523 /* This Load might raise an exception. Mark it. */
4524 set_ia32_exc_label(new_pred, 1);
4525 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* load was folded into a Conv (load+conversion in one instruction) */
4529 } else if (is_ia32_Conv_I2I(new_pred) ||
4530 is_ia32_Conv_I2I8Bit(new_pred)) {
4531 set_irn_mode(new_pred, mode_T);
4532 if (proj == pn_Load_res) {
4533 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4534 } else if (proj == pn_Load_M) {
4535 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4537 } else if (is_ia32_xLoad(new_pred)) {
4540 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4542 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4543 case pn_Load_X_regular:
4544 return new_rd_Jmp(dbgi, block);
4545 case pn_Load_X_except:
4546 /* This Load might raise an exception. Mark it. */
4547 set_ia32_exc_label(new_pred, 1);
4548 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4552 } else if (is_ia32_vfld(new_pred)) {
4555 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4557 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4558 case pn_Load_X_regular:
4559 return new_rd_Jmp(dbgi, block);
4560 case pn_Load_X_except:
4561 /* This Load might raise an exception. Mark it. */
4562 set_ia32_exc_label(new_pred, 1);
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4568 /* can happen for ProJMs when source address mode happened for the
4571 /* however it should not be the result proj, as that would mean the
4572 load had multiple users and should not have been used for
4574 if (proj != pn_Load_M) {
4575 panic("internal error: transformed node not a Load");
4577 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4580 panic("No idea how to transform proj");
4584 * Transform and renumber the Projs from a DivMod like instruction.
 * Div, Mod and DivMod all map to the same ia32 Div/IDiv node; only the
 * selected output (div result, mod result, memory, control flow) differs.
4586 static ir_node *gen_Proj_DivMod(ir_node *node)
4588 ir_node *block = be_transform_node(get_nodes_block(node));
4589 ir_node *pred = get_Proj_pred(node);
4590 ir_node *new_pred = be_transform_node(pred);
4591 dbg_info *dbgi = get_irn_dbg_info(node);
4592 long proj = get_Proj_proj(node);
4594 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4596 switch (get_irn_opcode(pred)) {
4600 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4602 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4603 case pn_Div_X_regular:
4604 return new_rd_Jmp(dbgi, block);
4605 case pn_Div_X_except:
/* division may trap (e.g. divide by zero); mark the exception label */
4606 set_ia32_exc_label(new_pred, 1);
4607 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4615 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4617 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4618 case pn_Mod_X_except:
4619 set_ia32_exc_label(new_pred, 1);
4620 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4628 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4629 case pn_DivMod_res_div:
4630 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4631 case pn_DivMod_res_mod:
4632 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4633 case pn_DivMod_X_regular:
4634 return new_rd_Jmp(dbgi, block);
4635 case pn_DivMod_X_except:
4636 set_ia32_exc_label(new_pred, 1);
4637 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4646 panic("No idea how to transform proj->DivMod");
4650 * Transform and renumber the Projs from a CopyB.
 * Only the memory output is meaningful; it is renumbered depending on
 * whether the CopyB was transformed into the immediate-size or the
 * variable-size ia32 CopyB variant.
4652 static ir_node *gen_Proj_CopyB(ir_node *node)
4654 ir_node *pred = get_Proj_pred(node);
4655 ir_node *new_pred = be_transform_node(pred);
4656 dbg_info *dbgi = get_irn_dbg_info(node);
4657 long proj = get_Proj_proj(node);
4661 if (is_ia32_CopyB_i(new_pred)) {
4662 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4663 } else if (is_ia32_CopyB(new_pred)) {
4664 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4671 panic("No idea how to transform proj->CopyB");
4675 * Transform and renumber the Projs from a Quot.
 * The float division was transformed into either an SSE xDiv or an x87
 * vfdiv; projs are renumbered accordingly (xmm vs. vfp result mode).
4677 static ir_node *gen_Proj_Quot(ir_node *node)
4679 ir_node *pred = get_Proj_pred(node);
4680 ir_node *new_pred = be_transform_node(pred);
4681 dbg_info *dbgi = get_irn_dbg_info(node);
4682 long proj = get_Proj_proj(node);
4686 if (is_ia32_xDiv(new_pred)) {
4687 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4688 } else if (is_ia32_vfdiv(new_pred)) {
4689 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4693 if (is_ia32_xDiv(new_pred)) {
4694 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4695 } else if (is_ia32_vfdiv(new_pred)) {
4696 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* float division does not raise exceptions here; X projs unsupported */
4699 case pn_Quot_X_regular:
4700 case pn_Quot_X_except:
4705 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call: fold the call target into an
 * address mode or immediate where possible, wire the register parameters
 * (eax/ecx/edx) and fpcw, and remember SSE calls for the x87 post-pass.
 */
4708 static ir_node *gen_be_Call(ir_node *node)
4710 dbg_info *const dbgi = get_irn_dbg_info(node);
4711 ir_node *const src_block = get_nodes_block(node);
4712 ir_node *const block = be_transform_node(src_block);
4713 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4714 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4715 ir_node *const sp = be_transform_node(src_sp);
4716 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4717 ia32_address_mode_t am;
4718 ia32_address_t *const addr = &am.addr;
4723 ir_node * eax = noreg_GP;
4724 ir_node * ecx = noreg_GP;
4725 ir_node * edx = noreg_GP;
4726 unsigned const pop = be_Call_get_pop(node);
4727 ir_type *const call_tp = be_Call_get_type(node);
4728 int old_no_pic_adjust;
4730 /* Run the x87 simulator if the call returns a float value */
4731 if (get_method_n_ress(call_tp) > 0) {
4732 ir_type *const res_type = get_method_res_type(call_tp, 0);
4733 ir_mode *const res_mode = get_type_mode(res_type);
4735 if (res_mode != NULL && mode_is_float(res_mode)) {
4736 env_cg->do_x87_sim = 1;
4740 /* We do not want be_Call direct calls */
4741 assert(be_Call_get_entity(node) == NULL);
4743 /* special case for PIC trampoline calls */
4744 old_no_pic_adjust = no_pic_adjust;
4745 no_pic_adjust = be_get_irg_options(env_cg->irg)->pic;
4747 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4748 match_am | match_immediate);
4750 no_pic_adjust = old_no_pic_adjust;
/* the last input is the fpcw; the remaining tail inputs are the GP
 * register parameters, each constrained to exactly one of eax/ecx/edx */
4752 i = get_irn_arity(node) - 1;
4753 fpcw = be_transform_node(get_irn_n(node, i--));
4754 for (; i >= be_pos_Call_first_arg; --i) {
4755 arch_register_req_t const *const req = arch_get_register_req(node, i);
4756 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4758 assert(req->type == arch_register_req_type_limited);
4759 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4761 switch (*req->limited) {
4762 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4763 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4764 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4765 default: panic("Invalid GP register for register parameter");
4769 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4770 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4771 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4772 set_am_attributes(call, &am);
4773 call = fix_mem_proj(call, &am);
4775 if (get_irn_pinned(node) == op_pin_state_pinned)
4776 set_irn_pinned(call, op_pin_state_pinned);
4778 SET_IA32_ORIG_NODE(call, node);
4780 if (ia32_cg_config.use_sse2) {
4781 /* remember this call for post-processing */
4782 ARR_APP1(ir_node *, call_list, call);
4783 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4790 * Transform Builtin trap
 * into an ia32 UD2 instruction (guaranteed invalid-opcode trap).
4792 static ir_node *gen_trap(ir_node *node)
4794 dbg_info *dbgi = get_irn_dbg_info(node);
4795 ir_node *block = be_transform_node(get_nodes_block(node));
4796 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4798 return new_bd_ia32_UD2(dbgi, block, mem);
4802 * Transform Builtin debugbreak
 * into an ia32 Breakpoint node (int3).
4804 static ir_node *gen_debugbreak(ir_node *node)
4806 dbg_info *dbgi = get_irn_dbg_info(node);
4807 ir_node *block = be_transform_node(get_nodes_block(node));
4808 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4810 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4814 * Transform Builtin return_address
 * param 0 is the (constant) frame level to walk up; param 1 the current
 * frame. For level > 0 a ClimbFrame walks the frame chain first, then the
 * return address is loaded from the resulting frame.
4816 static ir_node *gen_return_address(ir_node *node)
4818 ir_node *param = get_Builtin_param(node, 0);
4819 ir_node *frame = get_Builtin_param(node, 1);
4820 dbg_info *dbgi = get_irn_dbg_info(node);
4821 tarval *tv = get_Const_tarval(param);
4822 unsigned long value = get_tarval_long(tv);
4824 ir_node *block = be_transform_node(get_nodes_block(node));
4825 ir_node *ptr = be_transform_node(frame);
4829 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4830 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4831 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4834 /* load the return address from this frame */
4835 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4837 set_irn_pinned(load, get_irn_pinned(node));
4838 set_ia32_op_type(load, ia32_AddrModeS);
4839 set_ia32_ls_mode(load, mode_Iu);
4841 set_ia32_am_offs_int(load, 0);
4842 set_ia32_use_frame(load);
4843 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4845 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned load: make it rematerializable; relies on all load variants
 * sharing the same result proj number */
4846 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4847 && pn_ia32_vfld_res == pn_ia32_Load_res
4848 && pn_ia32_Load_res == pn_ia32_res);
4849 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4852 SET_IA32_ORIG_NODE(load, node);
4853 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4857 * Transform Builtin frame_address
 * param 0 is the (constant) frame level to walk up; param 1 the current
 * frame. Analogous to gen_return_address, but loads the saved frame
 * pointer entity instead of the return address.
4859 static ir_node *gen_frame_address(ir_node *node)
4861 ir_node *param = get_Builtin_param(node, 0);
4862 ir_node *frame = get_Builtin_param(node, 1);
4863 dbg_info *dbgi = get_irn_dbg_info(node);
4864 tarval *tv = get_Const_tarval(param);
4865 unsigned long value = get_tarval_long(tv);
4867 ir_node *block = be_transform_node(get_nodes_block(node));
4868 ir_node *ptr = be_transform_node(frame);
4873 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4874 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4875 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4878 /* load the frame address from this frame */
4879 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4881 set_irn_pinned(load, get_irn_pinned(node));
4882 set_ia32_op_type(load, ia32_AddrModeS);
4883 set_ia32_ls_mode(load, mode_Iu);
4885 ent = ia32_get_frame_address_entity();
4887 set_ia32_am_offs_int(load, 0);
4888 set_ia32_use_frame(load);
4889 set_ia32_frame_ent(load, ent);
4891 /* will fail anyway, but gcc does this: */
4892 set_ia32_am_offs_int(load, 0);
4895 if (get_irn_pinned(node) == op_pin_state_floats) {
/* unpinned load: make it rematerializable; relies on all load variants
 * sharing the same result proj number */
4896 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4897 && pn_ia32_vfld_res == pn_ia32_Load_res
4898 && pn_ia32_Load_res == pn_ia32_res);
4899 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4902 SET_IA32_ORIG_NODE(load, node);
4903 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4907 * Transform Builtin frame_address
 * NOTE(review): this header looks copy-pasted — the function below actually
 * transforms the Builtin prefetch. It selects PrefetchW (3DNow!, write hint),
 * an SSE PrefetchNTA/0/1/2 variant by locality, or the plain 3DNow! Prefetch,
 * and routes memory through when no prefetch instruction is available.
4909 static ir_node *gen_prefetch(ir_node *node)
4912 ir_node *ptr, *block, *mem, *base, *index;
4913 ir_node *param, *new_node;
4916 ia32_address_t addr;
4918 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4919 /* no prefetch at all, route memory */
4920 return be_transform_node(get_Builtin_mem(node));
/* param 1 is the read/write hint (rw == 1 means prefetch for write) */
4923 param = get_Builtin_param(node, 1);
4924 tv = get_Const_tarval(param);
4925 rw = get_tarval_long(tv);
4927 /* construct load address */
4928 memset(&addr, 0, sizeof(addr));
4929 ptr = get_Builtin_param(node, 0);
4930 ia32_create_address_mode(&addr, ptr, 0);
4937 base = be_transform_node(base);
4940 if (index == NULL) {
4943 index = be_transform_node(index);
4946 dbgi = get_irn_dbg_info(node);
4947 block = be_transform_node(get_nodes_block(node));
4948 mem = be_transform_node(get_Builtin_mem(node));
4950 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4951 /* we have 3DNow!, this was already checked above */
4952 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4953 } else if (ia32_cg_config.use_sse_prefetch) {
4954 /* note: rw == 1 is IGNORED in that case */
4955 param = get_Builtin_param(node, 2);
4956 tv = get_Const_tarval(param);
4957 locality = get_tarval_long(tv);
4959 /* SSE style prefetch */
4962 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4965 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4968 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4971 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4975 assert(ia32_cg_config.use_3dnow_prefetch);
4976 /* 3DNow! style prefetch */
4977 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4980 set_irn_pinned(new_node, get_irn_pinned(node));
4981 set_ia32_op_type(new_node, ia32_AddrModeS);
4982 set_ia32_ls_mode(new_node, mode_Bu);
4983 set_address(new_node, &addr);
4985 SET_IA32_ORIG_NODE(new_node, node);
4987 be_dep_on_frame(new_node);
4988 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
4992  * Transform bsf like node
/*
 * Helper: transform a unary builtin (bsf/bsr style) whose operand may be
 * folded into an address mode.  'func' constructs the concrete ia32 node.
 */
4994 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4996 ir_node *param = get_Builtin_param(node, 0);
4997 dbg_info *dbgi = get_irn_dbg_info(node);
4999 ir_node *block = get_nodes_block(node);
5000 ir_node *new_block = be_transform_node(block);
5002 ia32_address_mode_t am;
5003 ia32_address_t *addr = &am.addr;
/* try to fold the operand into an address mode */
5006 match_arguments(&am, block, NULL, param, NULL, match_am);
5008 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5009 set_am_attributes(cnt, &am);
5010 set_ia32_ls_mode(cnt, get_irn_mode(param));
5012 SET_IA32_ORIG_NODE(cnt, node);
/* if a memory operand was folded, fix up the memory Proj */
5013 return fix_mem_proj(cnt, &am);
5017  * Transform builtin ffs.
/*
 * ffs(x) = bsf(x) + 1, but 0 when x == 0.
 * Computed branch-free as (bsf(x) | -(x == 0)) + 1: the Setcc on the Bsf
 * flags output materializes (x == 0), Neg turns it into an all-ones mask,
 * so the final Add yields 0 for zero input and bsf(x)+1 otherwise.
 */
5019 static ir_node *gen_ffs(ir_node *node)
5021 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
5022 ir_node *real = skip_Proj(bsf);
5023 dbg_info *dbgi = get_irn_dbg_info(real);
5024 ir_node *block = get_nodes_block(real);
5025 ir_node *flag, *set, *conv, *neg, *or;
/* make sure the Bsf node is mode_T so its flags output is accessible */
5028 if (get_irn_mode(real) != mode_T) {
5029 set_irn_mode(real, mode_T);
5030 bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);
5033 flag = new_r_Proj(real, mode_b, pn_ia32_flags);
/* set = (x == 0) as an 8-bit value */
5036 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
5037 SET_IA32_ORIG_NODE(set, node);
/* zero-extend the 8-bit setcc result to a full register */
5040 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
5041 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones mask iff x was zero */
5044 neg = new_bd_ia32_Neg(dbgi, block, conv);
/* or = bsf(x) | mask */
5047 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
5048 set_ia32_commutative(or);
/* + 1: bsf(x)+1, or (-1)+1 == 0 when x was zero */
5051 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5055 * Transform builtin clz.
5057 static ir_node *gen_clz(ir_node *node)
5059 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
5060 ir_node *real = skip_Proj(bsr);
5061 dbg_info *dbgi = get_irn_dbg_info(real);
5062 ir_node *block = get_nodes_block(real);
5063 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
5065 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5069 * Transform builtin ctz.
5071 static ir_node *gen_ctz(ir_node *node)
5073 return gen_unop_AM(node, new_bd_ia32_Bsf);
5077  * Transform builtin parity.
/*
 * parity(x): Cmp(x, 0) sets the x86 parity flag, which is materialized via
 * Setcc(parity) and zero-extended to a full register.
 * NOTE(review): x86 PF only reflects the low byte of the ALU result --
 * confirm the operand is reduced accordingly before this lowering.
 */
5079 static ir_node *gen_parity(ir_node *node)
5081 ir_node *param = get_Builtin_param(node, 0);
5082 dbg_info *dbgi = get_irn_dbg_info(node);
5084 ir_node *block = get_nodes_block(node);
5086 ir_node *new_block = be_transform_node(block);
5087 ir_node *imm, *cmp, *new_node;
5089 ia32_address_mode_t am;
5090 ia32_address_t *addr = &am.addr;
/* cmp param, 0 -- sets the parity flag */
5094 match_arguments(&am, block, NULL, param, NULL, match_am);
5095 imm = ia32_create_Immediate(NULL, 0, 0);
5096 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
5097 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
5098 set_am_attributes(cmp, &am);
5099 set_ia32_ls_mode(cmp, mode_Iu);
5101 SET_IA32_ORIG_NODE(cmp, node);
5103 cmp = fix_mem_proj(cmp, &am);
/* setp: materialize the parity flag as an 8-bit value */
5106 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
5107 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the 8-bit setcc result */
5110 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
5111 nomem, new_node, mode_Bu);
5112 SET_IA32_ORIG_NODE(new_node, node);
5117 * Transform builtin popcount
5119 static ir_node *gen_popcount(ir_node *node)
5121 ir_node *param = get_Builtin_param(node, 0);
5122 dbg_info *dbgi = get_irn_dbg_info(node);
5124 ir_node *block = get_nodes_block(node);
5125 ir_node *new_block = be_transform_node(block);
5128 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5130 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5131 if (ia32_cg_config.use_popcnt) {
5132 ia32_address_mode_t am;
5133 ia32_address_t *addr = &am.addr;
5136 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5138 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5139 set_am_attributes(cnt, &am);
5140 set_ia32_ls_mode(cnt, get_irn_mode(param));
5142 SET_IA32_ORIG_NODE(cnt, node);
5143 return fix_mem_proj(cnt, &am);
5146 new_param = be_transform_node(param);
5148 /* do the standard popcount algo */
5150 /* m1 = x & 0x55555555 */
5151 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5152 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5155 simm = ia32_create_Immediate(NULL, 0, 1);
5156 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5158 /* m2 = s1 & 0x55555555 */
5159 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5162 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5164 /* m4 = m3 & 0x33333333 */
5165 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5166 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5169 simm = ia32_create_Immediate(NULL, 0, 2);
5170 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5172 /* m5 = s2 & 0x33333333 */
5173 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5176 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5178 /* m7 = m6 & 0x0F0F0F0F */
5179 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5180 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5183 simm = ia32_create_Immediate(NULL, 0, 4);
5184 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5186 /* m8 = s3 & 0x0F0F0F0F */
5187 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5190 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5192 /* m10 = m9 & 0x00FF00FF */
5193 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5194 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5197 simm = ia32_create_Immediate(NULL, 0, 8);
5198 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5200 /* m11 = s4 & 0x00FF00FF */
5201 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5203 /* m12 = m10 + m11 */
5204 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5206 /* m13 = m12 & 0x0000FFFF */
5207 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5208 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5210 /* s5 = m12 >> 16 */
5211 simm = ia32_create_Immediate(NULL, 0, 16);
5212 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5214 /* res = m13 + s5 */
5215 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5219 * Transform builtin byte swap.
5221 static ir_node *gen_bswap(ir_node *node)
5223 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5224 dbg_info *dbgi = get_irn_dbg_info(node);
5226 ir_node *block = get_nodes_block(node);
5227 ir_node *new_block = be_transform_node(block);
5228 ir_mode *mode = get_irn_mode(param);
5229 unsigned size = get_mode_size_bits(mode);
5230 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5234 if (ia32_cg_config.use_i486) {
5235 /* swap available */
5236 return new_bd_ia32_Bswap(dbgi, new_block, param);
5238 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5239 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5241 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5242 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5244 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5246 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5247 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5249 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5250 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5253 /* swap16 always available */
5254 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5257 panic("Invalid bswap size (%d)", size);
5262  * Transform builtin outport.
/* Emit an x86 "out" instruction: write 'value' to I/O port 'port'. */
5264 static ir_node *gen_outport(ir_node *node)
/* first builtin parameter: the port number (folded to an immediate if possible) */
5266 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5267 ir_node *oldv = get_Builtin_param(node, 1);
5268 ir_mode *mode = get_irn_mode(oldv);
5269 ir_node *value = be_transform_node(oldv);
5270 ir_node *block = be_transform_node(get_nodes_block(node));
5271 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5272 dbg_info *dbgi = get_irn_dbg_info(node);
/* ls_mode carries the operand size of the out instruction (al/ax/eax) */
5274 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5275 set_ia32_ls_mode(res, mode);
5280  * Transform builtin inport.
/* Emit an x86 "in" instruction: read from I/O port 'port'.
 * The result mode is taken from the builtin's method type. */
5282 static ir_node *gen_inport(ir_node *node)
5284 ir_type *tp = get_Builtin_type(node);
5285 ir_type *rstp = get_method_res_type(tp, 0);
5286 ir_mode *mode = get_type_mode(rstp);
5287 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5288 ir_node *block = be_transform_node(get_nodes_block(node));
5289 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5290 dbg_info *dbgi = get_irn_dbg_info(node);
/* ls_mode carries the operand size of the in instruction */
5292 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5293 set_ia32_ls_mode(res, mode);
5295 /* check for missing Result Proj */
5300  * Transform a builtin inner trampoline
/*
 * Write x86 trampoline code to *ptr:
 *   B9 <env>      mov ecx, <env>
 *   E9 <offset>   jmp rel32 <callee>
 * i.e. four stores: opcode byte 0xB9, the 32-bit environment value,
 * opcode byte 0xE9 and the pc-relative jump offset (callee - ptr - 10,
 * since the offset is relative to the end of the 10-byte trampoline).
 * Returns a Tuple of (memory, trampoline address).
 */
5302 static ir_node *gen_inner_trampoline(ir_node *node)
5304 ir_node *ptr = get_Builtin_param(node, 0);
5305 ir_node *callee = get_Builtin_param(node, 1);
5306 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5307 ir_node *mem = get_Builtin_mem(node);
5308 ir_node *block = get_nodes_block(node);
5309 ir_node *new_block = be_transform_node(block);
5313 ir_node *trampoline;
5315 dbg_info *dbgi = get_irn_dbg_info(node);
5316 ia32_address_t addr;
5318 /* construct store address */
5319 memset(&addr, 0, sizeof(addr));
5320 ia32_create_address_mode(&addr, ptr, 0);
5322 if (addr.base == NULL) {
5323 addr.base = noreg_GP;
5325 addr.base = be_transform_node(addr.base);
5328 if (addr.index == NULL) {
5329 addr.index = noreg_GP;
5331 addr.index = be_transform_node(addr.index);
5333 addr.mem = be_transform_node(mem);
5335 /* mov ecx, <env> */
/* 0xB9 is the opcode of "mov ecx, imm32" */
5336 val = ia32_create_Immediate(NULL, 0, 0xB9);
5337 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5338 addr.index, addr.mem, val);
5339 set_irn_pinned(store, get_irn_pinned(node));
5340 set_ia32_op_type(store, ia32_AddrModeD);
5341 set_ia32_ls_mode(store, mode_Bu);
5342 set_address(store, &addr);
/* store the environment pointer as the mov's 32-bit immediate */
5346 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5347 addr.index, addr.mem, env);
5348 set_irn_pinned(store, get_irn_pinned(node));
5349 set_ia32_op_type(store, ia32_AddrModeD);
5350 set_ia32_ls_mode(store, mode_Iu);
5351 set_address(store, &addr);
5355 /* jmp rel <callee> */
/* 0xE9 is the opcode of "jmp rel32" */
5356 val = ia32_create_Immediate(NULL, 0, 0xE9);
5357 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5358 addr.index, addr.mem, val);
5359 set_irn_pinned(store, get_irn_pinned(node));
5360 set_ia32_op_type(store, ia32_AddrModeD);
5361 set_ia32_ls_mode(store, mode_Bu);
5362 set_address(store, &addr);
5366 trampoline = be_transform_node(ptr);
5368 /* the callee is typically an immediate */
5369 if (is_SymConst(callee)) {
/* -10: the jump offset is relative to the end of the 10-byte trampoline */
5370 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5372 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel = (callee - 10) - trampoline */
5374 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5376 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5377 addr.index, addr.mem, rel);
5378 set_irn_pinned(store, get_irn_pinned(node));
5379 set_ia32_op_type(store, ia32_AddrModeD);
5380 set_ia32_ls_mode(store, mode_Iu);
5381 set_address(store, &addr);
/* result tuple: [0] = memory, [1] = trampoline address */
5386 return new_r_Tuple(new_block, 2, in);
5390  * Transform Builtin node.
/* Dispatch a Builtin node to the matching gen_* transformer above.
 * (Several case labels of this switch fall on lines not visible here.) */
5392 static ir_node *gen_Builtin(ir_node *node)
5394 ir_builtin_kind kind = get_Builtin_kind(node);
5398 return gen_trap(node);
5399 case ir_bk_debugbreak:
5400 return gen_debugbreak(node);
5401 case ir_bk_return_address:
5402 return gen_return_address(node);
5403 case ir_bk_frame_address:
5404 return gen_frame_address(node);
5405 case ir_bk_prefetch:
5406 return gen_prefetch(node);
5408 return gen_ffs(node);
5410 return gen_clz(node);
5412 return gen_ctz(node);
5414 return gen_parity(node);
5415 case ir_bk_popcount:
5416 return gen_popcount(node);
5418 return gen_bswap(node);
5420 return gen_outport(node);
5422 return gen_inport(node);
5423 case ir_bk_inner_trampoline:
5424 return gen_inner_trampoline(node);
/* any builtin kind not handled above is unsupported on ia32 */
5426 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5430  * Transform Proj(Builtin) node.
/* Map firm Builtin proj numbers onto the projs/results of the already
 * transformed ia32 node. (Some case labels are on lines not visible here.) */
5432 static ir_node *gen_Proj_Builtin(ir_node *proj)
5434 ir_node *node = get_Proj_pred(proj);
5435 ir_node *new_node = be_transform_node(node);
5436 ir_builtin_kind kind = get_Builtin_kind(node);
/* single-result builtins: the transformed node is the result itself */
5439 case ir_bk_return_address:
5440 case ir_bk_frame_address:
5445 case ir_bk_popcount:
5447 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins */
5450 case ir_bk_debugbreak:
5451 case ir_bk_prefetch:
5453 assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a value and a memory result */
5456 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5457 return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5459 assert(get_Proj_proj(proj) == pn_Builtin_M);
5460 return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
5462 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returned a Tuple: [0] = memory, [1] = address */
5463 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5464 return get_Tuple_pred(new_node, 1);
5466 assert(get_Proj_proj(proj) == pn_Builtin_M);
5467 return get_Tuple_pred(new_node, 0);
5470 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5473 static ir_node *gen_be_IncSP(ir_node *node)
5475 ir_node *res = be_duplicate_node(node);
5476 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5482  * Transform the Projs from a be_Call.
/* Renumber be_Call projs to ia32_Call projs and pin the fixed output
 * registers (esp, fpcw) on the new Projs. */
5484 static ir_node *gen_Proj_be_Call(ir_node *node)
5486 ir_node *call = get_Proj_pred(node);
5487 ir_node *new_call = be_transform_node(call);
5488 dbg_info *dbgi = get_irn_dbg_info(node);
5489 long proj = get_Proj_proj(node);
5490 ir_mode *mode = get_irn_mode(node);
5493 if (proj == pn_be_Call_M_regular) {
5494 return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);
5496 /* transform call modes */
5497 if (mode_is_data(mode)) {
5498 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5502 /* Map from be_Call to ia32_Call proj number */
5503 if (proj == pn_be_Call_sp) {
5504 proj = pn_ia32_Call_stack;
5505 } else if (proj == pn_be_Call_M_regular) {
5506 proj = pn_ia32_Call_M;
/* result projs: find the ia32_Call output with the same limited
 * (single-register) requirement as the original be_Call output */
5508 arch_register_req_t const *const req = arch_get_register_req_out(node);
5509 int const n_outs = arch_irn_get_n_outs(new_call);
5512 assert(proj >= pn_be_Call_first_res);
5513 assert(req->type & arch_register_req_type_limited);
5515 for (i = 0; i < n_outs; ++i) {
5516 arch_register_req_t const *const new_req
5517 = arch_get_out_register_req(new_call, i);
5519 if (!(new_req->type & arch_register_req_type_limited) ||
5520 new_req->cls != req->cls ||
5521 *new_req->limited != *req->limited)
5530 res = new_rd_Proj(dbgi, new_call, mode, proj);
5532 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5534 case pn_ia32_Call_stack:
5535 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5538 case pn_ia32_Call_fpcw:
5539 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5549 static ir_node *gen_Proj_Cmp(ir_node *node)
5551 /* this probably means not all mode_b nodes were lowered... */
5552 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5557  * Transform the Projs from a Bound.
/* The Bound check was lowered to a Jcc: X_regular maps to the true exit,
 * X_except to the false exit; memory and index pass straight through.
 * (Case labels for the memory/result projs are on lines not visible here.) */
5559 static ir_node *gen_Proj_Bound(ir_node *node)
5562 ir_node *pred = get_Proj_pred(node);
5564 switch (get_Proj_proj(node)) {
5566 return be_transform_node(get_Bound_mem(pred));
5567 case pn_Bound_X_regular:
5568 new_node = be_transform_node(pred);
5569 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
5570 case pn_Bound_X_except:
5571 new_node = be_transform_node(pred);
5572 return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
5574 return be_transform_node(get_Bound_index(pred));
5576 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: renumber the proj according to the
 * output's register class (the memory output is always the last one).
 * The branches assigning 'pos' for int/float outputs are on lines not
 * visible in this excerpt. */
5580 static ir_node *gen_Proj_ASM(ir_node *node)
5582 ir_mode *mode = get_irn_mode(node);
5583 ir_node *pred = get_Proj_pred(node);
5584 ir_node *new_pred = be_transform_node(pred);
5585 long pos = get_Proj_proj(node);
/* the memory output is the last output of the transformed ASM node */
5587 if (mode == mode_M) {
5588 pos = arch_irn_get_n_outs(new_pred)-1;
5589 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5591 } else if (mode_is_float(mode)) {
5594 panic("unexpected proj mode at ASM");
5597 return new_r_Proj(new_pred, mode, pos);
5601  * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes to the specialized gen_Proj_* handlers
 * depending on the opcode of the Proj's predecessor.
 * (The iro_* case labels of this switch are on lines not visible here.) */
5603 static ir_node *gen_Proj(ir_node *node)
5605 ir_node *pred = get_Proj_pred(node);
5608 switch (get_irn_opcode(pred)) {
/* Store: only the memory result exists */
5610 proj = get_Proj_proj(node);
5611 if (proj == pn_Store_M) {
5612 return be_transform_node(pred);
5614 panic("No idea how to transform proj->Store");
5617 return gen_Proj_Load(node);
5619 return gen_Proj_ASM(node);
5621 return gen_Proj_Builtin(node);
5625 return gen_Proj_DivMod(node);
5627 return gen_Proj_CopyB(node);
5629 return gen_Proj_Quot(node);
5631 return gen_Proj_be_SubSP(node);
5633 return gen_Proj_be_AddSP(node);
5635 return gen_Proj_be_Call(node);
5637 return gen_Proj_Cmp(node);
5639 return gen_Proj_Bound(node);
/* Start projs */
5641 proj = get_Proj_proj(node);
5643 case pn_Start_X_initial_exec: {
5644 ir_node *block = get_nodes_block(pred);
5645 ir_node *new_block = be_transform_node(block);
5646 dbg_info *dbgi = get_irn_dbg_info(node);
5647 /* we exchange the ProjX with a jump */
5648 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5653 case pn_Start_P_tls:
5654 return gen_Proj_tls(node);
/* default case */
5659 if (is_ia32_l_FloattoLL(pred)) {
5660 return gen_Proj_l_FloattoLL(node);
5662 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5666 ir_mode *mode = get_irn_mode(node);
5667 if (ia32_mode_needs_gp_reg(mode)) {
/* renumber to mode_Iu, keeping the debug node number */
5668 ir_node *new_pred = be_transform_node(pred);
5669 ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
5670 get_Proj_proj(node));
5671 new_proj->node_nr = node->node_nr;
5676 return be_duplicate_node(node);
5680  * Enters all transform functions into the generic pointer
/* Register one transform callback per firm/backend opcode; ops without an
 * entry keep whatever default be_start_transform_setup() installed. */
5682 static void register_transformers(void)
5684 /* first clear the generic function pointer for all ops */
5685 be_start_transform_setup();
5687 be_set_transform_function(op_Add, gen_Add);
5688 be_set_transform_function(op_And, gen_And);
5689 be_set_transform_function(op_ASM, gen_ASM);
5690 be_set_transform_function(op_be_AddSP, gen_be_AddSP);
5691 be_set_transform_function(op_be_Call, gen_be_Call);
5692 be_set_transform_function(op_be_Copy, gen_be_Copy);
5693 be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
5694 be_set_transform_function(op_be_IncSP, gen_be_IncSP);
5695 be_set_transform_function(op_be_Return, gen_be_Return);
5696 be_set_transform_function(op_be_SubSP, gen_be_SubSP);
5697 be_set_transform_function(op_Bound, gen_Bound);
5698 be_set_transform_function(op_Builtin, gen_Builtin);
5699 be_set_transform_function(op_Cmp, gen_Cmp);
5700 be_set_transform_function(op_Cond, gen_Cond);
5701 be_set_transform_function(op_Const, gen_Const);
5702 be_set_transform_function(op_Conv, gen_Conv);
5703 be_set_transform_function(op_CopyB, gen_CopyB);
5704 be_set_transform_function(op_Div, gen_Div);
5705 be_set_transform_function(op_DivMod, gen_DivMod);
5706 be_set_transform_function(op_Eor, gen_Eor);
/* lowered ia32 pseudo ops (l_*) produced by earlier lowering phases */
5707 be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
5708 be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
5709 be_set_transform_function(op_ia32_Leave, be_duplicate_node);
5710 be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
5711 be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
5712 be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
5713 be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
5714 be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
5715 be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
5716 be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
5717 be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
5718 be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
5719 be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
5720 be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
/* already-final ia32 nodes are simply duplicated */
5721 be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
5722 be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
5723 be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
5724 be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
5725 be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
5726 be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
5727 be_set_transform_function(op_ia32_Push, be_duplicate_node);
5728 be_set_transform_function(op_IJmp, gen_IJmp);
5729 be_set_transform_function(op_Jmp, gen_Jmp);
5730 be_set_transform_function(op_Load, gen_Load);
5731 be_set_transform_function(op_Minus, gen_Minus);
5732 be_set_transform_function(op_Mod, gen_Mod);
5733 be_set_transform_function(op_Mul, gen_Mul);
5734 be_set_transform_function(op_Mulh, gen_Mulh);
5735 be_set_transform_function(op_Mux, gen_Mux);
5736 be_set_transform_function(op_Not, gen_Not);
5737 be_set_transform_function(op_Or, gen_Or);
5738 be_set_transform_function(op_Phi, gen_Phi);
5739 be_set_transform_function(op_Proj, gen_Proj);
5740 be_set_transform_function(op_Quot, gen_Quot);
5741 be_set_transform_function(op_Rotl, gen_Rotl);
5742 be_set_transform_function(op_Shl, gen_Shl);
5743 be_set_transform_function(op_Shr, gen_Shr);
5744 be_set_transform_function(op_Shrs, gen_Shrs);
5745 be_set_transform_function(op_Store, gen_Store);
5746 be_set_transform_function(op_Sub, gen_Sub);
5747 be_set_transform_function(op_SymConst, gen_SymConst);
5748 be_set_transform_function(op_Unknown, gen_Unknown);
5752  * Pre-transform all unknown and noreg nodes.
/* Also caches the NoMem and NoReg nodes in the file-level 'nomem' and
 * 'noreg_GP' variables used throughout the transformers above. */
5754 static void ia32_pretransform_node(void)
5756 ia32_code_gen_t *cg = env_cg;
/* pre-transform the per-class "no register" placeholder nodes */
5758 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5759 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5760 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5762 nomem = get_irg_no_mem(current_ir_graph);
5763 noreg_GP = ia32_new_NoReg_gp(cg);
5769  * Post-process all calls if we are in SSE mode.
5770  * The ABI requires that the results are in st0, copy them
5771  * to a xmm register.
/* Walks the calls collected in call_list/call_types during transformation.
 * For each float result: an xStore user is rewritten into a direct vfst
 * from st0; otherwise the value is spilled via vfst and reloaded into an
 * xmm register via xLoad, and all users are repointed to the new result. */
5773 static void postprocess_fp_call_results(void)
5777 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5778 ir_node *call = call_list[i];
5779 ir_type *mtp = call_types[i];
5782 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5783 ir_type *res_tp = get_method_res_type(mtp, j);
5784 ir_node *res, *new_res;
5785 const ir_edge_t *edge, *next;
5788 if (! is_atomic_type(res_tp)) {
5789 /* no floating point return */
5792 mode = get_type_mode(res_tp);
5793 if (! mode_is_float(mode)) {
5794 /* no floating point return */
/* the x87 result proj of the call (st0-based) */
5798 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5801 /* now patch the users */
5802 foreach_out_edge_safe(res, edge, next) {
5803 ir_node *succ = get_edge_src_irn(edge);
/* Keeps may still reference the x87 value; leave them alone */
5806 if (be_is_Keep(succ))
5809 if (is_ia32_xStore(succ)) {
5810 /* an xStore can be patched into an vfst */
5811 dbg_info *db = get_irn_dbg_info(succ);
5812 ir_node *block = get_nodes_block(succ);
5813 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5814 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5815 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5816 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5817 ir_mode *mode = get_ia32_ls_mode(succ);
/* build the replacement x87 store with identical addressing */
5819 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5820 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5821 if (is_ia32_use_frame(succ))
5822 set_ia32_use_frame(st);
5823 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5824 set_irn_pinned(st, get_irn_pinned(succ));
5825 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st0 -> xmm copy (store + reload via the frame) */
5829 if (new_res == NULL) {
5830 dbg_info *db = get_irn_dbg_info(call);
5831 ir_node *block = get_nodes_block(call);
5832 ir_node *frame = get_irg_frame(current_ir_graph);
5833 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5834 ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
5835 ir_node *vfst, *xld, *new_mem;
5837 /* store st(0) on stack */
5838 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5839 set_ia32_op_type(vfst, ia32_AddrModeD);
5840 set_ia32_use_frame(vfst);
5842 /* load into SSE register */
5843 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5844 set_ia32_op_type(xld, ia32_AddrModeS);
5845 set_ia32_use_frame(xld);
5847 new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
5848 new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);
/* splice the new memory state into the existing memory chain */
5850 if (old_mem != NULL) {
5851 edges_reroute(old_mem, new_mem, current_ir_graph);
/* repoint this user to the xmm-based result */
5855 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5862 /* do the transformation */
/* Entry point: transform the whole graph of 'cg' into ia32 nodes.
 * Sets up the transform tables, heights and address-mode analysis,
 * disables CSE for the duration, runs the transformation and finally
 * post-processes float call results for SSE2 before cleaning up. */
5863 void ia32_transform_graph(ia32_code_gen_t *cg)
5867 register_transformers();
5869 initial_fpcw = NULL;
/* heights are needed by the address-mode matching (timed separately) */
5872 be_timer_push(T_HEIGHTS);
5873 heights = heights_new(cg->irg);
5874 be_timer_pop(T_HEIGHTS);
5875 ia32_calculate_non_address_mode_nodes(cg->irg);
5877 /* the transform phase is not safe for CSE (yet) because several nodes get
5878 * attributes set after their creation */
5879 cse_last = get_opt_cse();
/* collect calls (and their types) for postprocess_fp_call_results() */
5882 call_list = NEW_ARR_F(ir_node *, 0);
5883 call_types = NEW_ARR_F(ir_type *, 0);
5884 be_transform_graph(cg->irg, ia32_pretransform_node);
5886 if (ia32_cg_config.use_sse2)
5887 postprocess_fp_call_results();
5888 DEL_ARR_F(call_types);
5889 DEL_ARR_F(call_list);
/* restore the CSE setting and free the analysis data */
5891 set_opt_cse(cse_last);
5893 ia32_free_non_address_mode_nodes();
5894 heights_free(heights);
5898 void ia32_init_transform(void)
5900 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");