2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
50 #include "../benode.h"
51 #include "../besched.h"
53 #include "../beutil.h"
55 #include "../betranshlp.h"
58 #include "bearch_ia32_t.h"
59 #include "ia32_common_transform.h"
60 #include "ia32_nodes_attr.h"
61 #include "ia32_transform.h"
62 #include "ia32_new_nodes.h"
63 #include "ia32_map_regs.h"
64 #include "ia32_dbg_stat.h"
65 #include "ia32_optimize.h"
66 #include "ia32_util.h"
67 #include "ia32_address_mode.h"
68 #include "ia32_architecture.h"
70 #include "gen_ia32_regalloc_if.h"
72 /* define this to construct SSE constants instead of load them */
73 #undef CONSTRUCT_SSE_CONST
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
81 #define ULL_BIAS "18446744073709551616"
83 #define ENT_SFP_SIGN "C_ia32_sfp_sign"
84 #define ENT_DFP_SIGN "C_ia32_dfp_sign"
85 #define ENT_SFP_ABS "C_ia32_sfp_abs"
86 #define ENT_DFP_ABS "C_ia32_dfp_abs"
87 #define ENT_ULL_BIAS "C_ia32_ull_bias"
89 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
90 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* File-scope state and constructor-function typedefs used by the generic
 * transformation helpers (gen_binop & friends) below. */
92 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* cached, transformed initial x87 control word; lazily set in get_fpcw() */
94 static ir_node *initial_fpcw = NULL;
/* Signature of a new_bd_ia32_* constructor for a binary op with a
 * base/index/mem address-mode triple plus two operands. */
97 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
98 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
/* Same as construct_binop_func but additionally consuming an eflags input. */
101 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
102 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
/* Shift/rotate constructor: value and shift amount only, no address mode. */
105 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
106 ir_node *op1, ir_node *op2);
/* Destination-address-mode binary op constructor. */
108 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
109 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
/* Destination-address-mode unary op constructor. */
111 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
112 ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop constructor; takes an extra argument (presumably the
 * fpcw — see gen_binop_x87_float which passes get_fpcw()). */
114 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
115 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
118 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
120 static ir_node *create_immediate_or_transform(ir_node *node,
121 char immediate_constraint_type);
/* Integer-to-integer Conv constructor, defined later in this file. */
123 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
124 dbg_info *dbgi, ir_node *block,
125 ir_node *op, ir_node *orig_node);
127 /* its enough to have those once */
128 static ir_node *nomem, *noreg_GP;
130 /** a list to postprocess all calls */
131 static ir_node **call_list;
132 static ir_type **call_types;
134 /** Return non-zero is a node represents the 0 constant. */
135 static bool is_Const_0(ir_node *node)
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node)
143 return is_Const(node) && is_Const_one(node);
146 /** Return non-zero is a node represents the -1 constant. */
147 static bool is_Const_Minus_1(ir_node *node)
149 return is_Const(node) && is_Const_all_one(node);
/* Predicate: the float Const can be materialized by a single x87
 * instruction.  The only values tested for are 0.0 and 1.0 (cf. the
 * vfldz/vfld1 paths in gen_Const below).
 * NOTE(review): the return statements are missing from this extract. */
153 * returns true if constant can be created with a simple float command
155 static bool is_simple_x87_Const(ir_node *node)
157 tarval *tv = get_Const_tarval(node);
158 if (tarval_is_null(tv) || tarval_is_one(tv))
161 /* TODO: match all the other float constants */
/* Predicate: the float Const is cheap to build in an SSE register.
 * 0.0 always qualifies; with CONSTRUCT_SSE_CONST defined, additional
 * patterns (e.g. doubles whose low 32 bits are zero, buildable via
 * movd + psllq in gen_Const) are accepted as well.
 * NOTE(review): several lines between the #ifdef markers are missing
 * from this extract. */
166 * returns true if constant can be created with a simple float command
168 static bool is_simple_sse_Const(ir_node *node)
170 tarval *tv = get_Const_tarval(node);
171 ir_mode *mode = get_tarval_mode(tv);
176 if (tarval_is_null(tv)
177 #ifdef CONSTRUCT_SSE_CONST
182 #ifdef CONSTRUCT_SSE_CONST
183 if (mode == mode_D) {
/* reassemble the low 32 bits of the double from its byte representation */
184 unsigned val = get_tarval_sub_bits(tv, 0) |
185 (get_tarval_sub_bits(tv, 1) << 8) |
186 (get_tarval_sub_bits(tv, 2) << 16) |
187 (get_tarval_sub_bits(tv, 3) << 24);
189 /* lower 32bit are zero, really a 32bit constant */
192 #endif /* CONSTRUCT_SSE_CONST */
193 /* TODO: match all the other float constants */
198 * return NoREG or pic_base in case of PIC.
199 * This is necessary as base address for newly created symbols
/* When position-independent code is requested, symbol addresses must be
 * formed relative to the PIC base register obtained from the code
 * generator.  NOTE(review): the non-PIC return path (presumably
 * noreg_GP, per the comment above) is missing from this extract. */
201 static ir_node *get_symconst_base(void)
203 if (be_get_irg_options(env_cg->irg)->pic) {
204 return arch_code_generator_get_pic_base(env_cg);
211 * Transforms a Const.
/* Transforms a firm Const node into its ia32 equivalent:
 *  - SSE2 float: xZero for 0.0, optionally register-constructed constants
 *    (CONSTRUCT_SSE_CONST), movd for 32-bit patterns, else a constant-pool
 *    xLoad marked rematerializable.
 *  - x87 float: fldz/fld1 for 0.0/1.0, else a constant-pool vfld.
 *  - integer: converted to mode_Iu and emitted as an ia32_Const immediate. */
213 static ir_node *gen_Const(ir_node *node)
215 ir_node *old_block = get_nodes_block(node);
216 ir_node *block = be_transform_node(old_block);
217 dbg_info *dbgi = get_irn_dbg_info(node);
218 ir_mode *mode = get_irn_mode(node);
220 assert(is_Const(node));
222 if (mode_is_float(mode)) {
228 if (ia32_cg_config.use_sse2) {
/* 0.0: a simple xorps/xorpd-style zero, no memory access needed */
229 tarval *tv = get_Const_tarval(node);
230 if (tarval_is_null(tv)) {
231 load = new_bd_ia32_xZero(dbgi, block);
232 set_ia32_ls_mode(load, mode);
234 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build from all-ones by shifting left then right so only the
 * exponent bit pattern remains (shift counts 26/55 for float/double) */
235 } else if (tarval_is_one(tv)) {
236 int cnst = mode == mode_F ? 26 : 55;
237 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
238 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
239 ir_node *pslld, *psrld;
241 load = new_bd_ia32_xAllOnes(dbgi, block);
242 set_ia32_ls_mode(load, mode);
243 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
244 set_ia32_ls_mode(pslld, mode);
245 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
246 set_ia32_ls_mode(psrld, mode);
248 #endif /* CONSTRUCT_SSE_CONST */
249 } else if (mode == mode_F) {
250 /* we can place any 32bit constant by using a movd gp, sse */
/* assemble the 4 value bytes little-endian into a GP immediate */
251 unsigned val = get_tarval_sub_bits(tv, 0) |
252 (get_tarval_sub_bits(tv, 1) << 8) |
253 (get_tarval_sub_bits(tv, 2) << 16) |
254 (get_tarval_sub_bits(tv, 3) << 24);
255 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
256 load = new_bd_ia32_xMovd(dbgi, block, cnst);
257 set_ia32_ls_mode(load, mode);
260 #ifdef CONSTRUCT_SSE_CONST
/* double with zero low half: movd the high 32 bits, then shift them
 * into place with psllq 32 */
261 if (mode == mode_D) {
262 unsigned val = get_tarval_sub_bits(tv, 0) |
263 (get_tarval_sub_bits(tv, 1) << 8) |
264 (get_tarval_sub_bits(tv, 2) << 16) |
265 (get_tarval_sub_bits(tv, 3) << 24);
267 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
268 ir_node *cnst, *psllq;
270 /* fine, lower 32bit are zero, produce 32bit value */
271 val = get_tarval_sub_bits(tv, 4) |
272 (get_tarval_sub_bits(tv, 5) << 8) |
273 (get_tarval_sub_bits(tv, 6) << 16) |
274 (get_tarval_sub_bits(tv, 7) << 24);
275 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
276 load = new_bd_ia32_xMovd(dbgi, block, cnst);
277 set_ia32_ls_mode(load, mode);
278 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
279 set_ia32_ls_mode(psllq, mode);
284 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the value from a constant-pool entity; the load is
 * rematerializable so the spiller may redo it instead of spilling */
285 floatent = create_float_const_entity(node);
287 base = get_symconst_base();
288 load = new_bd_ia32_xLoad(dbgi, block, base, noreg_GP, nomem,
290 set_ia32_op_type(load, ia32_AddrModeS);
291 set_ia32_am_sc(load, floatent);
292 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
293 res = new_r_Proj(load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: special-case 0.0 and 1.0, otherwise constant-pool vfld */
296 if (is_Const_null(node)) {
297 load = new_bd_ia32_vfldz(dbgi, block);
299 set_ia32_ls_mode(load, mode);
300 } else if (is_Const_one(node)) {
301 load = new_bd_ia32_vfld1(dbgi, block);
303 set_ia32_ls_mode(load, mode);
308 floatent = create_float_const_entity(node);
309 /* create_float_const_ent is smart and sometimes creates
/* ...a smaller-mode entity; use the entity's mode for the load */
311 ls_mode = get_type_mode(get_entity_type(floatent));
312 base = get_symconst_base();
313 load = new_bd_ia32_vfld(dbgi, block, base, noreg_GP, nomem,
315 set_ia32_op_type(load, ia32_AddrModeS);
316 set_ia32_am_sc(load, floatent);
317 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
318 res = new_r_Proj(load, mode_vfp, pn_ia32_vfld_res);
321 #ifdef CONSTRUCT_SSE_CONST
323 #endif /* CONSTRUCT_SSE_CONST */
324 SET_IA32_ORIG_NODE(load, node);
326 be_dep_on_frame(load);
328 } else { /* non-float mode */
/* normalize the tarval to 32-bit unsigned before extracting the value */
330 tarval *tv = get_Const_tarval(node);
333 tv = tarval_convert_to(tv, mode_Iu);
335 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
337 panic("couldn't convert constant tarval (%+F)", node);
339 val = get_tarval_long(tv);
341 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
342 SET_IA32_ORIG_NODE(cnst, node);
344 be_dep_on_frame(cnst);
350 * Transforms a SymConst.
/* Float-mode SymConsts become loads (SSE xLoad or x87 vfld) addressed by
 * the symbol's entity; integer/address SymConsts become an ia32_Const
 * immediate carrying the entity.  Only symconst_addr_ent is supported. */
352 static ir_node *gen_SymConst(ir_node *node)
354 ir_node *old_block = get_nodes_block(node);
355 ir_node *block = be_transform_node(old_block);
356 dbg_info *dbgi = get_irn_dbg_info(node);
357 ir_mode *mode = get_irn_mode(node);
360 if (mode_is_float(mode)) {
361 if (ia32_cg_config.use_sse2)
362 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
364 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
365 set_ia32_am_sc(cnst, get_SymConst_entity(node));
/* NOTE(review): set_ia32_use_frame on a symbol load looks suspicious
 * (the address is entity-based, not frame-based) — verify upstream. */
366 set_ia32_use_frame(cnst);
370 if (get_SymConst_kind(node) != symconst_addr_ent) {
371 panic("backend only support symconst_addr_ent (at %+F)", node);
373 entity = get_SymConst_entity(node);
374 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
377 SET_IA32_ORIG_NODE(cnst, node);
379 be_dep_on_frame(cnst);
384 * Create a float type for the given mode and cache it.
386 * @param mode the mode for the float type (might be integer mode for SSE2 types)
387 * @param align alignment
/* Lazily creates and caches one primitive ir_type per (mode, alignment)
 * pair.  Each mode keeps its own 16-entry cache indexed by alignment;
 * assumes align < 16 — TODO confirm callers never exceed that. */
389 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
395 if (mode == mode_Iu) {
396 static ir_type *int_Iu[16] = {NULL, };
398 if (int_Iu[align] == NULL) {
399 int_Iu[align] = tp = new_type_primitive(mode);
400 /* set the specified alignment */
401 set_type_alignment_bytes(tp, align);
403 return int_Iu[align];
404 } else if (mode == mode_Lu) {
405 static ir_type *int_Lu[16] = {NULL, };
407 if (int_Lu[align] == NULL) {
408 int_Lu[align] = tp = new_type_primitive(mode);
409 /* set the specified alignment */
410 set_type_alignment_bytes(tp, align);
412 return int_Lu[align];
413 } else if (mode == mode_F) {
414 static ir_type *float_F[16] = {NULL, };
416 if (float_F[align] == NULL) {
417 float_F[align] = tp = new_type_primitive(mode);
418 /* set the specified alignment */
419 set_type_alignment_bytes(tp, align);
421 return float_F[align];
422 } else if (mode == mode_D) {
423 static ir_type *float_D[16] = {NULL, };
425 if (float_D[align] == NULL) {
426 float_D[align] = tp = new_type_primitive(mode);
427 /* set the specified alignment */
428 set_type_alignment_bytes(tp, align);
430 return float_D[align];
/* fallback bucket for any other mode (presumably long double / mode_E) */
432 static ir_type *float_E[16] = {NULL, };
434 if (float_E[align] == NULL) {
435 float_E[align] = tp = new_type_primitive(mode);
436 /* set the specified alignment */
437 set_type_alignment_bytes(tp, align);
439 return float_E[align];
444 * Create a float[2] array type for the given atomic type.
446 * @param tp the atomic type
/* Like ia32_create_float_type: a per-(mode, alignment) cache of 2-element
 * array types, size fixed to twice the element size.  Used for the
 * ULL_BIAS pair in ia32_gen_fp_known_const. */
448 static ir_type *ia32_create_float_array(ir_type *tp)
450 ir_mode *mode = get_type_mode(tp);
451 unsigned align = get_type_alignment_bytes(tp);
456 if (mode == mode_F) {
457 static ir_type *float_F[16] = {NULL, };
459 if (float_F[align] != NULL)
460 return float_F[align];
461 arr = float_F[align] = new_type_array(1, tp);
462 } else if (mode == mode_D) {
463 static ir_type *float_D[16] = {NULL, };
465 if (float_D[align] != NULL)
466 return float_D[align];
467 arr = float_D[align] = new_type_array(1, tp);
469 static ir_type *float_E[16] = {NULL, };
471 if (float_E[align] != NULL)
472 return float_E[align];
473 arr = float_E[align] = new_type_array(1, tp);
/* fix the layout: element alignment, 2 * element size */
475 set_type_alignment_bytes(arr, align);
476 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
477 set_type_state(arr, layout_fixed);
481 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches per kind) a private, constant global entity
 * holding one of the well-known FP bit patterns: the sign-bit masks and
 * abs masks (for Neg/Abs via xor/and) and the unsigned-long-long bias.
 * ia32_ULLBIAS is stored as a {0, bias} float[2] pair. */
482 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
484 static const struct {
485 const char *ent_name;
486 const char *cnst_str;
489 } names [ia32_known_const_max] = {
490 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
491 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
492 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
493 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
494 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
496 static ir_entity *ent_cache[ia32_known_const_max];
498 const char *ent_name, *cnst_str;
504 ent_name = names[kct].ent_name;
505 if (! ent_cache[kct]) {
506 cnst_str = names[kct].cnst_str;
/* decode the table's mode tag: 0 = 32-bit, 1 = 64-bit, 2 = float */
508 switch (names[kct].mode) {
509 case 0: mode = mode_Iu; break;
510 case 1: mode = mode_Lu; break;
511 default: mode = mode_F; break;
513 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
514 tp = ia32_create_float_type(mode, names[kct].align);
516 if (kct == ia32_ULLBIAS)
517 tp = ia32_create_float_array(tp);
518 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
520 set_entity_ld_ident(ent, get_entity_ident(ent));
521 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
522 set_entity_visibility(ent, ir_visibility_private);
/* ULLBIAS: compound initializer {0, bias}; others: single tarval */
524 if (kct == ia32_ULLBIAS) {
525 ir_initializer_t *initializer = create_initializer_compound(2);
527 set_initializer_compound_value(initializer, 0,
528 create_initializer_tarval(get_mode_null(mode)));
529 set_initializer_compound_value(initializer, 1,
530 create_initializer_tarval(tv));
532 set_entity_initializer(ent, initializer);
534 set_entity_initializer(ent, create_initializer_tarval(tv));
537 /* cache the entry */
538 ent_cache[kct] = ent;
541 return ent_cache[kct];
545 * return true if the node is a Proj(Load) and could be used in source address
546 * mode for another node. Will return only true if the @p other node is not
547 * dependent on the memory of the Load (for binary operations use the other
548 * input here, for unary operations use NULL).
/* Rejection criteria visible below: non-simple float constants,
 * multi-user constants, non-Proj(Load) nodes, loads from another block,
 * loads with more users than allowed, already-transformed loads, and
 * loads whose memory the other operand(s) depend on. */
550 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
551 ir_node *other, ir_node *other2, match_flags_t flags)
556 /* float constants are always available */
557 if (is_Const(node)) {
558 ir_mode *mode = get_irn_mode(node);
559 if (mode_is_float(mode)) {
560 if (ia32_cg_config.use_sse2) {
561 if (is_simple_sse_Const(node))
564 if (is_simple_x87_Const(node))
567 if (get_irn_n_edges(node) > 1)
/* must be the result Proj of a Load in the same block */
575 load = get_Proj_pred(node);
576 pn = get_Proj_proj(node);
577 if (!is_Load(load) || pn != pn_Load_res)
579 if (get_nodes_block(load) != block)
581 /* we only use address mode if we're the only user of the load */
582 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
584 /* in some edge cases with address mode we might reach the load normally
585 * and through some AM sequence, if it is already materialized then we
586 * can't create an AM node from it */
587 if (be_is_transformed(node))
590 /* don't do AM if other node inputs depend on the load (via mem-proj) */
591 if (other != NULL && prevents_AM(block, load, other))
594 if (other2 != NULL && prevents_AM(block, load, other2))
/* Aggregate describing a matched operand configuration: the address-mode
 * kind plus flags filled in by match_arguments() and consumed by
 * set_am_attributes().  NOTE(review): several members (addr, ls_mode,
 * pinned, mem_proj, new_op1/new_op2 — all referenced by the code below)
 * are missing from this extract. */
600 typedef struct ia32_address_mode_t ia32_address_mode_t;
601 struct ia32_address_mode_t {
606 ia32_op_type_t op_type;
610 unsigned commutative : 1;
611 unsigned ins_permuted : 1;
614 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
616 /* construct load address */
617 memset(addr, 0, sizeof(addr[0]));
618 ia32_create_address_mode(addr, ptr, 0);
620 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
621 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
622 addr->mem = be_transform_node(mem);
/* Fills am->addr (and ls_mode/pinned/mem_proj) for a node usable in
 * source address mode: either a float Const — addressed via its
 * constant-pool entity — or a Proj(Load), whose pointer is decomposed
 * into base/index/offset. */
625 static void build_address(ia32_address_mode_t *am, ir_node *node,
626 ia32_create_am_flags_t flags)
628 ia32_address_t *addr = &am->addr;
634 /* floating point immediates */
635 if (is_Const(node)) {
636 ir_entity *entity = create_float_const_entity(node);
637 addr->base = get_symconst_base();
638 addr->index = noreg_GP;
639 addr->symconst_ent = entity;
/* use the entity's (possibly shrunk) mode, not the node's */
642 am->ls_mode = get_type_mode(get_entity_type(entity));
643 am->pinned = op_pin_state_floats;
/* otherwise: node is a Proj(Load); fold the load into the operation */
647 load = get_Proj_pred(node);
648 ptr = get_Load_ptr(load);
649 mem = get_Load_mem(load);
650 new_mem = be_transform_node(mem);
651 am->pinned = get_irn_pinned(load);
652 am->ls_mode = get_Load_mode(load);
653 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
656 /* construct load address */
657 ia32_create_address_mode(addr, ptr, flags);
659 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
660 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copies the components of an ia32_address_t (scale, symconst, offset,
 * sign, frame entity) onto an ia32 node's attributes.
 * NOTE(review): this extract appears to be missing a guard before the
 * set_ia32_use_frame call (presumably `if (addr->use_frame)`) — verify
 * against the full source. */
664 static void set_address(ir_node *node, const ia32_address_t *addr)
666 set_ia32_am_scale(node, addr->scale);
667 set_ia32_am_sc(node, addr->symconst_ent);
668 set_ia32_am_offs_int(node, addr->offset);
669 if (addr->symconst_sign)
670 set_ia32_am_sc_sign(node);
672 set_ia32_use_frame(node);
673 set_ia32_frame_ent(node, addr->frame_entity);
677 * Apply attributes of a given address mode to a node.
/* Transfers the full address-mode match result onto the constructed ia32
 * node: address components, op type, load/store mode, pinned state, and
 * the commutative marker. */
679 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
681 set_address(node, &am->addr);
683 set_ia32_op_type(node, am->op_type);
684 set_ia32_ls_mode(node, am->ls_mode);
685 if (am->pinned == op_pin_state_pinned) {
686 /* beware: some nodes are already pinned and did not allow to change the state */
687 if (get_irn_pinned(node) != op_pin_state_pinned)
688 set_irn_pinned(node, op_pin_state_pinned);
/* NOTE(review): presumably guarded by `if (am->commutative)` — the
 * guard line is missing from this extract. */
691 set_ia32_commutative(node);
695 * Check, if a given node is a Down-Conv, ie. a integer Conv
696 * from a mode with a mode with more bits to a mode with lesser bits.
697 * Moreover, we return only true if the node has not more than 1 user.
699 * @param node the node
700 * @return non-zero if node is a Down-Conv
/* True iff: single-user integer Conv whose destination mode is no wider
 * than its source mode (both GP-register modes). */
702 static int is_downconv(const ir_node *node)
710 /* we only want to skip the conv when we're the only user
711 * (because this test is used in the context of address-mode selection
712 * and we don't want to use address mode for multiple users) */
713 if (get_irn_n_edges(node) > 1)
716 src_mode = get_irn_mode(get_Conv_op(node));
717 dest_mode = get_irn_mode(node);
719 ia32_mode_needs_gp_reg(src_mode) &&
720 ia32_mode_needs_gp_reg(dest_mode) &&
721 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
724 /** Skip all Down-Conv's on a given node and return the resulting node. */
725 ir_node *ia32_skip_downconv(ir_node *node)
727 while (is_downconv(node))
728 node = get_Conv_op(node);
/* Counterpart to is_downconv: true iff the node is a single-user integer
 * Conv between two GP-register modes of the SAME bit width (i.e. only
 * signedness differs). */
733 static bool is_sameconv(ir_node *node)
741 /* we only want to skip the conv when we're the only user
742 * (because this test is used in the context of address-mode selection
743 * and we don't want to use address mode for multiple users) */
744 if (get_irn_n_edges(node) > 1)
747 src_mode = get_irn_mode(get_Conv_op(node));
748 dest_mode = get_irn_mode(node);
750 ia32_mode_needs_gp_reg(src_mode) &&
751 ia32_mode_needs_gp_reg(dest_mode) &&
752 get_mode_size_bits(dest_mode) == get_mode_size_bits(src_mode);
755 /** Skip all signedness convs */
756 static ir_node *ia32_skip_sameconv(ir_node *node)
758 while (is_sameconv(node))
759 node = get_Conv_op(node);
/* Widens @p node to a 32-bit mode via an integer-to-integer Conv,
 * choosing the signed or unsigned target per the source mode's sign.
 * NOTE(review): the tgt_mode assignments are missing from this extract
 * (presumably mode_Is for signed, mode_Iu otherwise). */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if (mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
783 * matches operands of a node into ia32 addressing/operand modes. This covers
784 * usage of source address mode, immediates, operations with non 32-bit modes,
786 * The resulting data is filled into the @p am struct. block is the block
787 * of the node whose arguments are matched. op1, op2 are the first and second
788 * input that are matched (op1 may be NULL). other_op is another unrelated
789 * input that is not matched! but which is needed sometimes to check if AM
790 * for op1/op2 is legal.
791 * @p flags describes the supported modes of the operation in detail.
/* Matching order visible below:
 *  1. skip down-/same-convs (mode-neutral ops only for downconvs),
 *  2. try an immediate for op2,
 *  3. try source address mode for op2, then (if commutative) for op1
 *     with inputs permuted,
 *  4. otherwise ia32_Normal, up-converting to 32 bit if requested. */
793 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
794 ir_node *op1, ir_node *op2, ir_node *other_op,
797 ia32_address_t *addr = &am->addr;
798 ir_mode *mode = get_irn_mode(op2);
799 int mode_bits = get_mode_size_bits(mode);
800 ir_node *new_op1, *new_op2;
802 unsigned commutative;
803 int use_am_and_immediates;
806 memset(am, 0, sizeof(am[0]));
808 commutative = (flags & match_commutative) != 0;
809 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
810 use_am = (flags & match_am) != 0;
811 use_immediate = (flags & match_immediate) != 0;
812 assert(!use_am_and_immediates || use_immediate);
815 assert(!commutative || op1 != NULL);
816 assert(use_am || !(flags & match_8bit_am));
817 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit operand without the corresponding AM capability: disable AM */
819 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
820 (mode_bits == 16 && !(flags & match_16bit_am))) {
824 /* we can simply skip downconvs for mode neutral nodes: the upper bits
825 * can be random for these operations */
826 if (flags & match_mode_neutral) {
827 op2 = ia32_skip_downconv(op2);
829 op1 = ia32_skip_downconv(op1);
832 op2 = ia32_skip_sameconv(op2);
834 op1 = ia32_skip_sameconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
/* source address mode for op2 (the canonical side) */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
847 build_address(am, op2, 0);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if (mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
/* commutative op: try folding op1 as the memory operand instead,
 * marking the inputs as permuted */
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
859 build_address(am, op1, 0);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if (new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
/* no AM possible: plain register operands */
877 am->op_type = ia32_Normal;
879 if (flags & match_try_am) {
885 mode = get_irn_mode(op2);
886 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
887 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
889 new_op2 = create_upconv(op2, NULL);
890 am->ls_mode = mode_Iu;
892 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
894 new_op2 = be_transform_node(op2);
895 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* default any unset address components to noreg/nomem */
898 if (addr->base == NULL)
899 addr->base = noreg_GP;
900 if (addr->index == NULL)
901 addr->index = noreg_GP;
902 if (addr->mem == NULL)
905 am->new_op1 = new_op1;
906 am->new_op2 = new_op2;
907 am->commutative = commutative;
911 * "Fixes" a node that uses address mode by turning it into mode_T
912 * and returning a pn_ia32_res Proj.
914 * @param node the node
915 * @param am its address mode
917 * @return a Proj(pn_ia32_res) if a memory address mode is used,
/* When a Load was folded into the node, the Load's old memory Proj must
 * now attach to this node: register it as the transformed Load, switch
 * the node to mode_T, and hand back the result Proj. */
920 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
925 if (am->mem_proj == NULL)
928 /* we have to create a mode_T so the old MemProj can attach to us */
929 mode = get_irn_mode(node);
930 load = get_Proj_pred(am->mem_proj);
932 be_set_transformed_node(load, node);
934 if (mode != mode_T) {
935 set_irn_mode(node, mode_T);
936 return new_rd_Proj(NULL, node, mode, pn_ia32_res);
943 * Construct a standard binary operation, set AM and immediate if required.
945 * @param node The original node for which the binop is created
946 * @param op1 The first operand
947 * @param op2 The second operand
948 * @param func The node constructor function
949 * @return The constructed ia32 node.
/* Generic binop transformation: match operands (AM/immediates), invoke
 * the constructor, apply AM attributes, and patch up the memory Proj. */
951 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
952 construct_binop_func *func, match_flags_t flags)
955 ir_node *block, *new_block, *new_node;
956 ia32_address_mode_t am;
957 ia32_address_t *addr = &am.addr;
959 block = get_nodes_block(node);
960 match_arguments(&am, block, op1, op2, NULL, flags);
962 dbgi = get_irn_dbg_info(node);
963 new_block = be_transform_node(block);
964 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
965 am.new_op1, am.new_op2);
966 set_am_attributes(new_node, &am);
967 /* we can't use source address mode anymore when using immediates */
968 if (!(flags & match_am_and_immediates) &&
969 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
970 set_ia32_am_support(new_node, ia32_am_none);
971 SET_IA32_ORIG_NODE(new_node, node);
973 new_node = fix_mem_proj(new_node, &am);
979 * Generic names for the inputs of an ia32 binary op.
/* These generic input indices must line up with the Adc and Sbb lowered
 * nodes' own indices; the COMPILETIME_ASSERTs below enforce that. */
982 n_ia32_l_binop_left, /**< ia32 left input */
983 n_ia32_l_binop_right, /**< ia32 right input */
984 n_ia32_l_binop_eflags /**< ia32 eflags input */
986 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
987 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
988 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
989 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
990 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
991 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
994 * Construct a binary operation which also consumes the eflags.
996 * @param node The node to transform
997 * @param func The node constructor function
998 * @param flags The match flags
999 * @return The constructor ia32 node
/* Like gen_binop but for lowered nodes carrying an eflags input (Adc,
 * Sbb): the eflags operand is passed to the match as other_op so the
 * matcher won't fold a Load whose memory it depends on. */
1001 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
1002 match_flags_t flags)
1004 ir_node *src_block = get_nodes_block(node);
1005 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
1006 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
1007 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1009 ir_node *block, *new_node, *new_eflags;
1010 ia32_address_mode_t am;
1011 ia32_address_t *addr = &am.addr;
1013 match_arguments(&am, src_block, op1, op2, eflags, flags);
1015 dbgi = get_irn_dbg_info(node);
1016 block = be_transform_node(src_block);
1017 new_eflags = be_transform_node(eflags);
1018 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
1019 am.new_op1, am.new_op2, new_eflags);
1020 set_am_attributes(new_node, &am);
1021 /* we can't use source address mode anymore when using immediates */
1022 if (!(flags & match_am_and_immediates) &&
1023 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
1024 set_ia32_am_support(new_node, ia32_am_none);
1025 SET_IA32_ORIG_NODE(new_node, node);
1027 new_node = fix_mem_proj(new_node, &am);
1032 static ir_node *get_fpcw(void)
1035 if (initial_fpcw != NULL)
1036 return initial_fpcw;
1038 fpcw = be_abi_get_ignore_irn(be_get_irg_abi(env_cg->irg),
1039 &ia32_fp_cw_regs[REG_FPCW]);
1040 initial_fpcw = be_transform_node(fpcw);
1042 return initial_fpcw;
1046 * Construct a standard binary operation, set AM and immediate if required.
1048 * @param op1 The first operand
1049 * @param op2 The second operand
1050 * @param func The node constructor function
1051 * @return The constructed ia32 node.
/* x87 float binop: always treated as commutative (reverse-operand
 * instructions exist), feeds the current fpcw to the constructor, and
 * propagates the ins_permuted flag into the x87 attributes so emission
 * can pick the reversed instruction form. */
1053 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1054 construct_binop_float_func *func)
1056 ir_mode *mode = get_irn_mode(node);
1058 ir_node *block, *new_block, *new_node;
1059 ia32_address_mode_t am;
1060 ia32_address_t *addr = &am.addr;
1061 ia32_x87_attr_t *attr;
1062 /* All operations are considered commutative, because there are reverse
1064 match_flags_t flags = match_commutative;
1066 /* happens for div nodes... */
1068 mode = get_divop_resmod(node);
1070 /* cannot use address mode with long double on x87 */
/* NOTE(review): presumably adds match_am to flags here; the line is
 * missing from this extract. */
1071 if (get_mode_size_bits(mode) <= 64)
1074 block = get_nodes_block(node);
1075 match_arguments(&am, block, op1, op2, NULL, flags);
1077 dbgi = get_irn_dbg_info(node);
1078 new_block = be_transform_node(block);
1079 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1080 am.new_op1, am.new_op2, get_fpcw());
1081 set_am_attributes(new_node, &am);
1083 attr = get_ia32_x87_attr(new_node);
1084 attr->attr.data.ins_permuted = am.ins_permuted;
1086 SET_IA32_ORIG_NODE(new_node, node);
1088 new_node = fix_mem_proj(new_node, &am);
1094 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1096 * @param op1 The first operand
1097 * @param op2 The second operand
1098 * @param func The node constructor function
1099 * @return The constructed ia32 node.
/* Shifts take no address mode: only match_mode_neutral/match_immediate
 * are legal flags.  The shifted value may need up-conversion to 32 bit;
 * Convs on the shift amount are skipped because the CPU only looks at
 * the low 5 bits of CL anyway. */
1101 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1102 construct_shift_func *func,
1103 match_flags_t flags)
1106 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1108 assert(! mode_is_float(get_irn_mode(node)));
1109 assert(flags & match_immediate);
1110 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1112 if (flags & match_mode_neutral) {
1113 op1 = ia32_skip_downconv(op1);
1114 new_op1 = be_transform_node(op1);
1115 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1116 new_op1 = create_upconv(op1, node);
1118 new_op1 = be_transform_node(op1);
1121 /* the shift amount can be any mode that is bigger than 5 bits, since all
1122 * other bits are ignored anyway */
1123 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1124 ir_node *const op = get_Conv_op(op2);
1125 if (mode_is_float(get_irn_mode(op)))
1128 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1130 new_op2 = create_immediate_or_transform(op2, 0);
1132 dbgi = get_irn_dbg_info(node);
1133 block = get_nodes_block(node);
1134 new_block = be_transform_node(block);
1135 new_node = func(dbgi, new_block, new_op1, new_op2);
1136 SET_IA32_ORIG_NODE(new_node, node);
1138 /* lowered shift instruction may have a dependency operand, handle it here */
1139 if (get_irn_arity(node) == 3) {
1140 /* we have a dependency */
1141 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1142 add_irn_dep(new_node, new_dep);
1150 * Construct a standard unary operation, set AM and immediate if required.
1152 * @param op The operand
1153 * @param func The node constructor function
1154 * @return The constructed ia32 node.
/* Minimal unary transform: optionally skip downconvs for mode-neutral
 * ops, transform the operand, construct, and tag with the origin node. */
1156 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1157 match_flags_t flags)
1160 ir_node *block, *new_block, *new_op, *new_node;
1162 assert(flags == 0 || flags == match_mode_neutral);
1163 if (flags & match_mode_neutral) {
1164 op = ia32_skip_downconv(op);
1167 new_op = be_transform_node(op);
1168 dbgi = get_irn_dbg_info(node);
1169 block = get_nodes_block(node);
1170 new_block = be_transform_node(block);
1171 new_node = func(dbgi, new_block, new_op);
1173 SET_IA32_ORIG_NODE(new_node, node);
/* Builds an ia32 Lea node for a decomposed address: transforms base and
 * index (or substitutes noreg where absent — the fallback assignments
 * are missing from this extract) and copies the remaining address
 * attributes onto the Lea. */
1178 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1179 ia32_address_t *addr)
1181 ir_node *base, *index, *res;
1187 base = be_transform_node(base);
1190 index = addr->index;
1191 if (index == NULL) {
1194 index = be_transform_node(index);
1197 res = new_bd_ia32_Lea(dbgi, block, base, index);
1198 set_address(res, addr);
1204 * Returns non-zero if a given address mode has a symbolic or
1205 * numerical offset != 0.
1207 static int am_has_immediates(const ia32_address_t *addr)
1209 return addr->offset != 0 || addr->symconst_ent != NULL
1210 || addr->frame_entity || addr->use_frame;
1214 * Creates an ia32 Add.
1216 * @return the created ia32 Add node
/* Strategy (see the numbered comment below): float Adds go through the
 * generic binop helpers; integer Adds are force-decomposed into an
 * address mode and become, in order of preference, a Const (pure
 * immediate tree), the operand itself (Add x,0), a Lea, a
 * source-address-mode Add, or finally a Lea. */
1218 static ir_node *gen_Add(ir_node *node)
1220 ir_mode *mode = get_irn_mode(node);
1221 ir_node *op1 = get_Add_left(node);
1222 ir_node *op2 = get_Add_right(node);
1224 ir_node *block, *new_block, *new_node, *add_immediate_op;
1225 ia32_address_t addr;
1226 ia32_address_mode_t am;
1228 if (mode_is_float(mode)) {
1229 if (ia32_cg_config.use_sse2)
1230 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1231 match_commutative | match_am);
1233 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* prevent this Add itself from being folded as someone else's AM */
1236 ia32_mark_non_am(node);
1238 op2 = ia32_skip_downconv(op2);
1239 op1 = ia32_skip_downconv(op1);
1243 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1244 * 1. Add with immediate -> Lea
1245 * 2. Add with possible source address mode -> Add
1246 * 3. Otherwise -> Lea
1248 memset(&addr, 0, sizeof(addr));
1249 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1250 add_immediate_op = NULL;
1252 dbgi = get_irn_dbg_info(node);
1253 block = get_nodes_block(node);
1254 new_block = be_transform_node(block);
/* case 0: everything folded into immediates — emit a plain Const */
1257 if (addr.base == NULL && addr.index == NULL) {
1258 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1259 addr.symconst_sign, 0, addr.offset);
1260 be_dep_on_frame(new_node);
1261 SET_IA32_ORIG_NODE(new_node, node);
1264 /* add with immediate? */
1265 if (addr.index == NULL) {
1266 add_immediate_op = addr.base;
1267 } else if (addr.base == NULL && addr.scale == 0) {
1268 add_immediate_op = addr.index;
1271 if (add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself — warn, since the frontend
 * should have folded this */
1272 if (!am_has_immediates(&addr)) {
1273 #ifdef DEBUG_libfirm
1274 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1277 return be_transform_node(add_immediate_op);
/* case 1: single register + immediates — a Lea does it */
1280 new_node = create_lea_from_address(dbgi, new_block, &addr);
1281 SET_IA32_ORIG_NODE(new_node, node);
1285 /* test if we can use source address mode */
1286 match_arguments(&am, block, op1, op2, NULL, match_commutative
1287 | match_mode_neutral | match_am | match_immediate | match_try_am);
1289 /* construct an Add with source address mode */
1290 if (am.op_type == ia32_AddrModeS) {
1291 ia32_address_t *am_addr = &am.addr;
1292 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1293 am_addr->index, am_addr->mem, am.new_op1,
1295 set_am_attributes(new_node, &am);
1296 SET_IA32_ORIG_NODE(new_node, node);
1298 new_node = fix_mem_proj(new_node, &am);
1303 /* otherwise construct a lea */
1304 new_node = create_lea_from_address(dbgi, new_block, &addr);
1305 SET_IA32_ORIG_NODE(new_node, node);
1310 * Creates an ia32 Mul.
1312 * @return the created ia32 Mul node
1314 static ir_node *gen_Mul(ir_node *node)
1316 ir_node *op1 = get_Mul_left(node);
1317 ir_node *op2 = get_Mul_right(node);
1318 ir_mode *mode = get_irn_mode(node);
1320 if (mode_is_float(mode)) {
1321 if (ia32_cg_config.use_sse2)
1322 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1323 match_commutative | match_am);
1325 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1327 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1328 match_commutative | match_am | match_mode_neutral |
1329 match_immediate | match_am_and_immediates);
1333 * Creates an ia32 Mulh.
1334 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1335 * this result while Mul returns the lower 32 bit.
1337 * @return the created ia32 Mulh node
1339 static ir_node *gen_Mulh(ir_node *node)
1341 dbg_info *dbgi = get_irn_dbg_info(node);
1342 ir_node *op1 = get_Mulh_left(node);
1343 ir_node *op2 = get_Mulh_right(node);
1344 ir_mode *mode = get_irn_mode(node);
1346 ir_node *proj_res_high;
/* only 32x32 -> 64 bit widening multiplication is handled here */
1348 if (get_mode_size_bits(mode) != 32) {
1349 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed values use IMul1OP, unsigned ones plain Mul; either way the
 * upper half of the product is extracted via the res_high Proj */
1352 if (mode_is_signed(mode)) {
1353 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1354 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1356 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1357 proj_res_high = new_rd_Proj(dbgi, new_node, mode_Iu, pn_ia32_Mul_res_high);
1359 return proj_res_high;
1363 * Creates an ia32 And.
1365 * @return The created ia32 And node
1367 static ir_node *gen_And(ir_node *node)
1369 ir_node *op1 = get_And_left(node);
1370 ir_node *op2 = get_And_right(node);
1371 assert(! mode_is_float(get_irn_mode(node)));
1373 /* is it a zero extension? */
1374 if (is_Const(op2)) {
1375 tarval *tv = get_Const_tarval(op2);
1376 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF masks is a zero extension from 8/16 bit;
 * emit a widening Conv instead of a real And instruction */
1378 if (v == 0xFF || v == 0xFFFF) {
1379 dbg_info *dbgi = get_irn_dbg_info(node);
1380 ir_node *block = get_nodes_block(node);
1387 assert(v == 0xFFFF);
1390 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And with AM/immediate matching */
1395 return gen_binop(node, op1, op2, new_bd_ia32_And,
1396 match_commutative | match_mode_neutral | match_am | match_immediate);
1402 * Creates an ia32 Or.
1404 * @return The created ia32 Or node
1406 static ir_node *gen_Or(ir_node *node)
1408 ir_node *op1 = get_Or_left(node);
1409 ir_node *op2 = get_Or_right(node);
1411 assert (! mode_is_float(get_irn_mode(node)));
1412 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1413 | match_mode_neutral | match_am | match_immediate);
1419 * Creates an ia32 Eor.
1421 * @return The created ia32 Eor node
1423 static ir_node *gen_Eor(ir_node *node)
1425 ir_node *op1 = get_Eor_left(node);
1426 ir_node *op2 = get_Eor_right(node);
1428 assert(! mode_is_float(get_irn_mode(node)));
1429 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1430 | match_mode_neutral | match_am | match_immediate);
1435 * Creates an ia32 Sub.
1437 * @return The created ia32 Sub node
1439 static ir_node *gen_Sub(ir_node *node)
1441 ir_node *op1 = get_Sub_left(node);
1442 ir_node *op2 = get_Sub_right(node);
1443 ir_mode *mode = get_irn_mode(node);
1445 if (mode_is_float(mode)) {
1446 if (ia32_cg_config.use_sse2)
1447 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1449 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* NOTE(review): Sub x,C is apparently expected to be normalized to
 * Add x,-C earlier, hence only a diagnostic here — confirm upstream. */
1452 if (is_Const(op2)) {
1453 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
/* Sub is not commutative, so no match_commutative here */
1457 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1458 | match_am | match_immediate);
/* Build the memory input for a node that consumed a Load via address mode:
 * combine the node's original memory (src_mem) with the AM memory (am_mem)
 * while making sure the folded Load never ends up in its own memory input
 * ("memory loop"). */
1461 static ir_node *transform_AM_mem(ir_node *const block,
1462 ir_node *const src_val,
1463 ir_node *const src_mem,
1464 ir_node *const am_mem)
1466 if (is_NoMem(am_mem)) {
1467 return be_transform_node(src_mem);
1468 } else if (is_Proj(src_val) &&
1470 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1471 /* avoid memory loop */
1473 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1474 ir_node *const ptr_pred = get_Proj_pred(src_val);
1475 int const arity = get_Sync_n_preds(src_mem);
/* rebuild the Sync without the folded Load, plus room for am_mem */
1480 NEW_ARR_A(ir_node*, ins, arity + 1);
1482 /* NOTE: This sometimes produces dead-code because the old sync in
1483 * src_mem might not be used anymore, we should detect this case
1484 * and kill the sync... */
1485 for (i = arity - 1; i >= 0; --i) {
1486 ir_node *const pred = get_Sync_pred(src_mem, i);
1488 /* avoid memory loop */
1489 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1492 ins[n++] = be_transform_node(pred);
1497 return new_r_Sync(block, n, ins);
/* fallback: synchronize the transformed src_mem with the AM memory */
1501 ins[0] = be_transform_node(src_mem);
1503 return new_r_Sync(block, 2, ins);
1508 * Create a 32bit to 64bit signed extension.
1510 * @param dbgi debug info
1511 * @param block the block where node nodes should be placed
1512 * @param val the value to extend
1513 * @param orig the original node
1515 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1516 ir_node *val, const ir_node *orig)
/* either the short Cltd form or an explicit arithmetic shift by 31,
 * depending on the target configuration */
1521 if (ia32_cg_config.use_short_sex_eax) {
1522 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1523 be_dep_on_frame(pval);
1524 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* Sar val, 31 replicates the sign bit into all upper bits */
1526 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1527 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1529 SET_IA32_ORIG_NODE(res, orig);
1534 * Generates an ia32 DivMod with additional infrastructure for the
1535 * register allocator if needed.
1537 static ir_node *create_Div(ir_node *node)
1539 dbg_info *dbgi = get_irn_dbg_info(node);
1540 ir_node *block = get_nodes_block(node);
1541 ir_node *new_block = be_transform_node(block);
1548 ir_node *sign_extension;
1549 ia32_address_mode_t am;
1550 ia32_address_t *addr = &am.addr;
1552 /* the upper bits have random contents for smaller modes */
/* shared entry point for Div, Mod and DivMod: pick the operands,
 * memory and result mode according to the actual opcode */
1553 switch (get_irn_opcode(node)) {
1555 op1 = get_Div_left(node);
1556 op2 = get_Div_right(node);
1557 mem = get_Div_mem(node);
1558 mode = get_Div_resmode(node);
1561 op1 = get_Mod_left(node);
1562 op2 = get_Mod_right(node);
1563 mem = get_Mod_mem(node);
1564 mode = get_Mod_resmode(node);
1567 op1 = get_DivMod_left(node);
1568 op2 = get_DivMod_right(node);
1569 mem = get_DivMod_mem(node);
1570 mode = get_DivMod_resmode(node);
1573 panic("invalid divmod node %+F", node);
/* operands must be proper 32bit values (upconv) and op2 may come
 * from memory */
1576 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1578 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1579 is the memory of the consumed address. We can have only the second op as address
1580 in Div nodes, so check only op2. */
1581 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs the dividend sign-extended to 64 bit,
 * unsigned division needs the upper half zeroed */
1583 if (mode_is_signed(mode)) {
1584 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1585 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1586 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1588 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1589 be_dep_on_frame(sign_extension);
1591 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1592 addr->index, new_mem, am.new_op2,
1593 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1596 set_irn_pinned(new_node, get_irn_pinned(node));
1598 set_am_attributes(new_node, &am);
1599 SET_IA32_ORIG_NODE(new_node, node);
1601 new_node = fix_mem_proj(new_node, &am);
1607 * Generates an ia32 Mod.
1609 static ir_node *gen_Mod(ir_node *node)
1611 return create_Div(node);
1615 * Generates an ia32 Div.
1617 static ir_node *gen_Div(ir_node *node)
1619 return create_Div(node);
1623 * Generates an ia32 DivMod.
1625 static ir_node *gen_DivMod(ir_node *node)
1627 return create_Div(node);
1633 * Creates an ia32 floating Div.
1635 * @return The created ia32 xDiv node
1637 static ir_node *gen_Quot(ir_node *node)
1639 ir_node *op1 = get_Quot_left(node);
1640 ir_node *op2 = get_Quot_right(node);
1642 if (ia32_cg_config.use_sse2) {
1643 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1645 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1651 * Creates an ia32 Shl.
1653 * @return The created ia32 Shl node
1655 static ir_node *gen_Shl(ir_node *node)
1657 ir_node *left = get_Shl_left(node);
1658 ir_node *right = get_Shl_right(node);
1660 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1661 match_mode_neutral | match_immediate);
1665 * Creates an ia32 Shr.
1667 * @return The created ia32 Shr node
1669 static ir_node *gen_Shr(ir_node *node)
1671 ir_node *left = get_Shr_left(node);
1672 ir_node *right = get_Shr_right(node);
1674 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1680 * Creates an ia32 Sar.
1682 * @return The created ia32 Shrs node
1684 static ir_node *gen_Shrs(ir_node *node)
1686 ir_node *left = get_Shrs_left(node);
1687 ir_node *right = get_Shrs_right(node);
/* Shrs x, 31 (presumably — the exact constant test is elided here) is a
 * full sign extension; use the dedicated helper for it */
1689 if (is_Const(right)) {
1690 tarval *tv = get_Const_tarval(right);
1691 long val = get_tarval_long(tv);
1693 /* this is a sign extension */
1694 dbg_info *dbgi = get_irn_dbg_info(node);
1695 ir_node *block = be_transform_node(get_nodes_block(node));
1696 ir_node *new_op = be_transform_node(left);
1698 return create_sex_32_64(dbgi, block, new_op, node);
1702 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 sign-extends the low 16/8 bits;
 * emit a widening Conv instead of the shift pair */
1703 if (is_Const(right) && is_Shl(left)) {
1704 ir_node *shl_left = get_Shl_left(left);
1705 ir_node *shl_right = get_Shl_right(left);
1706 if (is_Const(shl_right)) {
1707 tarval *tv1 = get_Const_tarval(right);
1708 tarval *tv2 = get_Const_tarval(shl_right);
1709 if (tv1 == tv2 && tarval_is_long(tv1)) {
1710 long val = get_tarval_long(tv1);
1711 if (val == 16 || val == 24) {
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_node *block = get_nodes_block(node);
1723 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: a plain Sar */
1732 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1738 * Creates an ia32 Rol.
1740 * @param op1 The first operator
1741 * @param op2 The second operator
1742 * @return The created ia32 RotL node
1744 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1746 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1752 * Creates an ia32 Ror.
1753 * NOTE: There is no RotR with immediate because this would always be a RotL
1754 * "imm-mode_size_bits" which can be pre-calculated.
1756 * @param op1 The first operator
1757 * @param op2 The second operator
1758 * @return The created ia32 RotR node
1760 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1762 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1768 * Creates an ia32 RotR or RotL (depending on the found pattern).
1770 * @return The created ia32 RotL or RotR node
1772 static ir_node *gen_Rotl(ir_node *node)
1774 ir_node *rotate = NULL;
1775 ir_node *op1 = get_Rotl_left(node);
1776 ir_node *op2 = get_Rotl_right(node);
1778 /* Firm has only RotL, so we are looking for a right (op2)
1779 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1780 that means we can create a RotR instead of an Add and a RotL */
/* pattern: Rotl(x, Add(Minus(e), bits)) == Ror(x, e) */
1784 ir_node *left = get_Add_left(add);
1785 ir_node *right = get_Add_right(add);
1786 if (is_Const(right)) {
1787 tarval *tv = get_Const_tarval(right);
1788 ir_mode *mode = get_irn_mode(node);
1789 long bits = get_mode_size_bits(mode);
1791 if (is_Minus(left) &&
1792 tarval_is_long(tv) &&
1793 get_tarval_long(tv) == bits &&
1796 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1797 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern didn't match: fall back to a plain RotL */
1802 if (rotate == NULL) {
1803 rotate = gen_Rol(node, op1, op2);
1812 * Transforms a Minus node.
1814 * @return The created ia32 Minus node
1816 static ir_node *gen_Minus(ir_node *node)
1818 ir_node *op = get_Minus_op(node);
1819 ir_node *block = be_transform_node(get_nodes_block(node));
1820 dbg_info *dbgi = get_irn_dbg_info(node);
1821 ir_mode *mode = get_irn_mode(node);
1826 if (mode_is_float(mode)) {
1827 ir_node *new_op = be_transform_node(op);
/* SSE2 has no fp negate: xor the sign bit using a known constant
 * loaded via address mode */
1828 if (ia32_cg_config.use_sse2) {
1829 /* TODO: non-optimal... if we have many xXors, then we should
1830 * rather create a load for the const and use that instead of
1831 * several AM nodes... */
1832 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1834 new_node = new_bd_ia32_xXor(dbgi, block, get_symconst_base(),
1835 noreg_GP, nomem, new_op, noreg_xmm);
/* pick the single/double precision sign-bit constant */
1837 size = get_mode_size_bits(mode);
1838 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1840 set_ia32_am_sc(new_node, ent);
1841 set_ia32_op_type(new_node, ia32_AddrModeS);
1842 set_ia32_ls_mode(new_node, mode);
/* x87 has a native change-sign instruction */
1844 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer negate */
1847 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1850 SET_IA32_ORIG_NODE(new_node, node);
1856 * Transforms a Not node.
1858 * @return The created ia32 Not node
1860 static ir_node *gen_Not(ir_node *node)
1862 ir_node *op = get_Not_op(node);
1864 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1865 assert (! mode_is_float(get_irn_mode(node)));
1867 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1873 * Transforms an Abs node.
1875 * @return The created ia32 Abs node
1877 static ir_node *gen_Abs(ir_node *node)
1879 ir_node *block = get_nodes_block(node);
1880 ir_node *new_block = be_transform_node(block);
1881 ir_node *op = get_Abs_op(node);
1882 dbgi = get_irn_dbg_info(node);
1883 ir_mode *mode = get_irn_mode(node);
1889 if (mode_is_float(mode)) {
1890 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by And-ing with a known abs-mask constant */
1892 if (ia32_cg_config.use_sse2) {
1893 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1894 new_node = new_bd_ia32_xAnd(dbgi, new_block, get_symconst_base(),
1895 noreg_GP, nomem, new_op, noreg_fp);
/* pick the single/double precision abs-mask constant */
1897 size = get_mode_size_bits(mode);
1898 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1900 set_ia32_am_sc(new_node, ent);
1902 SET_IA32_ORIG_NODE(new_node, node);
1904 set_ia32_op_type(new_node, ia32_AddrModeS);
1905 set_ia32_ls_mode(new_node, mode);
/* x87 has a native fabs */
1907 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1908 SET_IA32_ORIG_NODE(new_node, node);
1911 ir_node *xor, *sign_extension;
/* integer: widen sub-32bit values to a full register first */
1913 if (get_mode_size_bits(mode) == 32) {
1914 new_op = be_transform_node(op);
1916 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
/* branch-free abs: with s = x >> 31, abs(x) == (x ^ s) - s */
1919 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1921 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1922 nomem, new_op, sign_extension);
1923 SET_IA32_ORIG_NODE(xor, node);
1925 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1926 nomem, xor, sign_extension);
1927 SET_IA32_ORIG_NODE(new_node, node);
1934 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1936 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1938 dbg_info *dbgi = get_irn_dbg_info(cmp);
1939 ir_node *block = get_nodes_block(cmp);
1940 ir_node *new_block = be_transform_node(block);
1941 ir_node *op1 = be_transform_node(x);
1942 ir_node *op2 = be_transform_node(n);
1944 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1948 * Transform a node returning a "flag" result.
1950 * @param node the node to transform
1951 * @param pnc_out the compare mode to use
1953 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1960 /* we have a Cmp as input */
1961 if (is_Proj(node)) {
1962 ir_node *pred = get_Proj_pred(node);
1964 pn_Cmp pnc = get_Proj_proj(node);
/* try to turn Cmp(And(x, 1 << n), 0) tests into a Bt instruction;
 * both operand orders of the And are checked below */
1965 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1966 ir_node *l = get_Cmp_left(pred);
1967 ir_node *r = get_Cmp_right(pred);
1969 ir_node *la = get_And_left(l);
1970 ir_node *ra = get_And_right(l);
1972 ir_node *c = get_Shl_left(la);
1973 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1974 /* (1 << n) & ra) */
1975 ir_node *n = get_Shl_right(la);
1976 flags = gen_bt(pred, ra, n);
1977 /* we must generate a Jc/Jnc jump */
/* Bt sets the carry flag, so the condition is rewritten to the
 * (unsigned) carry test */
1978 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1981 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored case: the shift is the right And operand */
1986 ir_node *c = get_Shl_left(ra);
1987 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1988 /* la & (1 << n)) */
1989 ir_node *n = get_Shl_right(ra);
1990 flags = gen_bt(pred, la, n);
1991 /* we must generate a Jc/Jnc jump */
1992 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1995 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
2001 /* add ia32 compare flags */
2003 ir_node *l = get_Cmp_left(pred);
2004 ir_mode *mode = get_irn_mode(l);
2005 if (mode_is_float(mode))
2006 pnc |= ia32_pn_Cmp_float;
2007 else if (! mode_is_signed(mode))
2008 pnc |= ia32_pn_Cmp_unsigned;
2011 flags = be_transform_node(pred);
2016 /* a mode_b value, we have to compare it against 0 */
2017 dbgi = get_irn_dbg_info(node);
2018 new_block = be_transform_node(get_nodes_block(node));
2019 new_op = be_transform_node(node);
/* Test op,op just sets the flags according to the value itself */
2020 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
2021 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
2022 *pnc_out = pn_Cmp_Lg;
2027 * Transforms a Load.
2029 * @return the created ia32 Load node
2031 static ir_node *gen_Load(ir_node *node)
2033 ir_node *old_block = get_nodes_block(node);
2034 ir_node *block = be_transform_node(old_block);
2035 ir_node *ptr = get_Load_ptr(node);
2036 ir_node *mem = get_Load_mem(node);
2037 ir_node *new_mem = be_transform_node(mem);
2040 dbg_info *dbgi = get_irn_dbg_info(node);
2041 ir_mode *mode = get_Load_mode(node);
2043 ia32_address_t addr;
2045 /* construct load address */
2046 memset(&addr, 0, sizeof(addr));
2047 ia32_create_address_mode(&addr, ptr, 0);
2054 base = be_transform_node(base);
2057 if (index == NULL) {
2060 index = be_transform_node(index);
/* pick the load flavour: SSE2 xLoad or x87 vfld for floats,
 * widening Conv-load for sub-32bit integers, plain Load otherwise */
2063 if (mode_is_float(mode)) {
2064 if (ia32_cg_config.use_sse2) {
2065 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2068 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2072 assert(mode != mode_b);
2074 /* create a conv node with address mode for smaller modes */
2075 if (get_mode_size_bits(mode) < 32) {
2076 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2077 new_mem, noreg_GP, mode);
2079 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2083 set_irn_pinned(new_node, get_irn_pinned(node));
2084 set_ia32_op_type(new_node, ia32_AddrModeS);
2085 set_ia32_ls_mode(new_node, mode);
2086 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
2088 if (get_irn_pinned(node) == op_pin_state_floats) {
2089 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2090 && pn_ia32_vfld_res == pn_ia32_Load_res
2091 && pn_ia32_Load_res == pn_ia32_res);
2092 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2095 SET_IA32_ORIG_NODE(new_node, node);
2097 be_dep_on_frame(new_node);
/* Decide whether a Load feeding 'node' can be folded into a
 * destination-address-mode operation writing through 'ptr'. */
2101 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2102 ir_node *ptr, ir_node *other)
2109 /* we only use address mode if we're the only user of the load */
2110 if (get_irn_n_edges(node) > 1)
2113 load = get_Proj_pred(node);
/* load must live in the same block as the consumer */
2116 if (get_nodes_block(load) != block)
2119 /* store should have the same pointer as the load */
2120 if (get_Load_ptr(load) != ptr)
2123 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2124 if (other != NULL &&
2125 get_nodes_block(other) == block &&
2126 heights_reachable_in_block(heights, other, load)) {
2130 if (prevents_AM(block, load, mem))
2132 /* Store should be attached to the load via mem */
2133 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binop (op [mem], reg/imm) for a
 * Store(binop(Load(ptr), x)) pattern; returns NULL via the elided
 * fallthrough when neither operand qualifies. */
2138 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2139 ir_node *mem, ir_node *ptr, ir_mode *mode,
2140 construct_binop_dest_func *func,
2141 construct_binop_dest_func *func8bit,
2142 match_flags_t flags)
2144 ir_node *src_block = get_nodes_block(node);
2152 ia32_address_mode_t am;
2153 ia32_address_t *addr = &am.addr;
2154 memset(&am, 0, sizeof(am));
2156 assert(flags & match_immediate); /* there is no destam node without... */
2157 commutative = (flags & match_commutative) != 0;
/* try to fold op1's load; for commutative ops the operands may swap */
2159 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2160 build_address(&am, op1, ia32_create_am_double_use);
2161 new_op = create_immediate_or_transform(op2, 0);
2162 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2163 build_address(&am, op2, ia32_create_am_double_use);
2164 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with the noreg placeholder */
2169 if (addr->base == NULL)
2170 addr->base = noreg_GP;
2171 if (addr->index == NULL)
2172 addr->index = noreg_GP;
2173 if (addr->mem == NULL)
2176 dbgi = get_irn_dbg_info(node);
2177 block = be_transform_node(src_block);
2178 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8 bit operations need the dedicated 8bit constructor */
2180 if (get_mode_size_bits(mode) == 8) {
2181 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2183 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2185 set_address(new_node, addr);
2186 set_ia32_op_type(new_node, ia32_AddrModeD);
2187 set_ia32_ls_mode(new_node, mode);
2188 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): the two transformed-node registrations below look like
 * an elided conditional around the mem_proj handling — verify against
 * the full source before editing. */
2190 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2191 mem_proj = be_transform_node(am.mem_proj);
2192 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unop (op [mem]) for a
 * Store(unop(Load(ptr))) pattern; NULL-returning fallthrough elided. */
2197 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2198 ir_node *ptr, ir_mode *mode,
2199 construct_unop_dest_func *func)
2201 ir_node *src_block = get_nodes_block(node);
2207 ia32_address_mode_t am;
2208 ia32_address_t *addr = &am.addr;
2210 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2213 memset(&am, 0, sizeof(am));
2214 build_address(&am, op, ia32_create_am_double_use);
2216 dbgi = get_irn_dbg_info(node);
2217 block = be_transform_node(src_block);
2218 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2219 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2220 set_address(new_node, addr);
2221 set_ia32_op_type(new_node, ia32_AddrModeD);
2222 set_ia32_ls_mode(new_node, mode);
2223 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): same possibly-elided mem_proj conditional as in
 * dest_am_binop — verify against the full source. */
2225 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2226 mem_proj = be_transform_node(am.mem_proj);
2227 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
2232 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2234 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2235 return get_negated_pnc(pnc, mode);
/* Try to turn a Store(Mux(cond, 1, 0)) into a SetccMem writing the
 * condition byte directly to memory; returns NULL (elided) when the
 * pattern doesn't apply. */
2238 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2240 ir_mode *mode = get_irn_mode(node);
2241 ir_node *mux_true = get_Mux_true(node);
2242 ir_node *mux_false = get_Mux_false(node);
2251 ia32_address_t addr;
/* Setcc only writes a single byte */
2253 if (get_mode_size_bits(mode) != 8)
/* accept Mux(c, 1, 0) directly or Mux(c, 0, 1) with negated condition */
2256 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2258 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2264 cond = get_Mux_sel(node);
2265 flags = get_flags_node(cond, &pnc);
2266 /* we can't handle the float special cases with SetM */
2267 if (pnc & ia32_pn_Cmp_float)
2270 pnc = ia32_get_negated_pnc(pnc);
2272 build_address_ptr(&addr, ptr, mem);
2274 dbgi = get_irn_dbg_info(node);
2275 block = get_nodes_block(node);
2276 new_block = be_transform_node(block);
2277 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2278 addr.index, addr.mem, flags, pnc);
2279 set_address(new_node, &addr);
2280 set_ia32_op_type(new_node, ia32_AddrModeD);
2281 set_ia32_ls_mode(new_node, mode);
2282 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store(op(Load)) combination into a single
 * destination-address-mode instruction; returns the new node or NULL
 * (elided) if no pattern matched. */
2287 static ir_node *try_create_dest_am(ir_node *node)
2289 ir_node *val = get_Store_value(node);
2290 ir_node *mem = get_Store_mem(node);
2291 ir_node *ptr = get_Store_ptr(node);
2292 ir_mode *mode = get_irn_mode(val);
2293 unsigned bits = get_mode_size_bits(mode);
2298 /* handle only GP modes for now... */
2299 if (!ia32_mode_needs_gp_reg(mode))
2303 /* store must be the only user of the val node */
2304 if (get_irn_n_edges(val) > 1)
2306 /* skip pointless convs */
2308 ir_node *conv_op = get_Conv_op(val);
2309 ir_mode *pred_mode = get_irn_mode(conv_op);
2310 if (!ia32_mode_needs_gp_reg(pred_mode))
/* a Conv that does not lose bits is irrelevant for the stored value */
2312 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2320 /* value must be in the same block */
2321 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value */
2324 switch (get_irn_opcode(val)) {
2326 op1 = get_Add_left(val);
2327 op2 = get_Add_right(val);
/* Add/Sub of +-1 can use the shorter inc/dec forms if enabled */
2328 if (ia32_cg_config.use_incdec) {
2329 if (is_Const_1(op2)) {
2330 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2332 } else if (is_Const_Minus_1(op2)) {
2333 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2339 match_commutative | match_immediate);
2342 op1 = get_Sub_left(val);
2343 op2 = get_Sub_right(val);
2344 if (is_Const(op2)) {
2345 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2347 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2348 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2352 op1 = get_And_left(val);
2353 op2 = get_And_right(val);
2354 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2355 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2356 match_commutative | match_immediate);
2359 op1 = get_Or_left(val);
2360 op2 = get_Or_right(val);
2361 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2362 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2363 match_commutative | match_immediate);
2366 op1 = get_Eor_left(val);
2367 op2 = get_Eor_right(val);
2368 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2369 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2370 match_commutative | match_immediate);
/* the shift operations have no separate 8bit constructor */
2373 op1 = get_Shl_left(val);
2374 op2 = get_Shl_right(val);
2375 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2376 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2380 op1 = get_Shr_left(val);
2381 op2 = get_Shr_right(val);
2382 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2383 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2387 op1 = get_Shrs_left(val);
2388 op2 = get_Shrs_right(val);
2389 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2390 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2394 op1 = get_Rotl_left(val);
2395 op2 = get_Rotl_right(val);
2396 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2397 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2400 /* TODO: match ROR patterns... */
2402 new_node = try_create_SetMem(val, ptr, mem);
2406 op1 = get_Minus_op(val);
2407 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2410 /* should be lowered already */
2411 assert(mode != mode_b);
2412 op1 = get_Not_op(val);
2413 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* propagate the pinned state of the original Store */
2419 if (new_node != NULL) {
2420 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2421 get_irn_pinned(node) == op_pin_state_pinned) {
2422 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if an integer value of this mode can take part in the x87
 * fist/fild-style integer<->float path: only signed 16/32 bit modes
 * qualify (additional checks are elided here — TODO confirm). */
2429 static bool possible_int_mode_for_fp(ir_mode *mode)
2433 if (!mode_is_signed(mode))
2435 size = get_mode_size_bits(mode);
2436 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode into an integer mode that the
 * x87 fist path can produce directly. */
2441 static int is_float_to_int_conv(const ir_node *node)
2443 ir_mode *mode = get_irn_mode(node);
2447 if (!possible_int_mode_for_fp(mode))
2452 conv_op = get_Conv_op(node);
2453 conv_mode = get_irn_mode(conv_op);
2455 if (!mode_is_float(conv_mode))
2462 * Transform a Store(floatConst) into a sequence of
2465 * @return the created ia32 Store node
2467 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2469 ir_mode *mode = get_irn_mode(cns);
2470 unsigned size = get_mode_size_bytes(mode);
2471 tarval *tv = get_Const_tarval(cns);
2472 ir_node *block = get_nodes_block(node);
2473 ir_node *new_block = be_transform_node(block);
2474 ir_node *ptr = get_Store_ptr(node);
2475 ir_node *mem = get_Store_mem(node);
2476 dbg_info *dbgi = get_irn_dbg_info(node);
2480 ia32_address_t addr;
/* the float constant is written as one or more 32bit immediate stores */
2482 assert(size % 4 == 0);
2485 build_address_ptr(&addr, ptr, mem);
/* assemble the next 32bit chunk of the constant, little endian */
2489 get_tarval_sub_bits(tv, ofs) |
2490 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2491 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2492 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2493 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2495 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2496 addr.index, addr.mem, imm);
2498 set_irn_pinned(new_node, get_irn_pinned(node));
2499 set_ia32_op_type(new_node, ia32_AddrModeD);
2500 set_ia32_ls_mode(new_node, mode_Iu);
2501 set_address(new_node, &addr);
2502 SET_IA32_ORIG_NODE(new_node, node);
/* collect the partial stores; they are Synced at the end */
2505 ins[i++] = new_node;
2510 } while (size != 0);
2513 return new_rd_Sync(dbgi, new_block, i, ins);
2520 * Generate a vfist or vfisttp instruction.
2522 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2523 ir_node *mem, ir_node *val, ir_node **fist)
2527 if (ia32_cg_config.use_fisttp) {
2528 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2529 if other users exists */
2530 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2531 ir_node *value = new_r_Proj(vfisttp, mode_E, pn_ia32_vfisttp_res);
/* the Keep forces a copy so the popped value stays available */
2532 be_new_Keep(block, 1, &value);
2534 new_node = new_r_Proj(vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic vfist needs the fpu control word set to truncation mode */
2537 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2540 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2546 * Transforms a general (no special case) Store.
2548 * @return the created ia32 Store node
2550 static ir_node *gen_general_Store(ir_node *node)
2552 ir_node *val = get_Store_value(node);
2553 ir_mode *mode = get_irn_mode(val);
2554 ir_node *block = get_nodes_block(node);
2555 ir_node *new_block = be_transform_node(block);
2556 ir_node *ptr = get_Store_ptr(node);
2557 ir_node *mem = get_Store_mem(node);
2558 dbg_info *dbgi = get_irn_dbg_info(node);
2559 ir_node *new_val, *new_node, *store;
2560 ia32_address_t addr;
2562 /* check for destination address mode */
2563 new_node = try_create_dest_am(node);
2564 if (new_node != NULL)
2567 /* construct store address */
2568 memset(&addr, 0, sizeof(addr));
2569 ia32_create_address_mode(&addr, ptr, 0);
2571 if (addr.base == NULL) {
2572 addr.base = noreg_GP;
2574 addr.base = be_transform_node(addr.base);
2577 if (addr.index == NULL) {
2578 addr.index = noreg_GP;
2580 addr.index = be_transform_node(addr.index);
2582 addr.mem = be_transform_node(mem);
/* float path: skip same-mode Convs, then use xStore (SSE2) or vfst (x87) */
2584 if (mode_is_float(mode)) {
2585 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2587 while (is_Conv(val) && mode == get_irn_mode(val)) {
2588 ir_node *op = get_Conv_op(val);
2589 if (!mode_is_float(get_irn_mode(op)))
2593 new_val = be_transform_node(val);
2594 if (ia32_cg_config.use_sse2) {
2595 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2596 addr.index, addr.mem, new_val);
2598 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2599 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float->int)) on x87 becomes a single fist */
2602 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2603 val = get_Conv_op(val);
2605 /* TODO: is this optimisation still necessary at all (middleend)? */
2606 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2607 while (is_Conv(val)) {
2608 ir_node *op = get_Conv_op(val);
2609 if (!mode_is_float(get_irn_mode(op)))
2611 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2615 new_val = be_transform_node(val);
2616 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer path: value may be an immediate; 8bit needs Store8Bit */
2618 new_val = create_immediate_or_transform(val, 0);
2619 assert(mode != mode_b);
2621 if (get_mode_size_bits(mode) == 8) {
2622 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2623 addr.index, addr.mem, new_val);
2625 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2626 addr.index, addr.mem, new_val);
2631 set_irn_pinned(store, get_irn_pinned(node));
2632 set_ia32_op_type(store, ia32_AddrModeD);
2633 set_ia32_ls_mode(store, mode);
2635 set_address(store, &addr);
2636 SET_IA32_ORIG_NODE(store, node);
2642 * Transforms a Store.
2644 * @return the created ia32 Store node
/* Transform a Store.
 * Floating-point constant stores are rewritten as integer stores of the
 * constant's bit pattern; everything else goes to gen_general_Store.
 * @return the created ia32 Store node */
2646 static ir_node *gen_Store(ir_node *node)
2648 ir_node *val = get_Store_value(node);
2649 ir_mode *mode = get_irn_mode(val);
2651 if (mode_is_float(mode) && is_Const(val)) {
2652 /* We can transform every floating const store
2653 into a sequence of integer stores.
2654 If the constant is already in a register,
2655 it would be better to use it, but we don't
2656 have this information here. */
2657 return gen_float_const_Store(node, val);
2659 return gen_general_Store(node);
2663 * Transforms a Switch.
2665 * @return the created ia32 SwitchJmp node
/* Transform a switch-Cond into an ia32 SwitchJmp (jump table).
 * Scans the Proj successors for the smallest/largest case value, rejects
 * tables with a spread above 128000, and biases the selector by
 * -switch_min (via Lea) so the table can start at index 0.
 * @return the created ia32 SwitchJmp node */
2667 static ir_node *create_Switch(ir_node *node)
2669 dbg_info *dbgi = get_irn_dbg_info(node);
2670 ir_node *block = be_transform_node(get_nodes_block(node));
2671 ir_node *sel = get_Cond_selector(node);
2672 ir_node *new_sel = be_transform_node(sel);
2673 long switch_min = LONG_MAX;
2674 long switch_max = LONG_MIN;
2675 long default_pn = get_Cond_default_proj(node);
2677 const ir_edge_t *edge;
2679 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2681 /* determine the smallest switch case value */
2682 foreach_out_edge(node, edge) {
2683 ir_node *proj = get_edge_src_irn(edge);
2684 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the table range */
2685 if (pn == default_pn)
2688 if (pn < switch_min)
2690 if (pn > switch_max)
/* refuse absurdly large jump tables */
2694 if ((unsigned long) (switch_max - switch_min) > 128000) {
2695 panic("Size of switch %+F bigger than 128000", node);
2698 if (switch_min != 0) {
2699 /* if smallest switch case is not 0 we need an additional sub */
2700 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2701 add_ia32_am_offs_int(new_sel, -switch_min);
2702 set_ia32_op_type(new_sel, ia32_AddrModeS);
2704 SET_IA32_ORIG_NODE(new_sel, node);
2707 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2708 SET_IA32_ORIG_NODE(new_node, node);
2714 * Transform a Cond node.
/* Transform a Cond node.
 * A non-mode_b selector means a switch; otherwise the boolean selector
 * is lowered to a flags-producing node plus a conditional jump (Jcc). */
2716 static ir_node *gen_Cond(ir_node *node)
2718 ir_node *block = get_nodes_block(node);
2719 ir_node *new_block = be_transform_node(block);
2720 dbg_info *dbgi = get_irn_dbg_info(node);
2721 ir_node *sel = get_Cond_selector(node);
2722 ir_mode *sel_mode = get_irn_mode(sel);
2723 ir_node *flags = NULL;
2727 if (sel_mode != mode_b) {
2728 return create_Switch(node);
2731 /* we get flags from a Cmp */
2732 flags = get_flags_node(sel, &pnc);
2734 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2735 SET_IA32_ORIG_NODE(new_node, node);
2741 * Transform a be_Copy.
/* Transform a be_Copy: duplicate the node and normalize the mode of
 * general-purpose-register copies to mode_Iu. */
2743 static ir_node *gen_be_Copy(ir_node *node)
2745 ir_node *new_node = be_duplicate_node(node);
2746 ir_mode *mode = get_irn_mode(new_node);
2748 if (ia32_mode_needs_gp_reg(mode)) {
2749 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare producing CPU flags.
 * With fucomi the flags are written directly; otherwise the FPU status
 * word is fetched (ftst/fucom + fnstsw) and transferred to the flags
 * register with a Sahf. ftst is used for compares against 0. */
2755 static ir_node *create_Fucom(ir_node *node)
2757 dbg_info *dbgi = get_irn_dbg_info(node);
2758 ir_node *block = get_nodes_block(node);
2759 ir_node *new_block = be_transform_node(block);
2760 ir_node *left = get_Cmp_left(node);
2761 ir_node *new_left = be_transform_node(left);
2762 ir_node *right = get_Cmp_right(node);
2766 if (ia32_cg_config.use_fucomi) {
2767 new_right = be_transform_node(right);
2768 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2770 set_ia32_commutative(new_node);
2771 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use the shorter ftst */
2773 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2774 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2776 new_right = be_transform_node(right);
2777 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2780 set_ia32_commutative(new_node);
2782 SET_IA32_ORIG_NODE(new_node, node);
/* move the FPU status word (in AH after fnstsw) into EFLAGS */
2784 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2785 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE float compare (ucomis[sd]) producing CPU flags.
 * Operands are matched for source address mode; the compare is
 * commutative so either operand may be folded into memory. */
2791 static ir_node *create_Ucomi(ir_node *node)
2793 dbg_info *dbgi = get_irn_dbg_info(node);
2794 ir_node *src_block = get_nodes_block(node);
2795 ir_node *new_block = be_transform_node(src_block);
2796 ir_node *left = get_Cmp_left(node);
2797 ir_node *right = get_Cmp_right(node);
2799 ia32_address_mode_t am;
2800 ia32_address_t *addr = &am.addr;
2802 match_arguments(&am, src_block, left, right, NULL,
2803 match_commutative | match_am);
2805 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2806 addr->mem, am.new_op1, am.new_op2,
2808 set_am_attributes(new_node, &am);
2810 SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory users if address mode folded a load */
2812 new_node = fix_mem_proj(new_node, &am);
2818 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2819 * to fold an and into a test node
/* Check whether all Cmp projs are Eq or Lg; only then can an And
 * feeding the Cmp be folded into a Test instruction (Test only yields
 * meaningful equal/unequal results). */
2821 static bool can_fold_test_and(ir_node *node)
2823 const ir_edge_t *edge;
2825 /* we can only have eq and lg projs */
2826 foreach_out_edge(node, edge) {
2827 ir_node *proj = get_edge_src_irn(edge);
2828 pn_Cmp pnc = get_Proj_proj(proj);
2829 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2837 * returns true if it is assured, that the upper bits of a node are "clean"
2838 * which means for a 16 or 8 bit value, that the upper bits in the register
2839 * are 0 for unsigned and a copy of the last significant bit for signed
/* Return true if the bits of 'transformed_node' above the width of
 * 'mode' are known "clean": zero for unsigned modes, sign-copies for
 * signed modes. Used to pick the smaller 32-bit compare encodings.
 * NOTE(review): the excerpt elides several switch case labels (Shr,
 * Sar, And, the default binary-op case) — the visible bodies are
 * attributed to them by context; verify against the full file. */
2842 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2844 assert(ia32_mode_needs_gp_reg(mode));
/* >= 32 bit values occupy the whole register: nothing above to check */
2845 if (get_mode_size_bits(mode) >= 32)
2848 if (is_Proj(transformed_node))
2849 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2851 switch (get_ia32_irn_opcode(transformed_node)) {
2852 case iro_ia32_Conv_I2I:
2853 case iro_ia32_Conv_I2I8Bit: {
2854 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv with different signedness or from a wider mode proves nothing */
2855 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2857 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2864 if (mode_is_signed(mode)) {
2865 return false; /* TODO handle signed modes */
2867 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2868 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2869 const ia32_immediate_attr_t *attr
2870 = get_ia32_immediate_attr_const(right);
/* shifting right by at least (32 - bits) zeroes the upper bits */
2871 if (attr->symconst == 0 &&
2872 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2876 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2880 /* TODO too conservative if shift amount is constant */
2881 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: one clean unsigned operand suffices (zeroes win) */
2884 if (!mode_is_signed(mode)) {
2886 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2887 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2889 /* TODO if one is known to be zero extended, then || is sufficient */
2894 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2895 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2897 case iro_ia32_Const:
2898 case iro_ia32_Immediate: {
2899 const ia32_immediate_attr_t *attr =
2900 get_ia32_immediate_attr_const(transformed_node);
/* a constant is clean iff the bits above 'mode' are all 0 (or all 1
 * including the sign bit, for signed modes) */
2901 if (mode_is_signed(mode)) {
2902 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2903 return shifted == 0 || shifted == -1;
2905 unsigned long shifted = (unsigned long)attr->offset;
2906 shifted >>= get_mode_size_bits(mode);
2907 return shifted == 0;
2917 * Generate code for a Cmp.
/* Generate code for a Cmp: float compares go to Ucomi (SSE) or Fucom
 * (x87); integer compares become Test (for "(x & y) ==/!= 0") or Cmp,
 * widened to the 32-bit encoding whenever the upper bits are clean. */
2919 static ir_node *gen_Cmp(ir_node *node)
2921 dbg_info *dbgi = get_irn_dbg_info(node);
2922 ir_node *block = get_nodes_block(node);
2923 ir_node *new_block = be_transform_node(block);
2924 ir_node *left = get_Cmp_left(node);
2925 ir_node *right = get_Cmp_right(node);
2926 ir_mode *cmp_mode = get_irn_mode(left);
2928 ia32_address_mode_t am;
2929 ia32_address_t *addr = &am.addr;
2932 if (mode_is_float(cmp_mode)) {
2933 if (ia32_cg_config.use_sse2) {
2934 return create_Ucomi(node);
2936 return create_Fucom(node);
2940 assert(ia32_mode_needs_gp_reg(cmp_mode));
2942 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2943 cmp_unsigned = !mode_is_signed(cmp_mode);
/* folding requires the And to have this Cmp as its single user and all
 * projs to be Eq/Lg */
2944 if (is_Const_0(right) &&
2946 get_irn_n_edges(left) == 1 &&
2947 can_fold_test_and(node)) {
2948 /* Test(and_left, and_right) */
2949 ir_node *and_left = get_And_left(left);
2950 ir_node *and_right = get_And_right(left);
2952 /* matze: code here used mode instead of cmd_mode, I think it is always
2953 * the same as cmp_mode, but I leave this here to see if this is really
2956 assert(get_irn_mode(and_left) == cmp_mode);
2958 match_arguments(&am, block, and_left, and_right, NULL,
2960 match_am | match_8bit_am | match_16bit_am |
2961 match_am_and_immediates | match_immediate);
2963 /* use 32bit compare mode if possible since the opcode is smaller */
2964 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2965 upper_bits_clean(am.new_op2, cmp_mode)) {
2966 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2969 if (get_mode_size_bits(cmp_mode) == 8) {
2970 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2971 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2974 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2975 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2978 /* Cmp(left, right) */
2979 match_arguments(&am, block, left, right, NULL,
2980 match_commutative | match_am | match_8bit_am |
2981 match_16bit_am | match_am_and_immediates |
2983 /* use 32bit compare mode if possible since the opcode is smaller */
2984 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2985 upper_bits_clean(am.new_op2, cmp_mode)) {
2986 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2989 if (get_mode_size_bits(cmp_mode) == 8) {
2990 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2991 addr->index, addr->mem, am.new_op1,
2992 am.new_op2, am.ins_permuted,
2995 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2996 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2999 set_am_attributes(new_node, &am);
3000 set_ia32_ls_mode(new_node, cmp_mode);
3002 SET_IA32_ORIG_NODE(new_node, node);
3004 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc for a Mux whose values are not both constants.
 * Matches the false/true values for address mode; if the matcher swapped
 * the operands the condition code is negated to compensate.
 * NOTE(review): the assignment of 'addr' (presumably &am.addr) is elided
 * from this excerpt — confirm against the full file. */
3009 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
3012 dbg_info *dbgi = get_irn_dbg_info(node);
3013 ir_node *block = get_nodes_block(node);
3014 ir_node *new_block = be_transform_node(block);
3015 ir_node *val_true = get_Mux_true(node);
3016 ir_node *val_false = get_Mux_false(node);
3018 ia32_address_mode_t am;
3019 ia32_address_t *addr;
3021 assert(ia32_cg_config.use_cmov);
3022 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
3026 match_arguments(&am, block, val_false, val_true, flags,
3027 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* operand order was swapped by the matcher -> invert the condition */
3029 if (am.ins_permuted)
3030 pnc = ia32_get_negated_pnc(pnc);
3032 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
3033 addr->mem, am.new_op1, am.new_op2, new_flags,
3035 set_am_attributes(new_node, &am);
3037 SET_IA32_ORIG_NODE(new_node, node);
3039 new_node = fix_mem_proj(new_node, &am);
3045 * Creates a ia32 Setcc instruction.
/* Create an ia32 Setcc from CPU flags and widen the 8-bit result with a
 * zero-extending conv when the requested mode is wider than 8 bits. */
3047 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
3048 ir_node *flags, pn_Cmp pnc,
3051 ir_mode *mode = get_irn_mode(orig_node);
3054 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3055 SET_IA32_ORIG_NODE(new_node, orig_node);
3057 /* we might need to conv the result up */
3058 if (get_mode_size_bits(mode) > 8) {
3059 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3060 nomem, new_node, mode_Bu);
3061 SET_IA32_ORIG_NODE(new_node, orig_node);
3068 * Create instruction for an unsigned Difference or Zero.
/* Create an unsigned "difference or zero": a >= b ? a - b : 0.
 * Built branchlessly from Sub; Sbb0 turns the borrow flag into 0/-1,
 * Not inverts it to an all-ones/all-zero mask, and And applies the mask
 * to the subtraction result. */
3070 static ir_node *create_doz(ir_node *psi, ir_node *a, ir_node *b)
3072 ir_mode *mode = get_irn_mode(psi);
3082 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3083 match_mode_neutral | match_am | match_immediate | match_two_users);
3085 block = get_nodes_block(new_node);
3087 if (is_Proj(new_node)) {
3088 sub = get_Proj_pred(new_node);
3089 assert(is_ia32_Sub(sub));
/* the Sub must produce both a result and a flags Proj */
3092 set_irn_mode(sub, mode_T);
3093 new_node = new_rd_Proj(NULL, sub, mode, pn_ia32_res);
3095 eflags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
3097 dbgi = get_irn_dbg_info(psi);
3098 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3099 not = new_bd_ia32_Not(dbgi, block, sbb);
3101 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, not);
3102 set_ia32_commutative(new_node);
3107 * Create an const array of two float consts.
3109 * @param c0 the first constant
3110 * @param c1 the second constant
3111 * @param new_mode IN/OUT for the mode of the constants, if NULL
3112 * smallest possible mode will be used
/* Create a global constant array entity holding two float constants.
 * Converts both tarvals to the smallest lossless common float mode
 * (preferring mode_F, then mode_D) unless *new_mode pins one, then
 * emits a private, linkage-constant entity with a compound initializer.
 * @param c0/c1     the two Const nodes
 * @param new_mode  in/out: constant mode; smallest possible if NULL */
3114 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode)
3117 ir_mode *mode = *new_mode;
3119 ir_initializer_t *initializer;
3120 tarval *tv0 = get_Const_tarval(c0);
3121 tarval *tv1 = get_Const_tarval(c1);
3124 /* detect the best mode for the constants */
3125 mode = get_tarval_mode(tv0);
3127 if (mode != mode_F) {
3128 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3129 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3131 tv0 = tarval_convert_to(tv0, mode);
3132 tv1 = tarval_convert_to(tv1, mode);
3133 } else if (mode != mode_D) {
3134 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3135 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3137 tv0 = tarval_convert_to(tv0, mode);
3138 tv1 = tarval_convert_to(tv1, mode);
/* build the entity: private, constant, uniquely named */
3145 tp = ia32_create_float_type(mode, 4);
3146 tp = ia32_create_float_array(tp);
3148 ent = new_entity(get_glob_type(), id_unique("C%u"), tp);
3150 set_entity_ld_ident(ent, get_entity_ident(ent));
3151 set_entity_visibility(ent, ir_visibility_private);
3152 add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
3154 initializer = create_initializer_compound(2);
3156 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3157 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3159 set_entity_initializer(ent, initializer);
3166 * Possible transformations for creating a Setcc.
/* Instruction kinds a Setcc result can be post-processed with (Add, Lea,
 * Shl, Neg, Not, And, ... — the enumerators are elided in this excerpt). */
3168 enum setcc_transform_insn {
/* A recipe (sequence of steps) turning a 0/1 Setcc result into the two
 * constant values of a Mux; filled in by find_const_transform(). */
3181 typedef struct setcc_transform {
3185 	enum setcc_transform_insn transform;
3189 } setcc_transform_t;
3192 * Setcc can only handle 0 and 1 result.
3193 * Find a transformation that creates 0 and 1 from
/* Find a step sequence that produces the Mux constants t (true) and
 * f (false) from a 0/1 Setcc result. First normalizes so that t is the
 * non-zero/bigger value (negating the condition as needed) and f is 0
 * (adding f back as a final SETCC_TR_ADD step); then picks SET, NEG,
 * LEA/LEAxx/SHL scaling for small multiplier values, or an AND/SHL of a
 * single-bit constant.
 * NOTE(review): the excerpt elides the switch labels on the constant
 * value v (the 9/8/5/4/3/2 multiplier cases are inferred from the
 * scale comments) — verify against the full file. */
3196 static void find_const_transform(pn_Cmp pnc, tarval *t, tarval *f,
3197 setcc_transform_t *res)
/* normalize: make t the value produced for a "true" condition result */
3203 if (tarval_is_null(t)) {
3207 pnc = ia32_get_negated_pnc(pnc);
3208 } else if (tarval_cmp(t, f) == pn_Cmp_Lt) {
3209 // now, t is the bigger one
3213 pnc = ia32_get_negated_pnc(pnc);
3217 if (! tarval_is_null(f)) {
/* reduce to the f == 0 case; the final step re-adds f */
3218 tarval *t_sub = tarval_sub(t, f, NULL);
3221 res->steps[step].transform = SETCC_TR_ADD;
3223 if (t == tarval_bad)
3224 panic("constant subtract failed");
3225 if (! tarval_is_long(f))
3226 panic("tarval is not long");
3228 res->steps[step].val = get_tarval_long(f);
3230 f = tarval_sub(f, f, NULL);
3231 assert(tarval_is_null(f));
/* t == 1: the Setcc result is already the wanted value */
3234 if (tarval_is_one(t)) {
3235 res->steps[step].transform = SETCC_TR_SET;
3236 res->num_steps = ++step;
/* t == -1: negate the 0/1 result */
3240 if (tarval_is_minus_one(t)) {
3241 res->steps[step].transform = SETCC_TR_NEG;
3243 res->steps[step].transform = SETCC_TR_SET;
3244 res->num_steps = ++step;
3247 if (tarval_is_long(t)) {
3248 long v = get_tarval_long(t);
3250 res->steps[step].val = 0;
/* v == 9: (a << 3) + a via Lea */
3253 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3255 res->steps[step].transform = SETCC_TR_LEAxx;
3256 res->steps[step].scale = 3; /* (a << 3) + a */
3259 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3261 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3262 res->steps[step].scale = 3; /* (a << 3) */
3265 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3267 res->steps[step].transform = SETCC_TR_LEAxx;
3268 res->steps[step].scale = 2; /* (a << 2) + a */
3271 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3273 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3274 res->steps[step].scale = 2; /* (a << 2) */
3277 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3279 res->steps[step].transform = SETCC_TR_LEAxx;
3280 res->steps[step].scale = 1; /* (a << 1) + a */
3283 if (step > 0 && res->steps[step - 1].transform == SETCC_TR_ADD)
3285 res->steps[step].transform = res->steps[step].val == 0 ? SETCC_TR_SHL : SETCC_TR_LEA;
3286 res->steps[step].scale = 1; /* (a << 1) */
3289 res->num_steps = step;
/* general constant: mask with AND (multi-bit) or shift a single bit */
3292 if (! tarval_is_single_bit(t)) {
3293 res->steps[step].transform = SETCC_TR_AND;
3294 res->steps[step].val = v;
3296 res->steps[step].transform = SETCC_TR_NEG;
3298 int v = get_tarval_lowest_bit(t);
3301 res->steps[step].transform = SETCC_TR_SHL;
3302 res->steps[step].scale = v;
3306 res->steps[step].transform = SETCC_TR_SET;
3307 res->num_steps = ++step;
3310 panic("tarval is not long");
3314 * Transforms a Mux node into some code sequence.
3316 * @return The transformed node.
/* Transform a Mux node into an ia32 code sequence.
 * Float Mux: SSE min/max patterns, or a constant-pair load indexed by
 * the Setcc result (x87/SSE), else unsupported. Integer Mux: the
 * "difference or zero" pattern, a Setcc + arithmetic recipe when both
 * values are constant, or a CMov otherwise.
 * @return The transformed node. */
3318 static ir_node *gen_Mux(ir_node *node)
3320 dbg_info *dbgi = get_irn_dbg_info(node);
3321 ir_node *block = get_nodes_block(node);
3322 ir_node *new_block = be_transform_node(block);
3323 ir_node *mux_true = get_Mux_true(node);
3324 ir_node *mux_false = get_Mux_false(node);
3325 ir_node *cond = get_Mux_sel(node);
3326 ir_mode *mode = get_irn_mode(node);
3331 assert(get_irn_mode(cond) == mode_b);
3333 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3334 if (mode_is_float(mode)) {
3335 ir_node *cmp = get_Proj_pred(cond);
3336 ir_node *cmp_left = get_Cmp_left(cmp);
3337 ir_node *cmp_right = get_Cmp_right(cmp);
3338 pn_Cmp pnc = get_Proj_proj(cond);
3340 if (ia32_cg_config.use_sse2) {
3341 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3342 if (cmp_left == mux_true && cmp_right == mux_false) {
3343 /* Mux(a <= b, a, b) => MIN */
3344 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3345 match_commutative | match_am | match_two_users);
3346 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3347 /* Mux(a <= b, b, a) => MAX */
3348 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3349 match_commutative | match_am | match_two_users);
3351 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3352 if (cmp_left == mux_true && cmp_right == mux_false) {
3353 /* Mux(a >= b, a, b) => MAX */
3354 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3355 match_commutative | match_am | match_two_users);
3356 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3357 /* Mux(a >= b, b, a) => MIN */
3358 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3359 match_commutative | match_am | match_two_users);
/* float Mux of two constants: load from a 2-element constant array
 * indexed by the (scaled) Setcc result */
3364 if (is_Const(mux_true) && is_Const(mux_false)) {
3365 ia32_address_mode_t am;
3370 flags = get_flags_node(cond, &pnc);
3371 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3373 if (ia32_cg_config.use_sse2) {
3374 /* cannot load from different mode on SSE */
3377 /* x87 can load any mode */
3381 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size; AM scale maxes at 8, so
 * larger elements combine Lea/Add steps */
3383 switch (get_mode_size_bytes(new_mode)) {
3393 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3394 set_ia32_am_scale(new_node, 2);
3399 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3400 set_ia32_am_scale(new_node, 1);
3403 /* arg, shift 16 NOT supported */
3405 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3408 panic("Unsupported constant size");
3411 am.ls_mode = new_mode;
3412 am.addr.base = get_symconst_base();
3413 am.addr.index = new_node;
3414 am.addr.mem = nomem;
3416 am.addr.scale = scale;
3417 am.addr.use_frame = 0;
3418 am.addr.frame_entity = NULL;
3419 am.addr.symconst_sign = 0;
3420 am.mem_proj = am.addr.mem;
3421 am.op_type = ia32_AddrModeS;
3424 am.pinned = op_pin_state_floats;
3426 am.ins_permuted = 0;
3428 if (ia32_cg_config.use_sse2)
3429 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3431 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3432 set_am_attributes(load, &am);
3434 return new_rd_Proj(NULL, load, mode_vfp, pn_ia32_res);
3436 panic("cannot transform floating point Mux");
/* ---- integer Mux ---- */
3439 assert(ia32_mode_needs_gp_reg(mode));
3441 if (is_Proj(cond)) {
3442 ir_node *cmp = get_Proj_pred(cond);
3444 ir_node *cmp_left = get_Cmp_left(cmp);
3445 ir_node *cmp_right = get_Cmp_right(cmp);
3446 ir_node *val_true = mux_true;
3447 ir_node *val_false = mux_false;
3448 pn_Cmp pnc = get_Proj_proj(cond);
/* canonicalize: move a zero constant into val_false */
3450 if (is_Const(val_true) && is_Const_null(val_true)) {
3451 ir_node *tmp = val_false;
3452 val_false = val_true;
3454 pnc = ia32_get_negated_pnc(pnc);
/* Mux(a >=u b, a - b, 0) and the mirrored form => doz */
3456 if (is_Const_0(val_false) && is_Sub(val_true)) {
3457 if ((pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge)
3458 && get_Sub_left(val_true) == cmp_left
3459 && get_Sub_right(val_true) == cmp_right) {
3460 return create_doz(node, cmp_left, cmp_right);
3462 if ((pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le)
3463 && get_Sub_left(val_true) == cmp_right
3464 && get_Sub_right(val_true) == cmp_left) {
3465 return create_doz(node, cmp_right, cmp_left);
3471 flags = get_flags_node(cond, &pnc);
3473 if (is_Const(mux_true) && is_Const(mux_false)) {
3474 /* both are const, good */
3475 tarval *tv_true = get_Const_tarval(mux_true);
3476 tarval *tv_false = get_Const_tarval(mux_false);
3477 setcc_transform_t res;
3480 find_const_transform(pnc, tv_true, tv_false, &res);
/* apply the recipe steps back-to-front */
3482 for (step = (int)res.num_steps - 1; step >= 0; --step) {
3485 switch (res.steps[step].transform) {
3487 imm = ia32_immediate_from_long(res.steps[step].val);
3488 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3490 case SETCC_TR_ADDxx:
3491 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3494 new_node = new_bd_ia32_Lea(dbgi, new_block, noreg_GP, new_node);
3495 set_ia32_am_scale(new_node, res.steps[step].scale);
3496 set_ia32_am_offs_int(new_node, res.steps[step].val);
3498 case SETCC_TR_LEAxx:
3499 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3500 set_ia32_am_scale(new_node, res.steps[step].scale);
3501 set_ia32_am_offs_int(new_node, res.steps[step].val);
3504 imm = ia32_immediate_from_long(res.steps[step].scale);
3505 new_node = new_bd_ia32_Shl(dbgi, new_block, new_node, imm);
3508 new_node = new_bd_ia32_Neg(dbgi, new_block, new_node);
3511 new_node = new_bd_ia32_Not(dbgi, new_block, new_node);
3514 imm = ia32_immediate_from_long(res.steps[step].val);
3515 new_node = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, imm);
3518 new_node = create_set_32bit(dbgi, new_block, flags, res.pnc, node);
3521 new_node = new_bd_ia32_Sbb0(dbgi, new_block, flags);
3524 panic("unknown setcc transform");
/* general case: conditional move */
3528 new_node = create_CMov(node, cond, flags, pnc);
3536 * Create a conversion from x87 state register to general purpose.
/* Create a conversion from x87 float to a general purpose register:
 * fist the value into the stack frame, then Load the integer back.
 * Unsigned 32-bit targets are stored as a 64-bit signed integer (fist
 * cannot store unsigned) and only the low word is loaded. */
3538 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3540 ir_node *block = be_transform_node(get_nodes_block(node));
3541 ir_node *op = get_Conv_op(node);
3542 ir_node *new_op = be_transform_node(op);
3543 ir_graph *irg = current_ir_graph;
3544 dbg_info *dbgi = get_irn_dbg_info(node);
3545 ir_mode *mode = get_irn_mode(node);
3546 ir_node *fist, *load, *mem;
3548 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3549 set_irn_pinned(fist, op_pin_state_floats);
3550 set_ia32_use_frame(fist);
3551 set_ia32_op_type(fist, ia32_AddrModeD);
3553 assert(get_mode_size_bits(mode) <= 32);
3554 /* exception we can only store signed 32 bit integers, so for unsigned
3555 we store a 64bit (signed) integer and load the lower bits */
3556 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3557 set_ia32_ls_mode(fist, mode_Ls);
3559 set_ia32_ls_mode(fist, mode_Is);
3561 SET_IA32_ORIG_NODE(fist, node);
/* load the integer result back from the spill slot */
3564 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3566 set_irn_pinned(load, op_pin_state_floats);
3567 set_ia32_use_frame(load);
3568 set_ia32_op_type(load, ia32_AddrModeS);
3569 set_ia32_ls_mode(load, mode_Is);
/* request a stack entity large enough for what the fist wrote */
3570 if (get_ia32_ls_mode(fist) == mode_Ls) {
3571 ia32_attr_t *attr = get_ia32_attr(load);
3572 attr->data.need_64bit_stackent = 1;
3574 ia32_attr_t *attr = get_ia32_attr(load);
3575 attr->data.need_32bit_stackent = 1;
3577 SET_IA32_ORIG_NODE(load, node);
3579 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
3583 * Creates a x87 strict Conv by placing a Store and a Load
/* Create an x87 strict Conv by a Store/Load round trip through the
 * stack frame: storing in tgt_mode forces the value to be rounded to
 * that precision (the x87 stack computes in extended precision). */
3585 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3587 ir_node *block = get_nodes_block(node);
3588 ir_graph *irg = get_Block_irg(block);
3589 dbg_info *dbgi = get_irn_dbg_info(node);
3590 ir_node *frame = get_irg_frame(irg);
3591 ir_node *store, *load;
3594 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3595 set_ia32_use_frame(store);
3596 set_ia32_op_type(store, ia32_AddrModeD);
3597 SET_IA32_ORIG_NODE(store, node);
3599 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3600 set_ia32_use_frame(load);
3601 set_ia32_op_type(load, ia32_AddrModeS);
3602 SET_IA32_ORIG_NODE(load, node);
3604 new_node = new_r_Proj(load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer conversion node, dispatching to the
 * 8-bit variant (different register constraints) for 8-bit modes. */
3608 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3609 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3611 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3613 func = get_mode_size_bits(mode) == 8 ?
3614 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3615 return func(dbgi, block, base, index, mem, val, mode);
3619 * Create a conversion from general purpose to x87 register
/* Create a conversion from general purpose to x87 register via fild.
 * Tries source address mode first (fild accepts signed 16/32-bit memory
 * operands); otherwise the value is spilled with a Store and loaded by
 * fild. Unsigned 32-bit sources become a 64-bit spill with an explicit
 * zeroed upper word, since fild only reads signed integers. */
3621 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3623 ir_node *src_block = get_nodes_block(node);
3624 ir_node *block = be_transform_node(src_block);
3625 ir_graph *irg = get_Block_irg(block);
3626 dbg_info *dbgi = get_irn_dbg_info(node);
3627 ir_node *op = get_Conv_op(node);
3628 ir_node *new_op = NULL;
3630 ir_mode *store_mode;
3635 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3636 if (possible_int_mode_for_fp(src_mode)) {
3637 ia32_address_mode_t am;
3639 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3640 if (am.op_type == ia32_AddrModeS) {
3641 ia32_address_t *addr = &am.addr;
3643 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3644 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3646 set_am_attributes(fild, &am);
3647 SET_IA32_ORIG_NODE(fild, node);
3649 fix_mem_proj(fild, &am);
/* no address mode possible: go through a stack spill slot */
3654 if (new_op == NULL) {
3655 new_op = be_transform_node(op);
3658 mode = get_irn_mode(op);
3660 /* first convert to 32 bit signed if necessary */
3661 if (get_mode_size_bits(src_mode) < 32) {
3662 if (!upper_bits_clean(new_op, src_mode)) {
3663 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3664 SET_IA32_ORIG_NODE(new_op, node);
3669 assert(get_mode_size_bits(mode) == 32);
3672 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3674 set_ia32_use_frame(store);
3675 set_ia32_op_type(store, ia32_AddrModeD);
3676 set_ia32_ls_mode(store, mode_Iu);
3678 /* exception for 32bit unsigned, do a 64bit spill+load */
3679 if (!mode_is_signed(mode)) {
3682 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
/* write a zero upper word so fild reads a correct 64-bit value */
3684 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3685 noreg_GP, nomem, zero_const);
3687 set_ia32_use_frame(zero_store);
3688 set_ia32_op_type(zero_store, ia32_AddrModeD);
3689 add_ia32_am_offs_int(zero_store, 4);
3690 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: synchronize their memory effects */
3695 store = new_rd_Sync(dbgi, block, 2, in);
3696 store_mode = mode_Ls;
3698 store_mode = mode_Is;
3702 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3704 set_ia32_use_frame(fild);
3705 set_ia32_op_type(fild, ia32_AddrModeS);
3706 set_ia32_ls_mode(fild, store_mode);
3708 new_node = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
3714 * Create a conversion from one integer mode into another one
/* Create a conversion from one integer mode into another. The conv
 * operates in the smaller of the two modes; if the operand's upper bits
 * are already clean the conv is unnecessary and skipped. */
3716 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3717 dbg_info *dbgi, ir_node *block, ir_node *op,
3720 ir_node *new_block = be_transform_node(block);
3722 ir_mode *smaller_mode;
3723 ia32_address_mode_t am;
3724 ia32_address_t *addr = &am.addr;
3727 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3728 smaller_mode = src_mode;
3730 smaller_mode = tgt_mode;
3733 #ifdef DEBUG_libfirm
/* a conv directly after a constant should have been folded already */
3735 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3740 match_arguments(&am, block, NULL, op, NULL,
3741 match_am | match_8bit_am | match_16bit_am);
3743 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3744 /* unnecessary conv. in theory it shouldn't have been AM */
3745 assert(is_ia32_NoReg_GP(addr->base));
3746 assert(is_ia32_NoReg_GP(addr->index));
3747 assert(is_NoMem(addr->mem));
3748 assert(am.addr.offset == 0);
3749 assert(am.addr.symconst_ent == NULL);
3753 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3754 addr->mem, am.new_op2, smaller_mode);
3755 set_am_attributes(new_node, &am);
3756 /* match_arguments assume that out-mode = in-mode, this isn't true here
3758 set_ia32_ls_mode(new_node, smaller_mode);
3759 SET_IA32_ORIG_NODE(new_node, node);
3760 new_node = fix_mem_proj(new_node, &am);
3765 * Transforms a Conv node.
3767 * @return The created ia32 Conv node
/* NOTE(review): this excerpt is line-elided (original line numbers jump);
 * comments below describe only the visible code.  Dispatches on the
 * source/target mode pair: no-op conv, float->float, float->int,
 * int->float, ->mode_b and int->int. */
3769 static ir_node *gen_Conv(ir_node *node)
3771 ir_node *block = get_nodes_block(node);
3772 ir_node *new_block = be_transform_node(block);
3773 ir_node *op = get_Conv_op(node);
3774 ir_node *new_op = NULL;
3775 dbg_info *dbgi = get_irn_dbg_info(node);
3776 ir_mode *src_mode = get_irn_mode(op);
3777 ir_mode *tgt_mode = get_irn_mode(node);
3778 int src_bits = get_mode_size_bits(src_mode);
3779 int tgt_bits = get_mode_size_bits(tgt_mode);
3780 ir_node *res = NULL;
/* 64bit integer arithmetic must have been lowered before this phase */
3782 assert(!mode_is_int(src_mode) || src_bits <= 32);
3783 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3785 /* modeB -> X should already be lowered by the lower_mode_b pass */
3786 if (src_mode == mode_b) {
3787 panic("ConvB not lowered %+F", node);
/* identical source and target mode: the Conv is a no-op (modulo
 * x87 'strict' rounding semantics) */
3790 if (src_mode == tgt_mode) {
3791 if (get_Conv_strict(node)) {
3792 if (ia32_cg_config.use_sse2) {
3793 /* when we are in SSE mode, we can kill all strict no-op conversion */
3794 return be_transform_node(op);
3797 /* this should be optimized already, but who knows... */
3798 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3799 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3800 return be_transform_node(op);
3804 if (mode_is_float(src_mode)) {
3805 new_op = be_transform_node(op);
3806 /* we convert from float ... */
3807 if (mode_is_float(tgt_mode)) {
3809 if (ia32_cg_config.use_sse2) {
3810 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3811 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3813 set_ia32_ls_mode(res, tgt_mode);
3815 if (get_Conv_strict(node)) {
3816 /* if fp_no_float_fold is not set then we assume that we
3817 * don't have any float operations in a non
3818 * mode_float_arithmetic mode and can skip strict upconvs */
3819 if (src_bits < tgt_bits
3820 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3821 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3824 res = gen_x87_strict_conv(tgt_mode, new_op);
3825 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3829 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3834 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3835 if (ia32_cg_config.use_sse2) {
3836 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3838 set_ia32_ls_mode(res, src_mode);
3840 return gen_x87_fp_to_gp(node);
3844 /* we convert from int ... */
3845 if (mode_is_float(tgt_mode)) {
3847 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3848 if (ia32_cg_config.use_sse2) {
3849 new_op = be_transform_node(op);
3850 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3852 set_ia32_ls_mode(res, tgt_mode);
/* x87 path: a strict conv is only needed when the integer value may
 * not be exactly representable in the target float's mantissa */
3854 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3855 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3856 res = gen_x87_gp_to_fp(node, src_mode);
3858 /* we need a strict-Conv, if the int mode has more bits than the
3860 if (float_mantissa < int_mantissa) {
3861 res = gen_x87_strict_conv(tgt_mode, res);
3862 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3866 } else if (tgt_mode == mode_b) {
3867 /* mode_b lowering already took care that we only have 0/1 values */
3868 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3869 src_mode, tgt_mode));
3870 return be_transform_node(op);
3873 if (src_bits == tgt_bits) {
/* int->int conv of same width: a register copy suffices */
3874 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3875 src_mode, tgt_mode));
3876 return be_transform_node(op);
3879 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to express @p node as an ia32 Immediate (subject to the given
 * constraint type); fall back to the regular node transformation if it
 * is no valid immediate.  (review: closing lines elided in this excerpt) */
3887 static ir_node *create_immediate_or_transform(ir_node *node,
3888 char immediate_constraint_type)
3890 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3891 if (new_node == NULL) {
3892 new_node = be_transform_node(node);
3898 * Transforms a FrameAddr into an ia32 Add.
/* NOTE(review): despite the doc fragment above, the visible code builds
 * an ia32 Lea tied to the frame entity, not an Add. */
3900 static ir_node *gen_be_FrameAddr(ir_node *node)
3902 ir_node *block = be_transform_node(get_nodes_block(node));
3903 ir_node *op = be_get_FrameAddr_frame(node);
3904 ir_node *new_op = be_transform_node(op);
3905 dbg_info *dbgi = get_irn_dbg_info(node);
3908 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
/* attach the frame entity so the offset is fixed up after stack layout */
3909 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3910 set_ia32_use_frame(new_node);
3912 SET_IA32_ORIG_NODE(new_node, node);
3918 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* NOTE(review): excerpt is line-elided; comments describe visible code
 * only.  Handles the SSE2 float-return case by storing the xmm value to
 * the frame, reloading it into an x87 register and rebuilding the
 * Barrier with the new value/memory inputs. */
3920 static ir_node *gen_be_Return(ir_node *node)
3922 ir_graph *irg = current_ir_graph;
3923 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3924 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3925 ir_entity *ent = get_irg_entity(irg);
3926 ir_type *tp = get_entity_type(ent);
3931 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3932 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3934 int pn_ret_val, pn_ret_mem, arity, i;
/* nothing special to do unless we return a float value under SSE2 */
3936 assert(ret_val != NULL);
3937 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3938 return be_duplicate_node(node);
3941 res_type = get_method_res_type(tp, 0);
3943 if (! is_Primitive_type(res_type)) {
3944 return be_duplicate_node(node);
3947 mode = get_type_mode(res_type);
3948 if (! mode_is_float(mode)) {
3949 return be_duplicate_node(node);
3952 assert(get_method_n_ress(tp) == 1);
3954 pn_ret_val = get_Proj_proj(ret_val);
3955 pn_ret_mem = get_Proj_proj(ret_mem);
3957 /* get the Barrier */
3958 barrier = get_Proj_pred(ret_val);
3960 /* get result input of the Barrier */
3961 ret_val = get_irn_n(barrier, pn_ret_val);
3962 new_ret_val = be_transform_node(ret_val);
3964 /* get memory input of the Barrier */
3965 ret_mem = get_irn_n(barrier, pn_ret_mem);
3966 new_ret_mem = be_transform_node(ret_mem);
3968 frame = get_irg_frame(irg);
3970 dbgi = get_irn_dbg_info(barrier);
3971 block = be_transform_node(get_nodes_block(barrier));
3973 /* store xmm0 onto stack */
3974 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3975 new_ret_mem, new_ret_val);
3976 set_ia32_ls_mode(sse_store, mode);
3977 set_ia32_op_type(sse_store, ia32_AddrModeD);
3978 set_ia32_use_frame(sse_store);
3980 /* load into x87 register */
3981 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3982 set_ia32_op_type(fld, ia32_AddrModeS);
3983 set_ia32_use_frame(fld);
3985 mproj = new_r_Proj(fld, mode_M, pn_ia32_vfld_M);
3986 fld = new_r_Proj(fld, mode_vfp, pn_ia32_vfld_res);
3988 /* create a new barrier */
3989 arity = get_irn_arity(barrier);
3990 in = ALLOCAN(ir_node*, arity);
/* copy all barrier inputs, substituting the reloaded value and the
 * load's memory proj at the result/memory positions */
3991 for (i = 0; i < arity; ++i) {
3994 if (i == pn_ret_val) {
3996 } else if (i == pn_ret_mem) {
3999 ir_node *in = get_irn_n(barrier, i);
4000 new_in = be_transform_node(in);
4005 new_barrier = new_ir_node(dbgi, irg, block,
4006 get_irn_op(barrier), get_irn_mode(barrier),
4008 copy_node_attr(irg, barrier, new_barrier);
4009 be_duplicate_deps(barrier, new_barrier);
4010 be_set_transformed_node(barrier, new_barrier);
4012 /* transform normally */
4013 return be_duplicate_node(node);
4017 * Transform a be_AddSP into an ia32_SubSP.
/* The ia32 stack grows downwards, so reserving stack space (AddSP)
 * becomes a SubSP instruction. */
4019 static ir_node *gen_be_AddSP(ir_node *node)
4021 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4022 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4024 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
4025 match_am | match_immediate);
4029 * Transform a be_SubSP into an ia32_AddSP
/* Counterpart of gen_be_AddSP: freeing stack space maps to AddSP
 * because the ia32 stack grows downwards. */
4031 static ir_node *gen_be_SubSP(ir_node *node)
4033 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4034 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4036 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
4037 match_am | match_immediate);
4041 * Change some phi modes
/* Duplicates a Phi and assigns the register requirement of the
 * appropriate ia32 register class (gp / xmm / vfp) to its output.
 * NOTE(review): excerpt is line-elided. */
4043 static ir_node *gen_Phi(ir_node *node)
4045 const arch_register_req_t *req;
4046 ir_node *block = be_transform_node(get_nodes_block(node));
4047 ir_graph *irg = current_ir_graph;
4048 dbg_info *dbgi = get_irn_dbg_info(node);
4049 ir_mode *mode = get_irn_mode(node);
4052 if (ia32_mode_needs_gp_reg(mode)) {
4053 /* we shouldn't have any 64bit stuff around anymore */
4054 assert(get_mode_size_bits(mode) <= 32);
4055 /* all integer operations are on 32bit registers now */
4057 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
4058 } else if (mode_is_float(mode)) {
4059 if (ia32_cg_config.use_sse2) {
4061 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
4064 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
4067 req = arch_no_register_req;
4070 /* phi nodes allow loops, so we use the old arguments for now
4071 * and fix this later */
4072 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4073 get_irn_in(node) + 1);
4074 copy_node_attr(irg, node, phi);
4075 be_duplicate_deps(node, phi);
4077 arch_set_out_register_req(phi, 0, req);
/* make sure the phi's predecessors get transformed, too */
4079 be_enqueue_preds(node);
/* Transform an unconditional Jmp into an ia32 Jmp in the transformed
 * block. */
4084 static ir_node *gen_Jmp(ir_node *node)
4086 ir_node *block = get_nodes_block(node);
4087 ir_node *new_block = be_transform_node(block);
4088 dbg_info *dbgi = get_irn_dbg_info(node);
4091 new_node = new_bd_ia32_Jmp(dbgi, new_block);
4092 SET_IA32_ORIG_NODE(new_node, node);
/* Transform an indirect jump (IJmp): the jump target may be matched as
 * an address-mode operand or an immediate. */
4100 static ir_node *gen_IJmp(ir_node *node)
4102 ir_node *block = get_nodes_block(node);
4103 ir_node *new_block = be_transform_node(block);
4104 dbg_info *dbgi = get_irn_dbg_info(node);
4105 ir_node *op = get_IJmp_target(node);
4107 ia32_address_mode_t am;
4108 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4110 assert(get_irn_mode(op) == mode_P);
4112 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
4114 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
4115 addr->mem, am.new_op2);
4116 set_am_attributes(new_node, &am);
4117 SET_IA32_ORIG_NODE(new_node, node);
4119 new_node = fix_mem_proj(new_node, &am);
4125 * Transform a Bound node.
/* Only the lower-bound == 0 case is supported: index < upper is then a
 * single unsigned compare (Sub + Jcc on the carry condition).
 * NOTE(review): excerpt is line-elided. */
4127 static ir_node *gen_Bound(ir_node *node)
4130 ir_node *lower = get_Bound_lower(node);
4131 dbg_info *dbgi = get_irn_dbg_info(node);
4133 if (is_Const_0(lower)) {
4134 /* typical case for Java */
4135 ir_node *sub, *res, *flags, *block;
4137 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
4138 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
4140 block = get_nodes_block(res);
4141 if (! is_Proj(res)) {
4143 set_irn_mode(sub, mode_T);
4144 res = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_res);
4146 sub = get_Proj_pred(res);
/* unsigned "index < upper" covers both bounds since lower == 0 */
4148 flags = new_rd_Proj(NULL, sub, mode_Iu, pn_ia32_Sub_flags);
4149 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
4150 SET_IA32_ORIG_NODE(new_node, node);
4152 panic("generic Bound not supported in ia32 Backend");
/* Transform a lowered ShlDep (shift-left with extra dependency input)
 * into an ia32 Shl. */
4158 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4160 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4161 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4163 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
4164 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep into an ia32 Shr (flags elided in this
 * excerpt). */
4167 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4169 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4170 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4171 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Transform a lowered SarDep (arithmetic shift right) into an ia32 Sar
 * (flags elided in this excerpt). */
4175 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4177 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4178 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4179 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Transform a lowered Add (low word of a 64bit add) into an ia32 Add;
 * the node is turned into mode_T because its flags output is consumed
 * by the matching Adc. */
4183 static ir_node *gen_ia32_l_Add(ir_node *node)
4185 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4186 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4187 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
4188 match_commutative | match_am | match_immediate |
4189 match_mode_neutral);
/* gen_binop may wrap the result in a Proj; unwrap to the Add itself */
4191 if (is_Proj(lowered)) {
4192 lowered = get_Proj_pred(lowered);
4194 assert(is_ia32_Add(lowered));
4195 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add with carry, high word of a 64bit add)
 * into an ia32 Adc consuming the flags of the low-word Add. */
4201 static ir_node *gen_ia32_l_Adc(ir_node *node)
4203 return gen_binop_flags(node, new_bd_ia32_Adc,
4204 match_commutative | match_am | match_immediate |
4205 match_mode_neutral);
4209 * Transforms a l_MulS into a "real" MulS node.
4211 * @return the created ia32 Mul node
/* Unsigned widening multiply used by the 64bit lowering. */
4213 static ir_node *gen_ia32_l_Mul(ir_node *node)
4215 ir_node *left = get_binop_left(node);
4216 ir_node *right = get_binop_right(node);
4218 return gen_binop(node, left, right, new_bd_ia32_Mul,
4219 match_commutative | match_am | match_mode_neutral);
4223 * Transforms a l_IMulS into a "real" IMul1OPS node.
4225 * @return the created ia32 IMul1OP node
/* Signed one-operand multiply used by the 64bit lowering. */
4227 static ir_node *gen_ia32_l_IMul(ir_node *node)
4229 ir_node *left = get_binop_left(node);
4230 ir_node *right = get_binop_right(node);
4232 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
4233 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64bit sub) into an ia32 Sub;
 * turned into mode_T so the matching Sbb can consume its flags. */
4236 static ir_node *gen_ia32_l_Sub(ir_node *node)
4238 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4239 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4240 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
4241 match_am | match_immediate | match_mode_neutral);
/* unwrap a possible result Proj to reach the Sub node itself */
4243 if (is_Proj(lowered)) {
4244 lowered = get_Proj_pred(lowered);
4246 assert(is_ia32_Sub(lowered));
4247 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract with borrow, high word of a 64bit
 * sub) into an ia32 Sbb consuming the flags of the low-word Sub. */
4253 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4255 return gen_binop_flags(node, new_bd_ia32_Sbb,
4256 match_am | match_immediate | match_mode_neutral);
4260 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4261 * op1 - target to be shifted
4262 * op2 - contains bits to be shifted into target
4264 * Only op3 can be an immediate.
/* NOTE(review): excerpt is line-elided; comments describe visible code
 * only. */
4266 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4267 ir_node *low, ir_node *count)
4269 ir_node *block = get_nodes_block(node);
4270 ir_node *new_block = be_transform_node(block);
4271 dbg_info *dbgi = get_irn_dbg_info(node);
4272 ir_node *new_high = be_transform_node(high);
4273 ir_node *new_low = be_transform_node(low);
4277 /* the shift amount can be any mode that is bigger than 5 bits, since all
4278 * other bits are ignored anyway */
4279 while (is_Conv(count) &&
4280 get_irn_n_edges(count) == 1 &&
4281 mode_is_int(get_irn_mode(count))) {
4282 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4283 count = get_Conv_op(count);
4285 new_count = create_immediate_or_transform(count, 0);
/* pick the double-shift direction matching the lowered node */
4287 if (is_ia32_l_ShlD(node)) {
4288 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4291 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4294 SET_IA32_ORIG_NODE(new_node, node);
/* Transform a lowered ShlD (64bit shift-left helper) via
 * gen_lowered_64bit_shifts. */
4299 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4301 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4302 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4303 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4304 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered ShrD (64bit shift-right helper) via
 * gen_lowered_64bit_shifts. */
4307 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4309 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4310 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4311 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4312 return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a lowered 64bit integer (val_high:val_low) to float on x87:
 * store both halves to the frame, fild the 64bit slot, and for unsigned
 * sources add a bias constant when the sign bit was set.
 * NOTE(review): excerpt is line-elided; comments describe visible code
 * only. */
4315 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4317 ir_node *src_block = get_nodes_block(node);
4318 ir_node *block = be_transform_node(src_block);
4319 ir_graph *irg = current_ir_graph;
4320 dbg_info *dbgi = get_irn_dbg_info(node);
4321 ir_node *frame = get_irg_frame(irg);
4322 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4323 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4324 ir_node *new_val_low = be_transform_node(val_low);
4325 ir_node *new_val_high = be_transform_node(val_high);
4327 ir_node *sync, *fild, *res;
4328 ir_node *store_low, *store_high;
4330 if (ia32_cg_config.use_sse2) {
4331 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves to a 64bit frame slot */
4335 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4337 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4339 SET_IA32_ORIG_NODE(store_low, node);
4340 SET_IA32_ORIG_NODE(store_high, node);
4342 set_ia32_use_frame(store_low);
4343 set_ia32_use_frame(store_high);
4344 set_ia32_op_type(store_low, ia32_AddrModeD);
4345 set_ia32_op_type(store_high, ia32_AddrModeD);
4346 set_ia32_ls_mode(store_low, mode_Iu);
4347 set_ia32_ls_mode(store_high, mode_Is);
/* the high half lives 4 bytes above the low half */
4348 add_ia32_am_offs_int(store_high, 4);
4352 sync = new_rd_Sync(dbgi, block, 2, in);
/* load the combined 64bit value into an x87 register */
4355 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4357 set_ia32_use_frame(fild);
4358 set_ia32_op_type(fild, ia32_AddrModeS);
4359 set_ia32_ls_mode(fild, mode_Ls);
4361 SET_IA32_ORIG_NODE(fild, node);
4363 res = new_r_Proj(fild, mode_vfp, pn_ia32_vfild_res);
/* fild interprets the value as signed: for unsigned sources with the
 * sign bit set, compensate by adding the 2^64 bias constant */
4365 if (! mode_is_signed(get_irn_mode(val_high))) {
4366 ia32_address_mode_t am;
4368 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
/* index = sign bit of the high word (0 or 1) selects 0.0 or the bias
 * from a two-entry constant table */
4371 am.addr.base = get_symconst_base();
4372 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4373 am.addr.mem = nomem;
4376 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4377 am.addr.use_frame = 0;
4378 am.addr.frame_entity = NULL;
4379 am.addr.symconst_sign = 0;
4380 am.ls_mode = mode_F;
4381 am.mem_proj = nomem;
4382 am.op_type = ia32_AddrModeS;
4384 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4385 am.pinned = op_pin_state_floats;
4387 am.ins_permuted = 0;
4389 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4390 am.new_op1, am.new_op2, get_fpcw());
4391 set_am_attributes(fadd, &am);
4393 set_irn_mode(fadd, mode_T);
4394 res = new_rd_Proj(NULL, fadd, mode_vfp, pn_ia32_res);
/* Convert a float to a lowered 64bit integer: fist the value into a
 * 64bit frame slot; the halves are read back by gen_Proj_l_FloattoLL. */
4399 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4401 ir_node *src_block = get_nodes_block(node);
4402 ir_node *block = be_transform_node(src_block);
4403 ir_graph *irg = get_Block_irg(block);
4404 dbg_info *dbgi = get_irn_dbg_info(node);
4405 ir_node *frame = get_irg_frame(irg);
4406 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4407 ir_node *new_val = be_transform_node(val);
4408 ir_node *fist, *mem;
4410 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4411 SET_IA32_ORIG_NODE(fist, node);
4412 set_ia32_use_frame(fist);
4413 set_ia32_op_type(fist, ia32_AddrModeD);
/* store the full 64bit integer */
4414 set_ia32_ls_mode(fist, mode_Ls);
/* Read one 32bit half of a l_FloattoLL result back from the 64bit
 * frame slot written by the fist: offset 0 for the low half, offset 4
 * for the high half. */
4419 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4421 ir_node *block = be_transform_node(get_nodes_block(node));
4422 ir_graph *irg = get_Block_irg(block);
4423 ir_node *pred = get_Proj_pred(node);
4424 ir_node *new_pred = be_transform_node(pred);
4425 ir_node *frame = get_irg_frame(irg);
4426 dbg_info *dbgi = get_irn_dbg_info(node);
4427 long pn = get_Proj_proj(node);
4432 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4433 SET_IA32_ORIG_NODE(load, node);
4434 set_ia32_use_frame(load);
4435 set_ia32_op_type(load, ia32_AddrModeS);
4436 set_ia32_ls_mode(load, mode_Iu);
4437 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4438 * 32 bit from it with this particular load */
4439 attr = get_ia32_attr(load);
4440 attr->data.need_64bit_stackent = 1;
4442 if (pn == pn_ia32_l_FloattoLL_res_high) {
4443 add_ia32_am_offs_int(load, 4);
4445 assert(pn == pn_ia32_l_FloattoLL_res_low);
4448 proj = new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4454 * Transform the Projs of an AddSP.
/* Renumber AddSP projs onto the ia32 SubSP (the AddSP was lowered to a
 * SubSP, see gen_be_AddSP); pins the stack result to %esp. */
4456 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4458 ir_node *pred = get_Proj_pred(node);
4459 ir_node *new_pred = be_transform_node(pred);
4460 dbg_info *dbgi = get_irn_dbg_info(node);
4461 long proj = get_Proj_proj(node);
4463 if (proj == pn_be_AddSP_sp) {
4464 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4465 pn_ia32_SubSP_stack);
4466 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4468 } else if (proj == pn_be_AddSP_res) {
4469 return new_rd_Proj(dbgi, new_pred, mode_Iu,
4470 pn_ia32_SubSP_addr);
4471 } else if (proj == pn_be_AddSP_M) {
4472 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_SubSP_M);
4475 panic("No idea how to transform proj->AddSP");
4479 * Transform the Projs of a SubSP.
/* Renumber SubSP projs onto the ia32 AddSP (the SubSP was lowered to an
 * AddSP, see gen_be_SubSP); pins the stack result to %esp. */
4481 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4483 ir_node *pred = get_Proj_pred(node);
4484 ir_node *new_pred = be_transform_node(pred);
4485 dbg_info *dbgi = get_irn_dbg_info(node);
4486 long proj = get_Proj_proj(node);
4488 if (proj == pn_be_SubSP_sp) {
4489 ir_node *res = new_rd_Proj(dbgi, new_pred, mode_Iu,
4490 pn_ia32_AddSP_stack);
4491 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4493 } else if (proj == pn_be_SubSP_M) {
4494 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_AddSP_M);
4497 panic("No idea how to transform proj->SubSP");
4501 * Transform and renumber the Projs from a Load.
/* NOTE(review): excerpt is line-elided; comments describe visible code
 * only.  The transformed predecessor may be an ia32 Load, xLoad, vfld,
 * or a Conv with folded address mode. */
4503 static ir_node *gen_Proj_Load(ir_node *node)
4506 ir_node *block = be_transform_node(get_nodes_block(node));
4507 ir_node *pred = get_Proj_pred(node);
4508 dbg_info *dbgi = get_irn_dbg_info(node);
4509 long proj = get_Proj_proj(node);
4511 /* loads might be part of source address mode matches, so we don't
4512 * transform the ProjMs yet (with the exception of loads whose result is
4515 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4518 /* this is needed, because sometimes we have loops that are only
4519 reachable through the ProjM */
4520 be_enqueue_preds(node);
4521 /* do it in 2 steps, to silence firm verifier */
4522 res = new_rd_Proj(dbgi, pred, mode_M, pn_Load_M);
4523 set_Proj_proj(res, pn_ia32_mem);
4527 /* renumber the proj */
4528 new_pred = be_transform_node(pred);
4529 if (is_ia32_Load(new_pred)) {
4532 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Load_res);
4534 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Load_M);
4535 case pn_Load_X_regular:
4536 return new_rd_Jmp(dbgi, block);
4537 case pn_Load_X_except:
4538 /* This Load might raise an exception. Mark it. */
4539 set_ia32_exc_label(new_pred, 1);
4540 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Load_X_exc);
/* load folded into a conv: the conv node carries the result/mem */
4544 } else if (is_ia32_Conv_I2I(new_pred) ||
4545 is_ia32_Conv_I2I8Bit(new_pred)) {
4546 set_irn_mode(new_pred, mode_T);
4547 if (proj == pn_Load_res) {
4548 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_res);
4549 } else if (proj == pn_Load_M) {
4550 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_mem);
4552 } else if (is_ia32_xLoad(new_pred)) {
4555 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xLoad_res);
4557 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xLoad_M);
4558 case pn_Load_X_regular:
4559 return new_rd_Jmp(dbgi, block);
4560 case pn_Load_X_except:
4561 /* This Load might raise an exception. Mark it. */
4562 set_ia32_exc_label(new_pred, 1);
4563 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4567 } else if (is_ia32_vfld(new_pred)) {
4570 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfld_res);
4572 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfld_M);
4573 case pn_Load_X_regular:
4574 return new_rd_Jmp(dbgi, block);
4575 case pn_Load_X_except:
4576 /* This Load might raise an exception. Mark it. */
4577 set_ia32_exc_label(new_pred, 1);
4578 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_vfld_X_exc);
4583 /* can happen for ProJMs when source address mode happened for the
4586 /* however it should not be the result proj, as that would mean the
4587 load had multiple users and should not have been used for
4589 if (proj != pn_Load_M) {
4590 panic("internal error: transformed node not a Load");
4592 return new_rd_Proj(dbgi, new_pred, mode_M, 1);
4595 panic("No idea how to transform proj");
4599 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all map onto the same ia32 Div/IDiv node; this
 * renumbers their projs onto the appropriate Div outputs.
 * NOTE(review): excerpt is line-elided. */
4601 static ir_node *gen_Proj_DivMod(ir_node *node)
4603 ir_node *block = be_transform_node(get_nodes_block(node));
4604 ir_node *pred = get_Proj_pred(node);
4605 ir_node *new_pred = be_transform_node(pred);
4606 dbg_info *dbgi = get_irn_dbg_info(node);
4607 long proj = get_Proj_proj(node);
4609 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4611 switch (get_irn_opcode(pred)) {
4615 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4617 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4618 case pn_Div_X_regular:
4619 return new_rd_Jmp(dbgi, block);
4620 case pn_Div_X_except:
4621 set_ia32_exc_label(new_pred, 1);
4622 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4630 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4632 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4633 case pn_Mod_X_except:
4634 set_ia32_exc_label(new_pred, 1);
4635 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4643 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_Div_M);
4644 case pn_DivMod_res_div:
4645 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_div_res);
4646 case pn_DivMod_res_mod:
4647 return new_rd_Proj(dbgi, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4648 case pn_DivMod_X_regular:
4649 return new_rd_Jmp(dbgi, block);
4650 case pn_DivMod_X_except:
4651 set_ia32_exc_label(new_pred, 1);
4652 return new_rd_Proj(dbgi, new_pred, mode_X, pn_ia32_Div_X_exc);
4661 panic("No idea how to transform proj->DivMod");
4665 * Transform and renumber the Projs from a CopyB.
/* The CopyB was lowered to either an ia32 CopyB_i (immediate count) or
 * CopyB (rep movs); map the memory proj onto the matching output. */
4667 static ir_node *gen_Proj_CopyB(ir_node *node)
4669 ir_node *pred = get_Proj_pred(node);
4670 ir_node *new_pred = be_transform_node(pred);
4671 dbg_info *dbgi = get_irn_dbg_info(node);
4672 long proj = get_Proj_proj(node);
4676 if (is_ia32_CopyB_i(new_pred)) {
4677 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_i_M);
4678 } else if (is_ia32_CopyB(new_pred)) {
4679 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_CopyB_M);
4686 panic("No idea how to transform proj->CopyB");
4690 * Transform and renumber the Projs from a Quot.
/* The Quot was lowered to an SSE xDiv or an x87 vfdiv; map memory and
 * result projs onto the matching outputs and modes. */
4692 static ir_node *gen_Proj_Quot(ir_node *node)
4694 ir_node *pred = get_Proj_pred(node);
4695 ir_node *new_pred = be_transform_node(pred);
4696 dbg_info *dbgi = get_irn_dbg_info(node);
4697 long proj = get_Proj_proj(node);
4701 if (is_ia32_xDiv(new_pred)) {
4702 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_xDiv_M);
4703 } else if (is_ia32_vfdiv(new_pred)) {
4704 return new_rd_Proj(dbgi, new_pred, mode_M, pn_ia32_vfdiv_M);
4708 if (is_ia32_xDiv(new_pred)) {
4709 return new_rd_Proj(dbgi, new_pred, mode_xmm, pn_ia32_xDiv_res);
4710 } else if (is_ia32_vfdiv(new_pred)) {
4711 return new_rd_Proj(dbgi, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4714 case pn_Quot_X_regular:
4715 case pn_Quot_X_except:
4720 panic("No idea how to transform proj->Quot");
/* Transform a be_Call into an ia32 Call: matches the call target as
 * address mode/immediate and routes register parameters into the fixed
 * eax/ecx/edx slots.  NOTE(review): excerpt is line-elided; comments
 * describe visible code only. */
4723 static ir_node *gen_be_Call(ir_node *node)
4725 dbg_info *const dbgi = get_irn_dbg_info(node);
4726 ir_node *const src_block = get_nodes_block(node);
4727 ir_node *const block = be_transform_node(src_block);
4728 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4729 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4730 ir_node *const sp = be_transform_node(src_sp);
4731 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4732 ia32_address_mode_t am;
4733 ia32_address_t *const addr = &am.addr;
4738 ir_node * eax = noreg_GP;
4739 ir_node * ecx = noreg_GP;
4740 ir_node * edx = noreg_GP;
4741 unsigned const pop = be_Call_get_pop(node);
4742 ir_type *const call_tp = be_Call_get_type(node);
4743 int old_no_pic_adjust;
4745 /* Run the x87 simulator if the call returns a float value */
4746 if (get_method_n_ress(call_tp) > 0) {
4747 ir_type *const res_type = get_method_res_type(call_tp, 0);
4748 ir_mode *const res_mode = get_type_mode(res_type);
4750 if (res_mode != NULL && mode_is_float(res_mode)) {
4751 env_cg->do_x87_sim = 1;
4755 /* We do not want be_Call direct calls */
4756 assert(be_Call_get_entity(node) == NULL);
4758 /* special case for PIC trampoline calls */
4759 old_no_pic_adjust = no_pic_adjust;
4760 no_pic_adjust = be_get_irg_options(env_cg->irg)->pic;
4762 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4763 match_am | match_immediate);
4765 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; walk remaining args backwards */
4767 i = get_irn_arity(node) - 1;
4768 fpcw = be_transform_node(get_irn_n(node, i--));
4769 for (; i >= be_pos_Call_first_arg; --i) {
4770 arch_register_req_t const *const req = arch_get_register_req(node, i);
4771 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4773 assert(req->type == arch_register_req_type_limited);
4774 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* route each register parameter into its dedicated input slot */
4776 switch (*req->limited) {
4777 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4778 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4779 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4780 default: panic("Invalid GP register for register parameter");
4784 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4785 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4786 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4787 set_am_attributes(call, &am);
4788 call = fix_mem_proj(call, &am);
4790 if (get_irn_pinned(node) == op_pin_state_pinned)
4791 set_irn_pinned(call, op_pin_state_pinned);
4793 SET_IA32_ORIG_NODE(call, node);
4795 if (ia32_cg_config.use_sse2) {
4796 /* remember this call for post-processing */
4797 ARR_APP1(ir_node *, call_list, call);
4798 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4805 * Transform Builtin trap
/* Lowered to the ud2 instruction (guaranteed invalid opcode). */
4807 static ir_node *gen_trap(ir_node *node)
4809 dbg_info *dbgi = get_irn_dbg_info(node);
4810 ir_node *block = be_transform_node(get_nodes_block(node));
4811 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4813 return new_bd_ia32_UD2(dbgi, block, mem);
4817 * Transform Builtin debugbreak
/* Lowered to the int3 breakpoint instruction. */
4819 static ir_node *gen_debugbreak(ir_node *node)
4821 dbg_info *dbgi = get_irn_dbg_info(node);
4822 ir_node *block = be_transform_node(get_nodes_block(node));
4823 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4825 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4831 * Transform Builtin return_address
/* Walks 'value' frames up via ClimbFrame (when value != 0; the guard is
 * elided in this excerpt) and loads the return address slot of that
 * frame.  'param' must be a Const. */
4833 static ir_node *gen_return_address(ir_node *node)
4835 ir_node *param = get_Builtin_param(node, 0);
4836 ir_node *frame = get_Builtin_param(node, 1);
4837 dbg_info *dbgi = get_irn_dbg_info(node);
4838 tarval *tv = get_Const_tarval(param);
4839 unsigned long value = get_tarval_long(tv);
4841 ir_node *block = be_transform_node(get_nodes_block(node));
4842 ir_node *ptr = be_transform_node(frame);
4844 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4845 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4846 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4849 /* load the return address from this frame */
4850 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4852 set_irn_pinned(load, get_irn_pinned(node));
4853 set_ia32_op_type(load, ia32_AddrModeS);
4854 set_ia32_ls_mode(load, mode_Iu);
4856 set_ia32_am_offs_int(load, 0);
4857 set_ia32_use_frame(load);
4858 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4860 if (get_irn_pinned(node) == op_pin_state_floats) {
/* floating loads must be rematerializable; the proj numbers of all
 * load variants coincide, which the assert documents */
4861 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4862 && pn_ia32_vfld_res == pn_ia32_Load_res
4863 && pn_ia32_Load_res == pn_ia32_res);
4864 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4867 SET_IA32_ORIG_NODE(load, node);
4868 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4872 * Transform Builtin frame_address
/* Analogous to gen_return_address, but loads the saved frame pointer
 * slot of the selected frame. */
4874 static ir_node *gen_frame_address(ir_node *node)
4876 ir_node *param = get_Builtin_param(node, 0);
4877 ir_node *frame = get_Builtin_param(node, 1);
4878 dbg_info *dbgi = get_irn_dbg_info(node);
4879 tarval *tv = get_Const_tarval(param);
4880 unsigned long value = get_tarval_long(tv);
4882 ir_node *block = be_transform_node(get_nodes_block(node));
4883 ir_node *ptr = be_transform_node(frame);
4888 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4889 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4890 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4893 /* load the frame address from this frame */
4894 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4896 set_irn_pinned(load, get_irn_pinned(node));
4897 set_ia32_op_type(load, ia32_AddrModeS);
4898 set_ia32_ls_mode(load, mode_Iu);
4900 ent = ia32_get_frame_address_entity();
4902 set_ia32_am_offs_int(load, 0);
4903 set_ia32_use_frame(load);
4904 set_ia32_frame_ent(load, ent);
4906 /* will fail anyway, but gcc does this: */
4907 set_ia32_am_offs_int(load, 0);
4910 if (get_irn_pinned(node) == op_pin_state_floats) {
/* see gen_return_address: proj numbers of all load variants coincide */
4911 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4912 && pn_ia32_vfld_res == pn_ia32_Load_res
4913 && pn_ia32_Load_res == pn_ia32_res);
4914 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4917 SET_IA32_ORIG_NODE(load, node);
4918 return new_r_Proj(load, mode_Iu, pn_ia32_Load_res);
4922 * Transform Builtin frame_address
/* NOTE(review): the doc fragment above looks copy-pasted — this
 * transforms the prefetch Builtin.  Selects between SSE prefetch
 * variants (by locality) and the 3DNow! prefetch/prefetchw; when the
 * target supports neither, the builtin degrades to a plain memory
 * edge. */
4924 static ir_node *gen_prefetch(ir_node *node)
4927 ir_node *ptr, *block, *mem, *base, *index;
4928 ir_node *param, *new_node;
4931 ia32_address_t addr;
4933 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4934 /* no prefetch at all, route memory */
4935 return be_transform_node(get_Builtin_mem(node));
/* second builtin parameter: read(0)/write(1) intent */
4938 param = get_Builtin_param(node, 1);
4939 tv = get_Const_tarval(param);
4940 rw = get_tarval_long(tv);
4942 /* construct load address */
4943 memset(&addr, 0, sizeof(addr));
4944 ptr = get_Builtin_param(node, 0);
4945 ia32_create_address_mode(&addr, ptr, 0);
4952 base = be_transform_node(base);
4955 if (index == NULL) {
4958 index = be_transform_node(index);
4961 dbgi = get_irn_dbg_info(node);
4962 block = be_transform_node(get_nodes_block(node));
4963 mem = be_transform_node(get_Builtin_mem(node));
4965 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4966 /* we have 3DNow!, this was already checked above */
4967 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4968 } else if (ia32_cg_config.use_sse_prefetch) {
4969 /* note: rw == 1 is IGNORED in that case */
4970 param = get_Builtin_param(node, 2);
4971 tv = get_Const_tarval(param);
4972 locality = get_tarval_long(tv);
4974 /* SSE style prefetch */
/* locality hint selects prefetchnta/t2/t1/t0 (cases elided here) */
4977 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4980 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4983 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4986 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4990 assert(ia32_cg_config.use_3dnow_prefetch);
4991 /* 3DNow! style prefetch */
4992 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4995 set_irn_pinned(new_node, get_irn_pinned(node));
4996 set_ia32_op_type(new_node, ia32_AddrModeS);
4997 set_ia32_ls_mode(new_node, mode_Bu);
4998 set_address(new_node, &addr);
5000 SET_IA32_ORIG_NODE(new_node, node);
5002 be_dep_on_frame(new_node);
5003 return new_r_Proj(new_node, mode_M, pn_ia32_Prefetch_M);
5007 * Transform bsf like node
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	/* Transform a bsf/bsr-like builtin with one operand; the operand may
	 * be folded into an address-mode source of the constructed node. */
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* try to match the operand as an address-mode operand */
	match_arguments(&am, block, NULL, param, NULL, match_am);

	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	/* reattach a possible memory Proj when a load was folded in */
	return fix_mem_proj(cnt, &am);
5032 * Transform builtin ffs.
static ir_node *gen_ffs(ir_node *node)
	/* Transform builtin ffs: ffs(x) = bsf(x) + 1, and 0 for x == 0.
	 * The zero case is handled branch-free: when bsf sets the zero flag,
	 * setcc/neg produce an all-ones value that is or'ed into the result,
	 * so the final +1 wraps it to 0. */
	ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node *real = skip_Proj(bsf);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *flag, *set, *conv, *neg, *or;

	/* turn the Bsf into a multi-result node so we can also use its flags */
	if (get_irn_mode(real) != mode_T) {
	set_irn_mode(real, mode_T);
	bsf = new_r_Proj(real, mode_Iu, pn_ia32_res);

	flag = new_r_Proj(real, mode_b, pn_ia32_flags);

	/* set = (input was zero) ? 1 : 0 */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);

	/* widen the 8-bit setcc result to 32 bit */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg = 0 or 0xFFFFFFFF (all ones exactly when the input was zero) */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);

	/* result = or + 1 */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
5070 * Transform builtin clz.
static ir_node *gen_clz(ir_node *node)
	/* Transform builtin clz: bsr yields the index of the highest set bit
	 * (31 - clz), so xor-ing with 31 gives the leading-zero count.
	 * NOTE(review): the result for a zero input is undefined, as with the
	 * underlying bsr instruction. */
	ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
	ir_node *real = skip_Proj(bsr);
	dbg_info *dbgi = get_irn_dbg_info(real);
	ir_node *block = get_nodes_block(real);
	ir_node *imm = ia32_create_Immediate(NULL, 0, 31);

	return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
5084 * Transform builtin ctz.
static ir_node *gen_ctz(ir_node *node)
	/* Transform builtin ctz: bsf directly yields the index of the lowest
	 * set bit, which equals the trailing-zero count. */
	return gen_unop_AM(node, new_bd_ia32_Bsf);
5092 * Transform builtin parity.
static ir_node *gen_parity(ir_node *node)
	/* Transform builtin parity: compare the operand against 0 to set the
	 * flags, then materialize the parity flag via setcc and widen the
	 * 8-bit result. */
	ir_node *param = get_Builtin_param(node, 0);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *imm, *cmp, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* the operand may be folded into an address-mode source of the Cmp */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);

	SET_IA32_ORIG_NODE(cmp, node);

	cmp = fix_mem_proj(cmp, &am);

	/* setcc on the parity flag */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	/* widen the 8-bit setcc result */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
5132 * Transform builtin popcount
5134 static ir_node *gen_popcount(ir_node *node)
5136 ir_node *param = get_Builtin_param(node, 0);
5137 dbg_info *dbgi = get_irn_dbg_info(node);
5139 ir_node *block = get_nodes_block(node);
5140 ir_node *new_block = be_transform_node(block);
5143 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
5145 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
5146 if (ia32_cg_config.use_popcnt) {
5147 ia32_address_mode_t am;
5148 ia32_address_t *addr = &am.addr;
5151 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
5153 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
5154 set_am_attributes(cnt, &am);
5155 set_ia32_ls_mode(cnt, get_irn_mode(param));
5157 SET_IA32_ORIG_NODE(cnt, node);
5158 return fix_mem_proj(cnt, &am);
5161 new_param = be_transform_node(param);
5163 /* do the standard popcount algo */
5165 /* m1 = x & 0x55555555 */
5166 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
5167 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
5170 simm = ia32_create_Immediate(NULL, 0, 1);
5171 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
5173 /* m2 = s1 & 0x55555555 */
5174 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
5177 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
5179 /* m4 = m3 & 0x33333333 */
5180 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
5181 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
5184 simm = ia32_create_Immediate(NULL, 0, 2);
5185 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
5187 /* m5 = s2 & 0x33333333 */
5188 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
5191 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
5193 /* m7 = m6 & 0x0F0F0F0F */
5194 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
5195 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
5198 simm = ia32_create_Immediate(NULL, 0, 4);
5199 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
5201 /* m8 = s3 & 0x0F0F0F0F */
5202 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
5205 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
5207 /* m10 = m9 & 0x00FF00FF */
5208 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
5209 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
5212 simm = ia32_create_Immediate(NULL, 0, 8);
5213 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
5215 /* m11 = s4 & 0x00FF00FF */
5216 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
5218 /* m12 = m10 + m11 */
5219 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
5221 /* m13 = m12 & 0x0000FFFF */
5222 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
5223 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
5225 /* s5 = m12 >> 16 */
5226 simm = ia32_create_Immediate(NULL, 0, 16);
5227 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
5229 /* res = m13 + s5 */
5230 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
5234 * Transform builtin byte swap.
5236 static ir_node *gen_bswap(ir_node *node)
5238 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
5239 dbg_info *dbgi = get_irn_dbg_info(node);
5241 ir_node *block = get_nodes_block(node);
5242 ir_node *new_block = be_transform_node(block);
5243 ir_mode *mode = get_irn_mode(param);
5244 unsigned size = get_mode_size_bits(mode);
5245 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5249 if (ia32_cg_config.use_i486) {
5250 /* swap available */
5251 return new_bd_ia32_Bswap(dbgi, new_block, param);
5253 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5254 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5256 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5257 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5259 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5261 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5262 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5264 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5265 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5268 /* swap16 always available */
5269 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5272 panic("Invalid bswap size (%d)", size);
5277 * Transform builtin outport.
static ir_node *gen_outport(ir_node *node)
	/* Transform builtin outport: write a value (builtin param 1) to an
	 * I/O port (builtin param 0, ideally an immediate). */
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *oldv = get_Builtin_param(node, 1);
	ir_mode *mode = get_irn_mode(oldv);
	ir_node *value = be_transform_node(oldv);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	/* the out instruction's operand size follows the value's mode */
	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	set_ia32_ls_mode(res, mode);
5295 * Transform builtin inport.
static ir_node *gen_inport(ir_node *node)
	/* Transform builtin inport: read a value from an I/O port; the result
	 * mode comes from the builtin's method type. */
	ir_type *tp = get_Builtin_type(node);
	ir_type *rstp = get_method_res_type(tp, 0);
	ir_mode *mode = get_type_mode(rstp);
	ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *mem = be_transform_node(get_Builtin_mem(node));
	dbg_info *dbgi = get_irn_dbg_info(node);

	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
	set_ia32_ls_mode(res, mode);

	/* check for missing Result Proj */
5315 * Transform a builtin inner trampoline
static ir_node *gen_inner_trampoline(ir_node *node)
	/* Transform a builtin inner trampoline: write a small code stub to
	 * memory at <ptr> that loads <env> into ecx and then jumps to
	 * <callee> with a pc-relative jmp. The stores are chained through
	 * the address-mode structure; each store advances addr (offset
	 * handling between stores is outside this view). */
	ir_node *ptr = get_Builtin_param(node, 0);
	ir_node *callee = get_Builtin_param(node, 1);
	ir_node *env = be_transform_node(get_Builtin_param(node, 2));
	ir_node *mem = get_Builtin_mem(node);
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_node *trampoline;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ia32_address_t addr;

	/* construct store address */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, ptr, 0);

	if (addr.base == NULL) {
		addr.base = noreg_GP;
		addr.base = be_transform_node(addr.base);

	if (addr.index == NULL) {
		addr.index = noreg_GP;
		addr.index = be_transform_node(addr.index);
	addr.mem = be_transform_node(mem);

	/* mov ecx, <env> */
	/* 0xB9 is the opcode byte of "mov ecx, imm32" */
	val = ia32_create_Immediate(NULL, 0, 0xB9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	/* store the 32-bit environment pointer (the mov's immediate) */
	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, env);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* jmp rel <callee> */
	/* 0xE9 is the opcode byte of "jmp rel32" */
	val = ia32_create_Immediate(NULL, 0, 0xE9);
	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
	                              addr.index, addr.mem, val);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Bu);
	set_address(store, &addr);

	trampoline = be_transform_node(ptr);

	/* the callee is typically an immediate */
	/* rel32 = callee - (trampoline + 10): the -10 bias accounts for the
	 * 10 stub bytes already emitted before the jmp's displacement */
	if (is_SymConst(callee)) {
		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);

	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
	                          addr.index, addr.mem, rel);
	set_irn_pinned(store, get_irn_pinned(node));
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_ls_mode(store, mode_Iu);
	set_address(store, &addr);

	/* result tuple: memory and the trampoline address */
	return new_r_Tuple(new_block, 2, in);
5405 * Transform Builtin node.
static ir_node *gen_Builtin(ir_node *node)
	/* Dispatch a Builtin node to the matching gen_* transformer;
	 * unhandled kinds are a hard error. */
	ir_builtin_kind kind = get_Builtin_kind(node);

		return gen_trap(node);
	case ir_bk_debugbreak:
		return gen_debugbreak(node);
	case ir_bk_return_address:
		return gen_return_address(node);
	case ir_bk_frame_address:
		return gen_frame_address(node);
	case ir_bk_prefetch:
		return gen_prefetch(node);
		return gen_ffs(node);
		return gen_clz(node);
		return gen_ctz(node);
		return gen_parity(node);
	case ir_bk_popcount:
		return gen_popcount(node);
		return gen_bswap(node);
		return gen_outport(node);
		return gen_inport(node);
	case ir_bk_inner_trampoline:
		return gen_inner_trampoline(node);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5445 * Transform Proj(Builtin) node.
static ir_node *gen_Proj_Builtin(ir_node *proj)
	/* Transform a Proj of a Builtin: map the firm Proj number onto the
	 * corresponding Proj of the already-transformed ia32 node. */
	ir_node *node = get_Proj_pred(proj);
	ir_node *new_node = be_transform_node(node);
	ir_builtin_kind kind = get_Builtin_kind(node);

	case ir_bk_return_address:
	case ir_bk_frame_address:
	case ir_bk_popcount:
		/* these transformers return the value result directly */
		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
	case ir_bk_debugbreak:
	case ir_bk_prefetch:
		/* these only produce a memory result */
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return new_r_Proj(new_node, get_irn_mode(proj), pn_ia32_Inport_res);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return new_r_Proj(new_node, mode_M, pn_ia32_Inport_M);
	case ir_bk_inner_trampoline:
		/* gen_inner_trampoline returns a Tuple: (memory, address) */
		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
			return get_Tuple_pred(new_node, 1);
		assert(get_Proj_proj(proj) == pn_Builtin_M);
		return get_Tuple_pred(new_node, 0);
	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
static ir_node *gen_be_IncSP(ir_node *node)
	/* Duplicate a be_IncSP; the stack adjustment is implemented with
	 * add/sub later, so mark it as clobbering the flags. */
	ir_node *res = be_duplicate_node(node);
	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5497 * Transform the Projs from a be_Call.
static ir_node *gen_Proj_be_Call(ir_node *node)
	/* Transform a Proj of a be_Call: renumber the Proj onto the ia32_Call
	 * and, for result values, find the matching output by comparing
	 * register requirements. */
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);

	if (proj == pn_be_Call_M_regular) {
		return new_rd_Proj(dbgi, new_call, mode_M, n_ia32_Call_mem);

	/* transform call modes */
	if (mode_is_data(mode)) {
		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);

	/* Map from be_Call to ia32_Call proj number */
	if (proj == pn_be_Call_sp) {
		proj = pn_ia32_Call_stack;
	} else if (proj == pn_be_Call_M_regular) {
		proj = pn_ia32_Call_M;
	arch_register_req_t const *const req = arch_get_register_req_out(node);
	int const n_outs = arch_irn_get_n_outs(new_call);

	assert(proj >= pn_be_Call_first_res);
	assert(req->type & arch_register_req_type_limited);

	/* search the new call's outputs for one with the same limited
	 * register requirement */
	for (i = 0; i < n_outs; ++i) {
		arch_register_req_t const *const new_req
			= arch_get_out_register_req(new_call, i);

		if (!(new_req->type & arch_register_req_type_limited) ||
		    new_req->cls != req->cls ||
		    *new_req->limited != *req->limited)

	res = new_rd_Proj(dbgi, new_call, mode, proj);

	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
	case pn_ia32_Call_stack:
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);

	case pn_ia32_Call_fpcw:
		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5562 * Transform the Projs from a Cmp.
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* Cmp Projs must never reach the backend directly: the mode_b
	 * lowering phase is expected to have removed them. */
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5572 * Transform the Projs from a Bound.
static ir_node *gen_Proj_Bound(ir_node *node)
	/* Transform the Projs of a Bound node: the bounds check itself is
	 * transformed into a Jcc, the index and memory are passed through. */
	ir_node *pred = get_Proj_pred(node);

	switch (get_Proj_proj(node)) {
		return be_transform_node(get_Bound_mem(pred));
	case pn_Bound_X_regular:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_true);
	case pn_Bound_X_except:
		new_node = be_transform_node(pred);
		return new_r_Proj(new_node, mode_X, pn_ia32_Jcc_false);
		return be_transform_node(get_Bound_index(pred));
	panic("unsupported Proj from Bound");
static ir_node *gen_Proj_ASM(ir_node *node)
	/* Transform a Proj of an ASM node: the memory output is always the
	 * last output of the transformed ASM; value outputs keep their
	 * position. */
	ir_mode *mode = get_irn_mode(node);
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	long pos = get_Proj_proj(node);

	if (mode == mode_M) {
		pos = arch_irn_get_n_outs(new_pred)-1;
	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
	} else if (mode_is_float(mode)) {
		panic("unexpected proj mode at ASM");

	return new_r_Proj(new_pred, mode, pos);
5616 * Transform and potentially renumber Proj nodes.
static ir_node *gen_Proj(ir_node *node)
	/* Transform and potentially renumber a Proj node, dispatching on the
	 * opcode of its predecessor. */
	ir_node *pred = get_Proj_pred(node);

	switch (get_irn_opcode(pred)) {
		proj = get_Proj_proj(node);
		if (proj == pn_Store_M) {
			return be_transform_node(pred);
		panic("No idea how to transform proj->Store");
		return gen_Proj_Load(node);
		return gen_Proj_ASM(node);
		return gen_Proj_Builtin(node);
		return gen_Proj_DivMod(node);
		return gen_Proj_CopyB(node);
		return gen_Proj_Quot(node);
		return gen_Proj_be_SubSP(node);
		return gen_Proj_be_AddSP(node);
		return gen_Proj_be_Call(node);
		return gen_Proj_Cmp(node);
		return gen_Proj_Bound(node);
		proj = get_Proj_proj(node);
	case pn_Start_X_initial_exec: {
		ir_node *block = get_nodes_block(pred);
		ir_node *new_block = be_transform_node(block);
		dbg_info *dbgi = get_irn_dbg_info(node);
		/* we exchange the ProjX with a jump */
		ir_node *jump = new_rd_Jmp(dbgi, new_block);
	case pn_Start_P_tls:
		return gen_Proj_tls(node);
	if (is_ia32_l_FloattoLL(pred)) {
		return gen_Proj_l_FloattoLL(node);
	} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		ir_mode *mode = get_irn_mode(node);
		if (ia32_mode_needs_gp_reg(mode)) {
			ir_node *new_pred = be_transform_node(pred);
			ir_node *new_proj = new_r_Proj(new_pred, mode_Iu,
			                               get_Proj_proj(node));
			/* keep the original node number for debugging purposes */
			new_proj->node_nr = node->node_nr;
	/* default: just duplicate the Proj */
	return be_duplicate_node(node);
5695 * Enters all transform functions into the generic pointer
static void register_transformers(void)
	/* Enter all ia32 transform callbacks into the generic op table;
	 * ops without a special transformer are duplicated unchanged. */
	/* first clear the generic function pointer for all ops */
	be_start_transform_setup();

	be_set_transform_function(op_Abs, gen_Abs);
	be_set_transform_function(op_Add, gen_Add);
	be_set_transform_function(op_And, gen_And);
	be_set_transform_function(op_ASM, gen_ASM);
	be_set_transform_function(op_be_AddSP, gen_be_AddSP);
	be_set_transform_function(op_be_Call, gen_be_Call);
	be_set_transform_function(op_be_Copy, gen_be_Copy);
	be_set_transform_function(op_be_FrameAddr, gen_be_FrameAddr);
	be_set_transform_function(op_be_IncSP, gen_be_IncSP);
	be_set_transform_function(op_be_Return, gen_be_Return);
	be_set_transform_function(op_be_SubSP, gen_be_SubSP);
	be_set_transform_function(op_Bound, gen_Bound);
	be_set_transform_function(op_Builtin, gen_Builtin);
	be_set_transform_function(op_Cmp, gen_Cmp);
	be_set_transform_function(op_Cond, gen_Cond);
	be_set_transform_function(op_Const, gen_Const);
	be_set_transform_function(op_Conv, gen_Conv);
	be_set_transform_function(op_CopyB, gen_CopyB);
	be_set_transform_function(op_Div, gen_Div);
	be_set_transform_function(op_DivMod, gen_DivMod);
	be_set_transform_function(op_Eor, gen_Eor);
	be_set_transform_function(op_ia32_l_Adc, gen_ia32_l_Adc);
	be_set_transform_function(op_ia32_l_Add, gen_ia32_l_Add);
	be_set_transform_function(op_ia32_Leave, be_duplicate_node);
	be_set_transform_function(op_ia32_l_FloattoLL, gen_ia32_l_FloattoLL);
	be_set_transform_function(op_ia32_l_IMul, gen_ia32_l_IMul);
	be_set_transform_function(op_ia32_l_LLtoFloat, gen_ia32_l_LLtoFloat);
	be_set_transform_function(op_ia32_l_Mul, gen_ia32_l_Mul);
	be_set_transform_function(op_ia32_l_SarDep, gen_ia32_l_SarDep);
	be_set_transform_function(op_ia32_l_Sbb, gen_ia32_l_Sbb);
	be_set_transform_function(op_ia32_l_ShlDep, gen_ia32_l_ShlDep);
	be_set_transform_function(op_ia32_l_ShlD, gen_ia32_l_ShlD);
	be_set_transform_function(op_ia32_l_ShrDep, gen_ia32_l_ShrDep);
	be_set_transform_function(op_ia32_l_ShrD, gen_ia32_l_ShrD);
	be_set_transform_function(op_ia32_l_Sub, gen_ia32_l_Sub);
	be_set_transform_function(op_ia32_GetEIP, be_duplicate_node);
	be_set_transform_function(op_ia32_Minus64Bit, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_GP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_VFP, be_duplicate_node);
	be_set_transform_function(op_ia32_NoReg_XMM, be_duplicate_node);
	be_set_transform_function(op_ia32_PopEbp, be_duplicate_node);
	be_set_transform_function(op_ia32_Push, be_duplicate_node);
	be_set_transform_function(op_IJmp, gen_IJmp);
	be_set_transform_function(op_Jmp, gen_Jmp);
	be_set_transform_function(op_Load, gen_Load);
	be_set_transform_function(op_Minus, gen_Minus);
	be_set_transform_function(op_Mod, gen_Mod);
	be_set_transform_function(op_Mul, gen_Mul);
	be_set_transform_function(op_Mulh, gen_Mulh);
	be_set_transform_function(op_Mux, gen_Mux);
	be_set_transform_function(op_Not, gen_Not);
	be_set_transform_function(op_Or, gen_Or);
	be_set_transform_function(op_Phi, gen_Phi);
	be_set_transform_function(op_Proj, gen_Proj);
	be_set_transform_function(op_Quot, gen_Quot);
	be_set_transform_function(op_Rotl, gen_Rotl);
	be_set_transform_function(op_Shl, gen_Shl);
	be_set_transform_function(op_Shr, gen_Shr);
	be_set_transform_function(op_Shrs, gen_Shrs);
	be_set_transform_function(op_Store, gen_Store);
	be_set_transform_function(op_Sub, gen_Sub);
	be_set_transform_function(op_SymConst, gen_SymConst);
	be_set_transform_function(op_Unknown, gen_Unknown);
5768 * Pre-transform all unknown and noreg nodes.
static void ia32_pretransform_node(void)
	/* Pre-transform the NoReg placeholder nodes and cache the nomem and
	 * noreg_GP nodes used throughout the transformers. */
	ia32_code_gen_t *cg = env_cg;

	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);

	nomem = get_irg_no_mem(current_ir_graph);
	noreg_GP = ia32_new_NoReg_gp(cg);
5785 * Post-process all calls if we are in SSE mode.
5786 * The ABI requires that the results are in st0, copy them
5787 * to a xmm register.
static void postprocess_fp_call_results(void)
	/* In SSE mode, float results still arrive in st(0) per the ABI.
	 * For every recorded call with a float result, either patch SSE
	 * stores into x87 stores directly, or spill st(0) to the frame and
	 * reload it into an xmm register for the remaining users. */
	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
		ir_node *call = call_list[i];
		ir_type *mtp = call_types[i];

		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
			ir_type *res_tp = get_method_res_type(mtp, j);
			ir_node *res, *new_res;
			const ir_edge_t *edge, *next;

			if (! is_atomic_type(res_tp)) {
				/* no floating point return */
			mode = get_type_mode(res_tp);
			if (! mode_is_float(mode)) {
				/* no floating point return */

			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);

			/* now patch the users */
			foreach_out_edge_safe(res, edge, next) {
				ir_node *succ = get_edge_src_irn(edge);

				/* Keeps need no rewiring */
				if (be_is_Keep(succ))

				if (is_ia32_xStore(succ)) {
					/* an xStore can be patched into an vfst */
					dbg_info *db = get_irn_dbg_info(succ);
					ir_node *block = get_nodes_block(succ);
					ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
					ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
					ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
					ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
					ir_mode *mode = get_ia32_ls_mode(succ);

					ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
					if (is_ia32_use_frame(succ))
						set_ia32_use_frame(st);
					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
					set_irn_pinned(st, get_irn_pinned(succ));
					set_ia32_op_type(st, ia32_AddrModeD);
				/* build the st(0) -> frame -> xmm transfer lazily, only
				 * once per result */
				if (new_res == NULL) {
					dbg_info *db = get_irn_dbg_info(call);
					ir_node *block = get_nodes_block(call);
					ir_node *frame = get_irg_frame(current_ir_graph);
					ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
					ir_node *call_mem = new_r_Proj(call, mode_M, pn_ia32_Call_M);
					ir_node *vfst, *xld, *new_mem;

					/* store st(0) on stack */
					vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
					set_ia32_op_type(vfst, ia32_AddrModeD);
					set_ia32_use_frame(vfst);

					/* load into SSE register */
					xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
					set_ia32_op_type(xld, ia32_AddrModeS);
					set_ia32_use_frame(xld);

					new_res = new_r_Proj(xld, mode, pn_ia32_xLoad_res);
					new_mem = new_r_Proj(xld, mode_M, pn_ia32_xLoad_M);

					/* let all former memory users see the new memory */
					if (old_mem != NULL) {
						edges_reroute(old_mem, new_mem, current_ir_graph);

				set_irn_n(succ, get_edge_src_pos(edge), new_res);
5878 /* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg)
	/* Drive the firm -> ia32 transformation of one graph: set up the
	 * transformer table and analyses, run the transformation with CSE
	 * disabled, post-process SSE call results, and tear everything down. */
	register_transformers();

	initial_fpcw = NULL;

	be_timer_push(T_HEIGHTS);
	heights = heights_new(cg->irg);
	be_timer_pop(T_HEIGHTS);
	ia32_calculate_non_address_mode_nodes(cg->irg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	call_list = NEW_ARR_F(ir_node *, 0);
	call_types = NEW_ARR_F(ir_type *, 0);
	be_transform_graph(cg->irg, ia32_pretransform_node);

	/* in SSE mode the ABI still returns floats in st(0): fix that up */
	if (ia32_cg_config.use_sse2)
		postprocess_fp_call_results();
	DEL_ARR_F(call_types);
	DEL_ARR_F(call_list);

	set_opt_cse(cse_last);

	ia32_free_non_address_mode_nodes();
	heights_free(heights);
5914 void ia32_init_transform(void)
5916 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");