2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into ia32-Firm.
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
95 static ir_node *initial_fpcw = NULL;
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
217 if (tarval_is_null(tv)) {
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
222 } else if (tarval_is_one(tv)) {
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 int_Iu[align] = tp = new_type_primitive(mode);
384 /* set the specified alignment */
385 set_type_alignment_bytes(tp, align);
387 return int_Iu[align];
388 } else if (mode == mode_Lu) {
389 static ir_type *int_Lu[16] = {NULL, };
391 if (int_Lu[align] == NULL) {
392 int_Lu[align] = tp = new_type_primitive(mode);
393 /* set the specified alignment */
394 set_type_alignment_bytes(tp, align);
396 return int_Lu[align];
397 } else if (mode == mode_F) {
398 static ir_type *float_F[16] = {NULL, };
400 if (float_F[align] == NULL) {
401 float_F[align] = tp = new_type_primitive(mode);
402 /* set the specified alignment */
403 set_type_alignment_bytes(tp, align);
405 return float_F[align];
406 } else if (mode == mode_D) {
407 static ir_type *float_D[16] = {NULL, };
409 if (float_D[align] == NULL) {
410 float_D[align] = tp = new_type_primitive(mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return float_D[align];
416 static ir_type *float_E[16] = {NULL, };
418 if (float_E[align] == NULL) {
419 float_E[align] = tp = new_type_primitive(mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, align);
423 return float_E[align];
428 * Create a float[2] array type for the given atomic type.
430 * @param tp the atomic type
432 static ir_type *ia32_create_float_array(ir_type *tp)
434 ir_mode *mode = get_type_mode(tp);
435 unsigned align = get_type_alignment_bytes(tp);
440 if (mode == mode_F) {
441 static ir_type *float_F[16] = {NULL, };
443 if (float_F[align] != NULL)
444 return float_F[align];
445 arr = float_F[align] = new_type_array(1, tp);
446 } else if (mode == mode_D) {
447 static ir_type *float_D[16] = {NULL, };
449 if (float_D[align] != NULL)
450 return float_D[align];
451 arr = float_D[align] = new_type_array(1, tp);
453 static ir_type *float_E[16] = {NULL, };
455 if (float_E[align] != NULL)
456 return float_E[align];
457 arr = float_E[align] = new_type_array(1, tp);
459 set_type_alignment_bytes(arr, align);
460 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
461 set_type_state(arr, layout_fixed);
465 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
466 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
468 static const struct {
469 const char *ent_name;
470 const char *cnst_str;
473 } names [ia32_known_const_max] = {
474 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
475 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
476 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
477 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
478 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
480 static ir_entity *ent_cache[ia32_known_const_max];
482 const char *ent_name, *cnst_str;
488 ent_name = names[kct].ent_name;
489 if (! ent_cache[kct]) {
490 cnst_str = names[kct].cnst_str;
492 switch (names[kct].mode) {
493 case 0: mode = mode_Iu; break;
494 case 1: mode = mode_Lu; break;
495 default: mode = mode_F; break;
497 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
498 tp = ia32_create_float_type(mode, names[kct].align);
500 if (kct == ia32_ULLBIAS)
501 tp = ia32_create_float_array(tp);
502 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
504 set_entity_ld_ident(ent, get_entity_ident(ent));
505 set_entity_visibility(ent, visibility_local);
506 set_entity_variability(ent, variability_constant);
507 set_entity_allocation(ent, allocation_static);
509 if (kct == ia32_ULLBIAS) {
510 ir_initializer_t *initializer = create_initializer_compound(2);
512 set_initializer_compound_value(initializer, 0,
513 create_initializer_tarval(get_tarval_null(mode)));
514 set_initializer_compound_value(initializer, 1,
515 create_initializer_tarval(tv));
517 set_entity_initializer(ent, initializer);
519 set_entity_initializer(ent, create_initializer_tarval(tv));
522 /* cache the entry */
523 ent_cache[kct] = ent;
526 return ent_cache[kct];
530 * return true if the node is a Proj(Load) and could be used in source address
531 * mode for another node. Will return only true if the @p other node is not
532 * dependent on the memory of the Load (for binary operations use the other
533 * input here, for unary operations use NULL).
535 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
536 ir_node *other, ir_node *other2, match_flags_t flags)
541 /* float constants are always available */
542 if (is_Const(node)) {
543 ir_mode *mode = get_irn_mode(node);
544 if (mode_is_float(mode)) {
545 if (ia32_cg_config.use_sse2) {
546 if (is_simple_sse_Const(node))
549 if (is_simple_x87_Const(node))
552 if (get_irn_n_edges(node) > 1)
560 load = get_Proj_pred(node);
561 pn = get_Proj_proj(node);
562 if (!is_Load(load) || pn != pn_Load_res)
564 if (get_nodes_block(load) != block)
566 /* we only use address mode if we're the only user of the load */
567 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
569 /* in some edge cases with address mode we might reach the load normally
570 * and through some AM sequence, if it is already materialized then we
571 * can't create an AM node from it */
572 if (be_is_transformed(node))
575 /* don't do AM if other node inputs depend on the load (via mem-proj) */
576 if (other != NULL && prevents_AM(block, load, other))
579 if (other2 != NULL && prevents_AM(block, load, other2))
585 typedef struct ia32_address_mode_t ia32_address_mode_t;
586 struct ia32_address_mode_t {
591 ia32_op_type_t op_type;
595 unsigned commutative : 1;
596 unsigned ins_permuted : 1;
599 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
601 /* construct load address */
602 memset(addr, 0, sizeof(addr[0]));
603 ia32_create_address_mode(addr, ptr, 0);
605 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
606 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
607 addr->mem = be_transform_node(mem);
610 static void build_address(ia32_address_mode_t *am, ir_node *node,
611 ia32_create_am_flags_t flags)
613 ia32_address_t *addr = &am->addr;
619 if (is_Const(node)) {
620 ir_entity *entity = create_float_const_entity(node);
621 addr->base = noreg_GP;
622 addr->index = noreg_GP;
624 addr->symconst_ent = entity;
626 am->ls_mode = get_type_mode(get_entity_type(entity));
627 am->pinned = op_pin_state_floats;
631 load = get_Proj_pred(node);
632 ptr = get_Load_ptr(load);
633 mem = get_Load_mem(load);
634 new_mem = be_transform_node(mem);
635 am->pinned = get_irn_pinned(load);
636 am->ls_mode = get_Load_mode(load);
637 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
640 /* construct load address */
641 ia32_create_address_mode(addr, ptr, flags);
643 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
644 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
648 static void set_address(ir_node *node, const ia32_address_t *addr)
650 set_ia32_am_scale(node, addr->scale);
651 set_ia32_am_sc(node, addr->symconst_ent);
652 set_ia32_am_offs_int(node, addr->offset);
653 if (addr->symconst_sign)
654 set_ia32_am_sc_sign(node);
656 set_ia32_use_frame(node);
657 set_ia32_frame_ent(node, addr->frame_entity);
661 * Apply attributes of a given address mode to a node.
663 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
665 set_address(node, &am->addr);
667 set_ia32_op_type(node, am->op_type);
668 set_ia32_ls_mode(node, am->ls_mode);
669 if (am->pinned == op_pin_state_pinned) {
670 /* beware: some nodes are already pinned and did not allow to change the state */
671 if (get_irn_pinned(node) != op_pin_state_pinned)
672 set_irn_pinned(node, op_pin_state_pinned);
675 set_ia32_commutative(node);
679 * Check, if a given node is a Down-Conv, ie. a integer Conv
680 * from a mode with a mode with more bits to a mode with lesser bits.
681 * Moreover, we return only true if the node has not more than 1 user.
683 * @param node the node
684 * @return non-zero if node is a Down-Conv
686 static int is_downconv(const ir_node *node)
694 /* we only want to skip the conv when we're the only user
695 * (not optimal but for now...)
697 if (get_irn_n_edges(node) > 1)
700 src_mode = get_irn_mode(get_Conv_op(node));
701 dest_mode = get_irn_mode(node);
703 ia32_mode_needs_gp_reg(src_mode) &&
704 ia32_mode_needs_gp_reg(dest_mode) &&
705 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
708 /* Skip all Down-Conv's on a given node and return the resulting node. */
709 ir_node *ia32_skip_downconv(ir_node *node)
711 while (is_downconv(node))
712 node = get_Conv_op(node);
717 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
719 ir_mode *mode = get_irn_mode(node);
724 if (mode_is_signed(mode)) {
729 block = get_nodes_block(node);
730 dbgi = get_irn_dbg_info(node);
732 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
736 * matches operands of a node into ia32 addressing/operand modes. This covers
737 * usage of source address mode, immediates, operations with non 32-bit modes,
739 * The resulting data is filled into the @p am struct. block is the block
740 * of the node whose arguments are matched. op1, op2 are the first and second
741 * input that are matched (op1 may be NULL). other_op is another unrelated
742 * input that is not matched! but which is needed sometimes to check if AM
743 * for op1/op2 is legal.
744 * @p flags describes the supported modes of the operation in detail.
746 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
747 ir_node *op1, ir_node *op2, ir_node *other_op,
750 ia32_address_t *addr = &am->addr;
751 ir_mode *mode = get_irn_mode(op2);
752 int mode_bits = get_mode_size_bits(mode);
753 ir_node *new_op1, *new_op2;
755 unsigned commutative;
756 int use_am_and_immediates;
759 memset(am, 0, sizeof(am[0]));
761 commutative = (flags & match_commutative) != 0;
762 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
763 use_am = (flags & match_am) != 0;
764 use_immediate = (flags & match_immediate) != 0;
765 assert(!use_am_and_immediates || use_immediate);
768 assert(!commutative || op1 != NULL);
769 assert(use_am || !(flags & match_8bit_am));
770 assert(use_am || !(flags & match_16bit_am));
772 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
773 (mode_bits == 16 && !(flags & match_16bit_am))) {
777 /* we can simply skip downconvs for mode neutral nodes: the upper bits
778 * can be random for these operations */
779 if (flags & match_mode_neutral) {
780 op2 = ia32_skip_downconv(op2);
782 op1 = ia32_skip_downconv(op1);
786 /* match immediates. firm nodes are normalized: constants are always on the
789 if (!(flags & match_try_am) && use_immediate) {
790 new_op2 = try_create_Immediate(op2, 0);
793 if (new_op2 == NULL &&
794 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
795 build_address(am, op2, 0);
796 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
797 if (mode_is_float(mode)) {
798 new_op2 = ia32_new_NoReg_vfp(env_cg);
802 am->op_type = ia32_AddrModeS;
803 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
805 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
807 build_address(am, op1, 0);
809 if (mode_is_float(mode)) {
810 noreg = ia32_new_NoReg_vfp(env_cg);
815 if (new_op2 != NULL) {
818 new_op1 = be_transform_node(op2);
820 am->ins_permuted = 1;
822 am->op_type = ia32_AddrModeS;
825 am->op_type = ia32_Normal;
827 if (flags & match_try_am) {
833 mode = get_irn_mode(op2);
834 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
835 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
837 new_op2 = create_upconv(op2, NULL);
838 am->ls_mode = mode_Iu;
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
843 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
980 static ir_node *get_fpcw(void)
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
/* Transforms a firm Add into an ia32 node.
 * Floats go to SSE xAdd or x87 vfadd; integer Adds are matched against
 * several patterns (pure immediate tree -> Const, add-with-immediate ->
 * Lea, source address mode -> Add, fallback -> Lea). */
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
/* Floating point: SSE2 gets a commutative xAdd with address mode,
 * otherwise fall back to the x87 vfadd construction. */
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
/* Downconvs on the operands are irrelevant for an integer add. */
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
/* Force the whole Add tree into an address-mode description. */
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* Case 0: no base and no index left -> the whole tree is an immediate. */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
/* Case 1: exactly one register operand -> Lea (or drop a useless x+0). */
1219 if (add_immediate_op != NULL) {
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
/* Transforms a firm Mul: SSE xMul or x87 vfmul for floats, otherwise an
 * IMul that may take immediates and source address mode on both sides. */
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* Integer multiply: mode-neutral since only the low 32 bits matter. */
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
/* Transforms a firm Mulh (upper 32 bits of a 32x32->64 multiply).
 * Chooses signed IMul1OP vs. unsigned Mul and projects out res_high.
 * Panics for any operand size other than 32 bits. */
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (get_mode_size_bits(mode) != 32) {
1299 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1302 if (mode_is_signed(mode)) {
1303 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1304 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1306 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1307 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1309 return proj_res_high;
1313 * Creates an ia32 And.
1315 * @return The created ia32 And node
/* Transforms a firm And. Special-cases And with 0xFF/0xFFFF constants,
 * which are zero extensions and become an I2I Conv instead of an And.
 * Float And is not expected here (asserted). */
1317 static ir_node *gen_And(ir_node *node)
1319 ir_node *op1 = get_And_left(node);
1320 ir_node *op2 = get_And_right(node);
1321 assert(! mode_is_float(get_irn_mode(node)));
1323 /* is it a zero extension? */
1324 if (is_Const(op2)) {
1325 tarval *tv = get_Const_tarval(op2);
1326 long v = get_tarval_long(tv);
1328 if (v == 0xFF || v == 0xFFFF) {
1329 dbg_info *dbgi = get_irn_dbg_info(node);
1330 ir_node *block = get_nodes_block(node);
/* NOTE(review): the 0xFF branch is elided here; only the 0xFFFF
 * case is visible before the Conv construction. */
1337 assert(v == 0xFFFF);
1340 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* Generic case: commutative And with AM and immediate support. */
1345 return gen_binop(node, op1, op2, new_bd_ia32_And,
1346 match_commutative | match_mode_neutral | match_am | match_immediate);
1352 * Creates an ia32 Or.
1354 * @return The created ia32 Or node
/* Transforms a firm Or into an ia32 Or (integer only, asserted). */
1356 static ir_node *gen_Or(ir_node *node)
1358 ir_node *op1 = get_Or_left(node);
1359 ir_node *op2 = get_Or_right(node);
1361 assert (! mode_is_float(get_irn_mode(node)));
1362 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1363 | match_mode_neutral | match_am | match_immediate);
1369 * Creates an ia32 Eor.
1371 * @return The created ia32 Eor node
/* Transforms a firm Eor (xor) into an ia32 Xor (integer only, asserted). */
1373 static ir_node *gen_Eor(ir_node *node)
1375 ir_node *op1 = get_Eor_left(node);
1376 ir_node *op2 = get_Eor_right(node);
1378 assert(! mode_is_float(get_irn_mode(node)));
1379 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1380 | match_mode_neutral | match_am | match_immediate);
1385 * Creates an ia32 Sub.
1387 * @return The created ia32 Sub node
/* Transforms a firm Sub: SSE xSub / x87 vfsub for floats, integer Sub
 * otherwise. Sub is not commutative, so no match_commutative here.
 * Warns about Sub(x, Const), which the middleend should have
 * normalized into Add(x, -Const). */
1389 static ir_node *gen_Sub(ir_node *node)
1391 ir_node *op1 = get_Sub_left(node);
1392 ir_node *op2 = get_Sub_right(node);
1393 ir_mode *mode = get_irn_mode(node);
1395 if (mode_is_float(mode)) {
1396 if (ia32_cg_config.use_sse2)
1397 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1399 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1402 if (is_Const(op2)) {
1403 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1407 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1408 | match_am | match_immediate);
/* Computes the memory input for a node that consumed a load via address
 * mode. Avoids creating a memory self-loop: when src_val and src_mem
 * project from the same node, or when src_mem is a Sync containing a
 * Proj of the consumed load, that predecessor is filtered out.
 * Otherwise the transformed src_mem and am_mem are combined in a Sync. */
1411 static ir_node *transform_AM_mem(ir_node *const block,
1412 ir_node *const src_val,
1413 ir_node *const src_mem,
1414 ir_node *const am_mem)
1416 if (is_NoMem(am_mem)) {
1417 return be_transform_node(src_mem);
1418 } else if (is_Proj(src_val) &&
1420 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1421 /* avoid memory loop */
1423 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1424 ir_node *const ptr_pred = get_Proj_pred(src_val);
1425 int const arity = get_Sync_n_preds(src_mem);
/* Collect all Sync predecessors except Projs of the consumed load. */
1430 NEW_ARR_A(ir_node*, ins, arity + 1);
1432 /* NOTE: This sometimes produces dead-code because the old sync in
1433 * src_mem might not be used anymore, we should detect this case
1434 * and kill the sync... */
1435 for (i = arity - 1; i >= 0; --i) {
1436 ir_node *const pred = get_Sync_pred(src_mem, i);
1438 /* avoid memory loop */
1439 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1442 ins[n++] = be_transform_node(pred);
1447 return new_r_Sync(block, n, ins);
/* Fallback: Sync the transformed source memory with the AM memory. */
1451 ins[0] = be_transform_node(src_mem);
1453 return new_r_Sync(block, 2, ins);
1458 * Create a 32bit to 64bit signed extension.
1460 * @param dbgi debug info
1461 * @param block the block where node nodes should be placed
1462 * @param val the value to extend
1463 * @param orig the original node
/* Creates the upper half of a 32->64 bit sign extension: either a Cltd
 * (cdq, short encoding, clobbers eax/edx pairing via ProduceVal) or a
 * Sar by 31 which replicates the sign bit into every bit. */
1465 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1466 ir_node *val, const ir_node *orig)
1471 if (ia32_cg_config.use_short_sex_eax) {
/* ProduceVal gives the register allocator a fake value to tie eax to. */
1472 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1473 be_dep_on_frame(pval);
1474 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1476 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1477 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1479 SET_IA32_ORIG_NODE(res, orig);
1484 * Generates an ia32 DivMod with additional infrastructure for the
1485 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod nodes. Extracts the
 * operands per opcode, matches source address mode on the divisor,
 * then builds an IDiv (signed, with a real sign extension of the
 * dividend) or a Div (unsigned, with a zero constant as the high half). */
1489 static ir_node *create_Div(ir_node *node)
1490 dbg_info *dbgi = get_irn_dbg_info(node);
1491 ir_node *block = get_nodes_block(node);
1492 ir_node *new_block = be_transform_node(block);
1498 ir_node *sign_extension;
1499 ia32_address_mode_t am;
1500 ia32_address_t *addr = &am.addr;
1502 /* the upper bits have random contents for smaller modes */
1503 switch (get_irn_opcode(node)) {
1505 op1 = get_Div_left(node);
1506 op2 = get_Div_right(node);
1507 mem = get_Div_mem(node);
1508 mode = get_Div_resmode(node);
1511 op1 = get_Mod_left(node);
1512 op2 = get_Mod_right(node);
1513 mem = get_Mod_mem(node);
1514 mode = get_Mod_resmode(node);
1517 op1 = get_DivMod_left(node);
1518 op2 = get_DivMod_right(node);
1519 mem = get_DivMod_mem(node);
1520 mode = get_DivMod_resmode(node);
1523 panic("invalid divmod node %+F", node);
/* match_upconv_32: operands must be widened to full 32 bit first. */
1526 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1528 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1529 is the memory of the consumed address. We can have only the second op as address
1530 in Div nodes, so check only op2. */
1531 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1533 if (mode_is_signed(mode)) {
1534 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1535 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1536 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* Unsigned division: the high half of the dividend is simply zero. */
1538 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1539 be_dep_on_frame(sign_extension);
1541 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1542 addr->index, new_mem, am.new_op2,
1543 am.new_op1, sign_extension);
/* Keep the original pinned state: division may trap and must not float. */
1546 set_irn_pinned(new_node, get_irn_pinned(node));
1548 set_am_attributes(new_node, &am);
1549 SET_IA32_ORIG_NODE(new_node, node);
1551 new_node = fix_mem_proj(new_node, &am);
1557 * Generates an ia32 Mod.
/* Mod is handled by the shared div/mod transformation. */
1559 static ir_node *gen_Mod(ir_node *node)
1561 return create_Div(node);
1565 * Generates an ia32 Div.
/* Div is handled by the shared div/mod transformation. */
1567 static ir_node *gen_Div(ir_node *node)
1569 return create_Div(node);
1573 * Generates an ia32 DivMod.
/* DivMod is handled by the shared div/mod transformation; the ia32
 * divide produces both quotient and remainder anyway. */
1575 static ir_node *gen_DivMod(ir_node *node)
1577 return create_Div(node);
1583 * Creates an ia32 floating Div.
1585 * @return The created ia32 xDiv node
/* Transforms a firm Quot (floating-point division): SSE xDiv with
 * address mode, or x87 vfdiv otherwise. Not commutative. */
1587 static ir_node *gen_Quot(ir_node *node)
1589 ir_node *op1 = get_Quot_left(node);
1590 ir_node *op2 = get_Quot_right(node);
1592 if (ia32_cg_config.use_sse2) {
1593 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1595 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1601 * Creates an ia32 Shl.
1603 * @return The created ia32 Shl node
/* Transforms a firm Shl into an ia32 Shl. mode_neutral is safe because
 * a left shift does not care about the upper bits of the value. */
1605 static ir_node *gen_Shl(ir_node *node)
1607 ir_node *left = get_Shl_left(node);
1608 ir_node *right = get_Shl_right(node);
1610 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1611 match_mode_neutral | match_immediate);
1615 * Creates an ia32 Shr.
1617 * @return The created ia32 Shr node
/* Transforms a firm Shr into an ia32 Shr. No match_mode_neutral:
 * a logical right shift depends on the upper bits being zero. */
1619 static ir_node *gen_Shr(ir_node *node)
1621 ir_node *left = get_Shr_left(node);
1622 ir_node *right = get_Shr_right(node);
1624 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1630 * Creates an ia32 Sar.
1632 * @return The created ia32 Shrs node
/* Transforms a firm Shrs (arithmetic shift right) into an ia32 Sar.
 * Two special patterns are recognized first:
 *  - Shrs(x, const) used as a full sign extension -> create_sex_32_64
 *    (the elided guard presumably checks for a 31-bit shift);
 *  - Shrs(Shl(x, C), C) with C in {16, 24} -> an 8/16 bit sign-extending
 *    I2I Conv instead of the shift pair. */
1634 static ir_node *gen_Shrs(ir_node *node)
1636 ir_node *left = get_Shrs_left(node);
1637 ir_node *right = get_Shrs_right(node);
1639 if (is_Const(right)) {
1640 tarval *tv = get_Const_tarval(right);
1641 long val = get_tarval_long(tv);
1643 /* this is a sign extension */
1644 dbg_info *dbgi = get_irn_dbg_info(node);
1645 ir_node *block = be_transform_node(get_nodes_block(node));
1646 ir_node *new_op = be_transform_node(left);
1648 return create_sex_32_64(dbgi, block, new_op, node);
1652 /* 8 or 16 bit sign extension? */
1653 if (is_Const(right) && is_Shl(left)) {
1654 ir_node *shl_left = get_Shl_left(left);
1655 ir_node *shl_right = get_Shl_right(left);
1656 if (is_Const(shl_right)) {
1657 tarval *tv1 = get_Const_tarval(right);
1658 tarval *tv2 = get_Const_tarval(shl_right);
/* Both shift amounts must be the same constant. */
1659 if (tv1 == tv2 && tarval_is_long(tv1)) {
1660 long val = get_tarval_long(tv1);
1661 if (val == 16 || val == 24) {
1662 dbg_info *dbgi = get_irn_dbg_info(node);
1663 ir_node *block = get_nodes_block(node);
1673 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* Generic case: plain Sar with immediate support. */
1682 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1688 * Creates an ia32 Rol.
1690 * @param op1 The first operator
1691 * @param op2 The second operator
1692 * @return The created ia32 RotL node
/* Creates an ia32 Rol for the given rotate operands. */
1694 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1696 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1702 * Creates an ia32 Ror.
1703 * NOTE: There is no RotR with immediate because this would always be a RotL
1704 * "imm-mode_size_bits" which can be pre-calculated.
1706 * @param op1 The first operator
1707 * @param op2 The second operator
1708 * @return The created ia32 RotR node
/* Creates an ia32 Ror for the given rotate operands (only reached via
 * the Rotl pattern match below, since firm itself has no RotR). */
1710 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1712 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1718 * Creates an ia32 RotR or RotL (depending on the found pattern).
1720 * @return The created ia32 RotL or RotR node
/* Transforms a firm Rotl. Firm only has RotL; when the rotate amount
 * has the shape Add(Minus(e), bits) — i.e. "bits - e" already rewritten
 * by the middleend — a RotR by e is emitted instead of Add + RotL. */
1722 static ir_node *gen_Rotl(ir_node *node)
1724 ir_node *rotate = NULL;
1725 ir_node *op1 = get_Rotl_left(node);
1726 ir_node *op2 = get_Rotl_right(node);
1728 /* Firm has only RotL, so we are looking for a right (op2)
1729 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1730 that means we can create a RotR instead of an Add and a RotL */
1734 ir_node *left = get_Add_left(add);
1735 ir_node *right = get_Add_right(add);
1736 if (is_Const(right)) {
1737 tarval *tv = get_Const_tarval(right);
1738 ir_mode *mode = get_irn_mode(node);
1739 long bits = get_mode_size_bits(mode);
/* Constant must equal the mode's bit width for the rewrite to hold. */
1741 if (is_Minus(left) &&
1742 tarval_is_long(tv) &&
1743 get_tarval_long(tv) == bits &&
1746 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1747 rotate = gen_Ror(node, op1, get_Minus_op(left));
1752 if (rotate == NULL) {
1753 rotate = gen_Rol(node, op1, op2);
1762 * Transforms a Minus node.
1764 * @return The created ia32 Minus node
/* Transforms a firm Minus. Floats: SSE negates by xor-ing the sign-bit
 * constant (SSIGN/DSIGN) loaded via address mode, x87 uses vfchs.
 * Integers: plain Neg via gen_unop. */
1766 static ir_node *gen_Minus(ir_node *node)
1768 ir_node *op = get_Minus_op(node);
1769 ir_node *block = be_transform_node(get_nodes_block(node));
1770 dbg_info *dbgi = get_irn_dbg_info(node);
1771 ir_mode *mode = get_irn_mode(node);
1776 if (mode_is_float(mode)) {
1777 ir_node *new_op = be_transform_node(op);
1778 if (ia32_cg_config.use_sse2) {
1779 /* TODO: non-optimal... if we have many xXors, then we should
1780 * rather create a load for the const and use that instead of
1781 * several AM nodes... */
1782 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1784 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1785 nomem, new_op, noreg_xmm);
/* Pick the 32- or 64-bit sign-mask constant entity by mode size. */
1787 size = get_mode_size_bits(mode);
1788 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1790 set_ia32_am_sc(new_node, ent);
1791 set_ia32_op_type(new_node, ia32_AddrModeS);
1792 set_ia32_ls_mode(new_node, mode);
1794 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1797 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1800 SET_IA32_ORIG_NODE(new_node, node);
1806 * Transforms a Not node.
1808 * @return The created ia32 Not node
/* Transforms a firm Not into an ia32 Not. mode_b Nots must have been
 * lowered earlier; float Not does not exist (both asserted). */
1810 static ir_node *gen_Not(ir_node *node)
1812 ir_node *op = get_Not_op(node);
1814 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1815 assert (! mode_is_float(get_irn_mode(node)));
1817 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1823 * Transforms an Abs node.
1825 * @return The created ia32 Abs node
/* Transforms a firm Abs. Floats: SSE masks the sign bit away with an
 * And against the SABS/DABS constant; x87 uses vfabs. Integers use the
 * classic branch-free sequence: s = x >> 31; result = (x ^ s) - s. */
1827 static ir_node *gen_Abs(ir_node *node)
1829 ir_node *block = get_nodes_block(node);
1830 ir_node *new_block = be_transform_node(block);
1831 ir_node *op = get_Abs_op(node);
1832 dbg_info *dbgi = get_irn_dbg_info(node);
1833 ir_mode *mode = get_irn_mode(node);
1839 if (mode_is_float(mode)) {
1840 new_op = be_transform_node(op);
1842 if (ia32_cg_config.use_sse2) {
1843 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1844 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1845 nomem, new_op, noreg_fp);
/* Pick the 32- or 64-bit abs-mask constant entity by mode size. */
1847 size = get_mode_size_bits(mode);
1848 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1850 set_ia32_am_sc(new_node, ent);
1852 SET_IA32_ORIG_NODE(new_node, node);
1854 set_ia32_op_type(new_node, ia32_AddrModeS);
1855 set_ia32_ls_mode(new_node, mode);
1857 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1858 SET_IA32_ORIG_NODE(new_node, node);
1861 ir_node *xor, *sign_extension;
/* Smaller modes are widened to 32 bit first. */
1863 if (get_mode_size_bits(mode) == 32) {
1864 new_op = be_transform_node(op);
1866 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1869 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1871 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, new_op, sign_extension);
1873 SET_IA32_ORIG_NODE(xor, node);
1875 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1876 nomem, xor, sign_extension);
1877 SET_IA32_ORIG_NODE(new_node, node);
1884 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Creates an ia32 Bt (bit test) for an x & (1 << n) pattern, placed in
 * the (transformed) block of the originating Cmp. */
1886 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1888 dbg_info *dbgi = get_irn_dbg_info(cmp);
1889 ir_node *block = get_nodes_block(cmp);
1890 ir_node *new_block = be_transform_node(block);
1891 ir_node *op1 = be_transform_node(x);
1892 ir_node *op2 = be_transform_node(n);
1894 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1898 * Transform a node returning a "flag" result.
1900 * @param node the node to transform
1901 * @param pnc_out the compare mode to use
/* Produces a flags-producing node for a boolean value and reports the
 * compare relation to test in *pnc_out.
 * A Proj of a Cmp is transformed directly; Cmp(And(1<<n, x), 0) style
 * patterns become a Bt (carry flag -> Lt/Ge relation). Any other mode_b
 * value is compared against zero with a Test. */
1903 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1910 /* we have a Cmp as input */
1911 if (is_Proj(node)) {
1912 ir_node *pred = get_Proj_pred(node);
1914 pn_Cmp pnc = get_Proj_proj(node);
/* Bt only helps for equality/inequality against a single bit. */
1915 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1916 ir_node *l = get_Cmp_left(pred);
1917 ir_node *r = get_Cmp_right(pred);
1919 ir_node *la = get_And_left(l);
1920 ir_node *ra = get_And_right(l);
1922 ir_node *c = get_Shl_left(la);
1923 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1924 /* (1 << n) & ra) */
1925 ir_node *n = get_Shl_right(la);
1926 flags = gen_bt(pred, ra, n);
1927 /* we must generate a Jc/Jnc jump */
1928 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
/* Bt sets the carry flag; unsigned Lt/Ge test exactly that. */
1931 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Mirrored pattern: the shift may also be the right And operand. */
1936 ir_node *c = get_Shl_left(ra);
1937 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1938 /* la & (1 << n)) */
1939 ir_node *n = get_Shl_right(ra);
1940 flags = gen_bt(pred, la, n);
1941 /* we must generate a Jc/Jnc jump */
1942 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1945 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1951 flags = be_transform_node(pred);
/* Float compares need the float-flavoured pnc handling later on. */
1953 if (mode_is_float(get_irn_mode(get_Cmp_left(pred))))
1954 *pnc_out |= ia32_pn_Cmp_float;
1959 /* a mode_b value, we have to compare it against 0 */
1960 dbgi = get_irn_dbg_info(node);
1961 new_block = be_transform_node(get_nodes_block(node));
1962 new_op = be_transform_node(node);
1963 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1964 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1965 *pnc_out = pn_Cmp_Lg;
1970 * Transforms a Load.
1972 * @return the created ia32 Load node
/* Transforms a firm Load. Builds an ia32 address mode from the pointer,
 * then creates xLoad (SSE), vfld (x87), a sign/zero-extending Conv_I2I
 * for sub-32-bit GP modes, or a plain 32-bit Load. Unpinned loads are
 * marked rematerializable for the spiller. */
1974 static ir_node *gen_Load(ir_node *node)
1976 ir_node *old_block = get_nodes_block(node);
1977 ir_node *block = be_transform_node(old_block);
1978 ir_node *ptr = get_Load_ptr(node);
1979 ir_node *mem = get_Load_mem(node);
1980 ir_node *new_mem = be_transform_node(mem);
1983 dbg_info *dbgi = get_irn_dbg_info(node);
1984 ir_mode *mode = get_Load_mode(node);
1987 ia32_address_t addr;
1989 /* construct load address */
1990 memset(&addr, 0, sizeof(addr));
1991 ia32_create_address_mode(&addr, ptr, 0);
1998 base = be_transform_node(base);
2001 if (index == NULL) {
2004 index = be_transform_node(index);
2007 if (mode_is_float(mode)) {
2008 if (ia32_cg_config.use_sse2) {
2009 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2011 res_mode = mode_xmm;
2013 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2015 res_mode = mode_vfp;
2018 assert(mode != mode_b);
2020 /* create a conv node with address mode for smaller modes */
2021 if (get_mode_size_bits(mode) < 32) {
2022 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2023 new_mem, noreg_GP, mode);
2025 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2030 set_irn_pinned(new_node, get_irn_pinned(node));
2031 set_ia32_op_type(new_node, ia32_AddrModeS);
2032 set_ia32_ls_mode(new_node, mode);
2033 set_address(new_node, &addr);
/* Floating (unpinned) loads can be re-executed instead of spilled. */
2035 if (get_irn_pinned(node) == op_pin_state_floats) {
2036 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2037 && pn_ia32_vfld_res == pn_ia32_Load_res
2038 && pn_ia32_Load_res == pn_ia32_res);
2039 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2042 SET_IA32_ORIG_NODE(new_node, node);
2044 be_dep_on_frame(new_node);
/* Decides whether a value (a Proj of a Load) feeding a Store at ptr can
 * be folded into a destination-address-mode instruction. Requires: the
 * load result has exactly one user, load and store live in the same
 * block and use the same pointer, the other operand must not depend on
 * the load, and nothing else may prevent address mode. */
2048 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2049 ir_node *ptr, ir_node *other)
2056 /* we only use address mode if we're the only user of the load */
2057 if (get_irn_n_edges(node) > 1)
2060 load = get_Proj_pred(node);
2063 if (get_nodes_block(load) != block)
2066 /* store should have the same pointer as the load */
2067 if (get_Load_ptr(load) != ptr)
2070 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2071 if (other != NULL &&
2072 get_nodes_block(other) == block &&
2073 heights_reachable_in_block(heights, other, load)) {
2077 if (prevents_AM(block, load, mem))
2079 /* Store should be attached to the load via mem */
2080 assert(heights_reachable_in_block(heights, mem, load));
/* Creates a destination-address-mode binop (e.g. AddMem: op to memory).
 * Tries to fold the load behind op1 (or op2 for commutative ops) into
 * the instruction; the remaining operand becomes an immediate or a
 * register. Returns NULL-equivalent early (elided) when neither operand
 * qualifies. The 8-bit constructor is used for 8-bit modes. */
2085 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2086 ir_node *mem, ir_node *ptr, ir_mode *mode,
2087 construct_binop_dest_func *func,
2088 construct_binop_dest_func *func8bit,
2089 match_flags_t flags)
2091 ir_node *src_block = get_nodes_block(node);
2099 ia32_address_mode_t am;
2100 ia32_address_t *addr = &am.addr;
2101 memset(&am, 0, sizeof(am));
2103 assert(flags & match_immediate); /* there is no destam node without... */
2104 commutative = (flags & match_commutative) != 0;
2106 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2107 build_address(&am, op1, ia32_create_am_double_use);
2108 new_op = create_immediate_or_transform(op2, 0);
2109 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2110 build_address(&am, op2, ia32_create_am_double_use);
2111 new_op = create_immediate_or_transform(op1, 0);
/* Fill unset address parts with the GP no-register placeholder. */
2116 if (addr->base == NULL)
2117 addr->base = noreg_GP;
2118 if (addr->index == NULL)
2119 addr->index = noreg_GP;
2120 if (addr->mem == NULL)
2123 dbgi = get_irn_dbg_info(node);
2124 block = be_transform_node(src_block);
2125 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2127 if (get_mode_size_bits(mode) == 8) {
2128 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2130 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2132 set_address(new_node, addr);
2133 set_ia32_op_type(new_node, ia32_AddrModeD);
2134 set_ia32_ls_mode(new_node, mode);
2135 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load (and its mem proj) to the new node. */
2137 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2138 mem_proj = be_transform_node(am.mem_proj);
2139 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Creates a destination-address-mode unop (e.g. NotMem, NegMem): the
 * operation is applied directly to memory when the operand is a load
 * from the same address the store writes to. Returns early (elided)
 * when the fold is not possible. */
2144 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2145 ir_node *ptr, ir_mode *mode,
2146 construct_unop_dest_func *func)
2148 ir_node *src_block = get_nodes_block(node);
2154 ia32_address_mode_t am;
2155 ia32_address_t *addr = &am.addr;
2157 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2160 memset(&am, 0, sizeof(am));
2161 build_address(&am, op, ia32_create_am_double_use);
2163 dbgi = get_irn_dbg_info(node);
2164 block = be_transform_node(src_block);
2165 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2166 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2167 set_address(new_node, addr);
2168 set_ia32_op_type(new_node, ia32_AddrModeD);
2169 set_ia32_ls_mode(new_node, mode);
2170 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load (and its mem proj) to the new node. */
2172 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2173 mem_proj = be_transform_node(am.mem_proj);
2174 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Negates a compare relation, picking a float mode when the
 * ia32_pn_Cmp_float flag is set so unordered handling is correct. */
2179 static pn_Cmp ia32_get_negated_pnc(pn_Cmp pnc)
2181 ir_mode *mode = pnc & ia32_pn_Cmp_float ? mode_F : mode_Iu;
2182 return get_negated_pnc(pnc, mode);
/* Tries to turn Store(Mux(cond, 1, 0)) (8-bit only) into a SetccMem
 * that writes the condition flag directly to memory. The pnc is negated
 * for the Const_0/Const_1 orientation (the flag handling for the
 * non-negated case is elided here). Float compare special cases are
 * rejected. Returns the new node or fails early (elided paths). */
2185 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2187 ir_mode *mode = get_irn_mode(node);
2188 ir_node *mux_true = get_Mux_true(node);
2189 ir_node *mux_false = get_Mux_false(node);
2199 ia32_address_t addr;
/* Setcc writes a single byte, so only 8-bit modes qualify. */
2201 if (get_mode_size_bits(mode) != 8)
2204 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2206 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2212 cond = get_Mux_sel(node);
2213 flags = get_flags_node(cond, &pnc);
2214 /* we can't handle the float special cases with SetM */
2215 if (pnc & ia32_pn_Cmp_float)
2218 pnc = ia32_get_negated_pnc(pnc);
2220 build_address_ptr(&addr, ptr, mem);
2222 dbgi = get_irn_dbg_info(node);
2223 block = get_nodes_block(node);
2224 new_block = be_transform_node(block);
2225 new_mem = be_transform_node(mem);
2226 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2227 addr.index, addr.mem, flags, pnc);
2228 set_address(new_node, &addr);
2229 set_ia32_op_type(new_node, ia32_AddrModeD);
2230 set_ia32_ls_mode(new_node, mode);
2231 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to turn a Store into a destination-address-mode instruction
 * (op-to-memory). Only GP modes; the stored value must have a single
 * user in the same block. Dispatches on the value's opcode to the
 * matching AddMem/SubMem/AndMem/... constructor; Add +/-1 becomes
 * IncMem/DecMem when the config allows it, Mux becomes SetccMem.
 * Pinned-ness of the original Store is propagated to the result. */
2236 static ir_node *try_create_dest_am(ir_node *node)
2238 ir_node *val = get_Store_value(node);
2239 ir_node *mem = get_Store_mem(node);
2240 ir_node *ptr = get_Store_ptr(node);
2241 ir_mode *mode = get_irn_mode(val);
2242 unsigned bits = get_mode_size_bits(mode);
2247 /* handle only GP modes for now... */
2248 if (!ia32_mode_needs_gp_reg(mode))
2252 /* store must be the only user of the val node */
2253 if (get_irn_n_edges(val) > 1)
2255 /* skip pointless convs */
2257 ir_node *conv_op = get_Conv_op(val);
2258 ir_mode *pred_mode = get_irn_mode(conv_op);
2259 if (!ia32_mode_needs_gp_reg(pred_mode))
2261 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2269 /* value must be in the same block */
2270 if (get_nodes_block(node) != get_nodes_block(val))
2273 switch (get_irn_opcode(val)) {
2275 op1 = get_Add_left(val);
2276 op2 = get_Add_right(val);
/* Add +1 / -1 become inc/dec on memory when profitable. */
2277 if (ia32_cg_config.use_incdec) {
2278 if (is_Const_1(op2)) {
2279 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2281 } else if (is_Const_Minus_1(op2)) {
2282 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2286 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2287 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2288 match_commutative | match_immediate);
2291 op1 = get_Sub_left(val);
2292 op2 = get_Sub_right(val);
2293 if (is_Const(op2)) {
2294 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2296 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2297 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2301 op1 = get_And_left(val);
2302 op2 = get_And_right(val);
2303 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2304 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2305 match_commutative | match_immediate);
2308 op1 = get_Or_left(val);
2309 op2 = get_Or_right(val);
2310 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2311 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2312 match_commutative | match_immediate);
2315 op1 = get_Eor_left(val);
2316 op2 = get_Eor_right(val);
2317 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2318 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2319 match_commutative | match_immediate);
/* Shifts/rotates have no 8-bit variant: same constructor twice. */
2322 op1 = get_Shl_left(val);
2323 op2 = get_Shl_right(val);
2324 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2325 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2329 op1 = get_Shr_left(val);
2330 op2 = get_Shr_right(val);
2331 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2332 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2336 op1 = get_Shrs_left(val);
2337 op2 = get_Shrs_right(val);
2338 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2339 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2343 op1 = get_Rotl_left(val);
2344 op2 = get_Rotl_right(val);
2345 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2346 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2349 /* TODO: match ROR patterns... */
2351 new_node = try_create_SetMem(val, ptr, mem);
2355 op1 = get_Minus_op(val);
2356 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2359 /* should be lowered already */
2360 assert(mode != mode_b);
2361 op1 = get_Not_op(val);
2362 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Propagate pinned-ness: a pinned Store must stay pinned as destam op. */
2368 if (new_node != NULL) {
2369 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2370 get_irn_pinned(node) == op_pin_state_pinned) {
2371 set_irn_pinned(new_node, op_pin_state_pinned);
/* True iff an integer mode can be the target of a float-to-int fist
 * store: it must be signed and 16 or 32 bits wide. */
2378 static bool possible_int_mode_for_fp(ir_mode *mode)
2382 if (!mode_is_signed(mode))
2384 size = get_mode_size_bits(mode);
2385 if (size != 16 && size != 32)
/* True iff the node is a Conv from a float mode to an integer mode that
 * a fist store can produce directly (see possible_int_mode_for_fp). */
2390 static int is_float_to_int_conv(const ir_node *node)
2392 ir_mode *mode = get_irn_mode(node);
2396 if (!possible_int_mode_for_fp(mode))
2401 conv_op = get_Conv_op(node);
2402 conv_mode = get_irn_mode(conv_op);
2404 if (!mode_is_float(conv_mode))
2411 * Transform a Store(floatConst) into a sequence of
2414 * @return the created ia32 Store node
/* Transforms Store(floatConst) into one 32-bit integer immediate Store
 * per 4 bytes of the constant (4 bytes for float, 8 for double; size
 * must be a multiple of 4, asserted). Each word is assembled
 * little-endian from the tarval's bytes; the resulting stores are
 * combined with a Sync. */
2416 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2418 ir_mode *mode = get_irn_mode(cns);
2419 unsigned size = get_mode_size_bytes(mode);
2420 tarval *tv = get_Const_tarval(cns);
2421 ir_node *block = get_nodes_block(node);
2422 ir_node *new_block = be_transform_node(block);
2423 ir_node *ptr = get_Store_ptr(node);
2424 ir_node *mem = get_Store_mem(node);
2425 dbg_info *dbgi = get_irn_dbg_info(node);
2429 ia32_address_t addr;
2431 assert(size % 4 == 0);
2434 build_address_ptr(&addr, ptr, mem);
/* Assemble one 32-bit word from four constant bytes (little endian). */
2438 get_tarval_sub_bits(tv, ofs) |
2439 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2440 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2441 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2442 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2444 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2445 addr.index, addr.mem, imm);
2447 set_irn_pinned(new_node, get_irn_pinned(node));
2448 set_ia32_op_type(new_node, ia32_AddrModeD);
2449 set_ia32_ls_mode(new_node, mode_Iu);
2450 set_address(new_node, &addr);
2451 SET_IA32_ORIG_NODE(new_node, node);
2454 ins[i++] = new_node;
2459 } while (size != 0);
/* Join the memory outputs of all partial stores. */
2462 return new_rd_Sync(dbgi, new_block, i, ins);
2469 * Generate a vfist or vfisttp instruction.
/* Creates a float->int store: vfisttp (SSE3) when available, otherwise
 * vfist with an explicit truncating FPU control-word switch. Since
 * fisttp always pops the x87 tos, the popped value is kept alive via a
 * Keep so other users still see it. *fist receives the store node. */
2471 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2472 ir_node *mem, ir_node *val, ir_node **fist)
2476 if (ia32_cg_config.use_fisttp) {
2477 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2478 if other users exists */
2479 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2480 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2481 be_new_Keep(block, 1, &value);
2483 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* Classic fist needs the FPU rounding mode set to truncation. */
2486 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2489 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2495 * Transforms a general (no special case) Store.
2497 * @return the created ia32 Store node
/* Transforms a general Store (no special case applied). First tries
 * destination address mode; otherwise builds the store address and
 * dispatches: xStore (SSE) / vfst (x87) for floats, vfist for
 * float->int Convs without SSE2, and Store/Store8Bit for integers.
 * Same-mode Convs before the stored value are skipped. */
2499 static ir_node *gen_general_Store(ir_node *node)
2501 ir_node *val = get_Store_value(node);
2502 ir_mode *mode = get_irn_mode(val);
2503 ir_node *block = get_nodes_block(node);
2504 ir_node *new_block = be_transform_node(block);
2505 ir_node *ptr = get_Store_ptr(node);
2506 ir_node *mem = get_Store_mem(node);
2507 dbg_info *dbgi = get_irn_dbg_info(node);
2508 ir_node *new_val, *new_node, *store;
2509 ia32_address_t addr;
2511 /* check for destination address mode */
2512 new_node = try_create_dest_am(node);
2513 if (new_node != NULL)
2516 /* construct store address */
2517 memset(&addr, 0, sizeof(addr));
2518 ia32_create_address_mode(&addr, ptr, 0);
2520 if (addr.base == NULL) {
2521 addr.base = noreg_GP;
2523 addr.base = be_transform_node(addr.base);
2526 if (addr.index == NULL) {
2527 addr.index = noreg_GP;
2529 addr.index = be_transform_node(addr.index);
2531 addr.mem = be_transform_node(mem);
2533 if (mode_is_float(mode)) {
2534 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2536 while (is_Conv(val) && mode == get_irn_mode(val)) {
2537 ir_node *op = get_Conv_op(val);
2538 if (!mode_is_float(get_irn_mode(op)))
2542 new_val = be_transform_node(val);
2543 if (ia32_cg_config.use_sse2) {
2544 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2545 addr.index, addr.mem, new_val);
2547 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2548 addr.index, addr.mem, new_val, mode);
/* float->int Conv without SSE2: store via fist(tp) directly. */
2551 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2552 val = get_Conv_op(val);
2554 /* TODO: is this optimisation still necessary at all (middleend)? */
2555 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2556 while (is_Conv(val)) {
2557 ir_node *op = get_Conv_op(val);
2558 if (!mode_is_float(get_irn_mode(op)))
2560 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2564 new_val = be_transform_node(val);
2565 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* Integer store: value may become an immediate operand. */
2567 new_val = create_immediate_or_transform(val, 0);
2568 assert(mode != mode_b);
2570 if (get_mode_size_bits(mode) == 8) {
2571 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2572 addr.index, addr.mem, new_val);
2574 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2575 addr.index, addr.mem, new_val);
2580 set_irn_pinned(store, get_irn_pinned(node));
2581 set_ia32_op_type(store, ia32_AddrModeD);
2582 set_ia32_ls_mode(store, mode);
2584 set_address(store, &addr);
2585 SET_IA32_ORIG_NODE(store, node);
2591 * Transforms a Store.
2593 * @return the created ia32 Store node
/* Entry point for Store transformation: float constant stores are
 * lowered to integer stores, everything else goes through
 * gen_general_Store(). */
2595 static ir_node *gen_Store(ir_node *node)
2597 ir_node *val = get_Store_value(node);
2598 ir_mode *mode = get_irn_mode(val);
2600 if (mode_is_float(mode) && is_Const(val)) {
2601 /* We can transform every floating const store
2602 into a sequence of integer stores.
2603 If the constant is already in a register,
2604 it would be better to use it, but we don't
2605 have this information here. */
2606 return gen_float_const_Store(node, val);
2608 return gen_general_Store(node);
2612 * Transforms a Switch.
2614 * @return the created ia32 SwitchJmp node
/* Transform a switch-Cond into an ia32 SwitchJmp (jump table).
 * Scans all Proj users to find the case-value range, rejects tables
 * bigger than 128000 entries, and biases the selector with an extra
 * Lea when the smallest case is not 0. */
2616 static ir_node *create_Switch(ir_node *node)
2618 dbg_info *dbgi = get_irn_dbg_info(node);
2619 ir_node *block = be_transform_node(get_nodes_block(node));
2620 ir_node *sel = get_Cond_selector(node);
2621 ir_node *new_sel = be_transform_node(sel);
2622 long switch_min = LONG_MAX;
2623 long switch_max = LONG_MIN;
2624 long default_pn = get_Cond_default_proj(node);
2626 const ir_edge_t *edge;
2628 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
/* Determine [switch_min, switch_max] over all non-default cases
 * (the min/max update statements are elided in this listing). */
2630 /* determine the smallest switch case value */
2631 foreach_out_edge(node, edge) {
2632 ir_node *proj = get_edge_src_irn(edge);
2633 long pn = get_Proj_proj(proj);
2634 if (pn == default_pn)
2637 if (pn < switch_min)
2639 if (pn > switch_max)
2643 if ((unsigned long) (switch_max - switch_min) > 128000) {
2644 panic("Size of switch %+F bigger than 128000", node);
2647 if (switch_min != 0) {
2648 /* if smallest switch case is not 0 we need an additional sub */
/* Lea with negative offset rebases the selector to start at 0. */
2649 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2650 add_ia32_am_offs_int(new_sel, -switch_min);
2651 set_ia32_op_type(new_sel, ia32_AddrModeS);
2653 SET_IA32_ORIG_NODE(new_sel, node);
2656 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2657 SET_IA32_ORIG_NODE(new_node, node);
2663 * Transform a Cond node.
/* Transform a Cond node: a non-mode_b selector is a switch, otherwise
 * the flags-producing compare is materialized and a conditional jump
 * (Jcc) is emitted on it. */
2665 static ir_node *gen_Cond(ir_node *node)
2667 ir_node *block = get_nodes_block(node);
2668 ir_node *new_block = be_transform_node(block);
2669 dbg_info *dbgi = get_irn_dbg_info(node);
2670 ir_node *sel = get_Cond_selector(node);
2671 ir_mode *sel_mode = get_irn_mode(sel);
2672 ir_node *flags = NULL;
2676 if (sel_mode != mode_b) {
2677 return create_Switch(node);
2680 /* we get flags from a Cmp */
2681 flags = get_flags_node(sel, &pnc);
2683 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2684 SET_IA32_ORIG_NODE(new_node, node);
2690 * Transform a be_Copy.
/* Transform a be_Copy: duplicate it and normalize GP-register modes to
 * mode_Iu, since all ia32 integer operations run on 32bit registers. */
2692 static ir_node *gen_be_Copy(ir_node *node)
2694 ir_node *new_node = be_duplicate_node(node);
2695 ir_mode *mode = get_irn_mode(new_node);
2697 if (ia32_mode_needs_gp_reg(mode)) {
2698 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare. With fucomi the flags are produced
 * directly; otherwise fnstsw-style compares (ftst for compare against
 * 0, fucom otherwise) are used and the status word is moved into the
 * flags register via Sahf. */
2704 static ir_node *create_Fucom(ir_node *node)
2706 dbg_info *dbgi = get_irn_dbg_info(node);
2707 ir_node *block = get_nodes_block(node);
2708 ir_node *new_block = be_transform_node(block);
2709 ir_node *left = get_Cmp_left(node);
2710 ir_node *new_left = be_transform_node(left);
2711 ir_node *right = get_Cmp_right(node);
2715 if (ia32_cg_config.use_fucomi) {
2716 new_right = be_transform_node(right);
2717 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2719 set_ia32_commutative(new_node);
2720 SET_IA32_ORIG_NODE(new_node, node);
/* ftst only works for comparison against constant 0. */
2722 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2723 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2725 new_right = be_transform_node(right);
2726 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2729 set_ia32_commutative(new_node);
2731 SET_IA32_ORIG_NODE(new_node, node);
/* Sahf copies AH (fnstsw result) into EFLAGS. */
2733 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2734 SET_IA32_ORIG_NODE(new_node, node);
/* Create an SSE float compare (ucomis[sd]) with address-mode matching
 * for one operand; commutativity is exploited by match_arguments. */
2740 static ir_node *create_Ucomi(ir_node *node)
2742 dbg_info *dbgi = get_irn_dbg_info(node);
2743 ir_node *src_block = get_nodes_block(node);
2744 ir_node *new_block = be_transform_node(src_block);
2745 ir_node *left = get_Cmp_left(node);
2746 ir_node *right = get_Cmp_right(node);
2748 ia32_address_mode_t am;
2749 ia32_address_t *addr = &am.addr;
2751 match_arguments(&am, src_block, left, right, NULL,
2752 match_commutative | match_am);
2754 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2755 addr->mem, am.new_op1, am.new_op2,
2757 set_am_attributes(new_node, &am);
2759 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute a possible memory Proj when AM folded a load. */
2761 new_node = fix_mem_proj(new_node, &am);
2767 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2768 * to fold an and into a test node
/* Returns true iff every Proj user of the Cmp tests only for
 * equality/inequality (Eq/Lg) — the precondition for folding an And
 * into a Test instruction, since Test only gives meaningful Z flag. */
2770 static bool can_fold_test_and(ir_node *node)
2772 const ir_edge_t *edge;
2774 /** we can only have eq and lg projs */
2775 foreach_out_edge(node, edge) {
2776 ir_node *proj = get_edge_src_irn(edge);
2777 pn_Cmp pnc = get_Proj_proj(proj);
2778 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2786 * returns true if it is assured, that the upper bits of a node are "clean"
2787 * which means for a 16 or 8 bit value, that the upper bits in the register
2788 * are 0 for unsigned and a copy of the last significant bit for signed
/* Returns true if the bits above get_mode_size_bits(mode) of the
 * already-transformed node are known "clean": zero for unsigned modes,
 * sign-copies for signed modes. Used to widen 8/16bit compares to the
 * shorter 32bit opcode. Recurses through Projs and inspects the ia32
 * opcode of the producer. */
2791 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2793 assert(ia32_mode_needs_gp_reg(mode));
/* 32bit (or larger) values trivially have no dirty upper bits. */
2794 if (get_mode_size_bits(mode) >= 32)
2797 if (is_Proj(transformed_node))
2798 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2800 switch (get_ia32_irn_opcode(transformed_node)) {
/* A conversion is clean if it extends with the same signedness to at
 * most the queried width. */
2801 case iro_ia32_Conv_I2I:
2802 case iro_ia32_Conv_I2I8Bit: {
2803 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2804 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2806 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* Shift right: an immediate shift amount >= (32 - bits) guarantees the
 * upper bits were shifted in as zeros (unsigned only). */
2813 if (mode_is_signed(mode)) {
2814 return false; /* TODO handle signed modes */
2816 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2817 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2818 const ia32_immediate_attr_t *attr
2819 = get_ia32_immediate_attr_const(right);
2820 if (attr->symconst == 0 &&
2821 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2825 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2829 /* TODO too conservative if shift amount is constant */
2830 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And: for unsigned modes one clean operand zeroes the upper bits. */
2833 if (!mode_is_signed(mode)) {
2835 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2836 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2838 /* TODO if one is known to be zero extended, then || is sufficient */
/* Generic binary case: both operands must be clean. */
2843 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2844 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
/* Constants/immediates: check that shifting out the low bits leaves
 * all-zeros (unsigned) or all-zeros/all-ones (signed). */
2846 case iro_ia32_Const:
2847 case iro_ia32_Immediate: {
2848 const ia32_immediate_attr_t *attr =
2849 get_ia32_immediate_attr_const(transformed_node);
2850 if (mode_is_signed(mode)) {
2851 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2852 return shifted == 0 || shifted == -1;
2854 unsigned long shifted = (unsigned long)attr->offset;
2855 shifted >>= get_mode_size_bits(mode);
2856 return shifted == 0;
2866 * Generate code for a Cmp.
/* Transform a Cmp node. Float compares are dispatched to SSE (Ucomi)
 * or x87 (Fucom). For integers, (x & y) ==/!= 0 is folded into a Test
 * instruction when all users allow it; otherwise a Cmp is emitted.
 * 8/16bit compares are widened to 32bit when the upper bits are clean
 * because the 32bit opcode is shorter. */
2868 static ir_node *gen_Cmp(ir_node *node)
2870 dbg_info *dbgi = get_irn_dbg_info(node);
2871 ir_node *block = get_nodes_block(node);
2872 ir_node *new_block = be_transform_node(block);
2873 ir_node *left = get_Cmp_left(node);
2874 ir_node *right = get_Cmp_right(node);
2875 ir_mode *cmp_mode = get_irn_mode(left);
2877 ia32_address_mode_t am;
2878 ia32_address_t *addr = &am.addr;
2881 if (mode_is_float(cmp_mode)) {
2882 if (ia32_cg_config.use_sse2) {
2883 return create_Ucomi(node);
2885 return create_Fucom(node);
2889 assert(ia32_mode_needs_gp_reg(cmp_mode));
2891 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2892 cmp_unsigned = !mode_is_signed(cmp_mode);
/* Only fold the And when it has a single user (this Cmp) and all
 * compare Projs are Eq/Lg. (The is_And(left) test is elided here.) */
2893 if (is_Const_0(right) &&
2895 get_irn_n_edges(left) == 1 &&
2896 can_fold_test_and(node)) {
2897 /* Test(and_left, and_right) */
2898 ir_node *and_left = get_And_left(left);
2899 ir_node *and_right = get_And_right(left);
2901 /* matze: code here used mode instead of cmd_mode, I think it is always
2902 * the same as cmp_mode, but I leave this here to see if this is really
2905 assert(get_irn_mode(and_left) == cmp_mode);
2907 match_arguments(&am, block, and_left, and_right, NULL,
2909 match_am | match_8bit_am | match_16bit_am |
2910 match_am_and_immediates | match_immediate);
2912 /* use 32bit compare mode if possible since the opcode is smaller */
2913 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2914 upper_bits_clean(am.new_op2, cmp_mode)) {
2915 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2918 if (get_mode_size_bits(cmp_mode) == 8) {
2919 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2920 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2923 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2924 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2927 /* Cmp(left, right) */
2928 match_arguments(&am, block, left, right, NULL,
2929 match_commutative | match_am | match_8bit_am |
2930 match_16bit_am | match_am_and_immediates |
2932 /* use 32bit compare mode if possible since the opcode is smaller */
2933 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2934 upper_bits_clean(am.new_op2, cmp_mode)) {
2935 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2938 if (get_mode_size_bits(cmp_mode) == 8) {
2939 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2940 addr->index, addr->mem, am.new_op1,
2941 am.new_op2, am.ins_permuted,
2944 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2945 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2948 set_am_attributes(new_node, &am);
2949 set_ia32_ls_mode(new_node, cmp_mode);
2951 SET_IA32_ORIG_NODE(new_node, node);
2953 new_node = fix_mem_proj(new_node, &am);
/* Create a CMovcc from a Mux node given the (already computed) flags.
 * If match_arguments permuted the operands, the condition code is
 * negated to compensate. Requires the cmov CPU feature. */
2958 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2961 dbg_info *dbgi = get_irn_dbg_info(node);
2962 ir_node *block = get_nodes_block(node);
2963 ir_node *new_block = be_transform_node(block);
2964 ir_node *val_true = get_Mux_true(node);
2965 ir_node *val_false = get_Mux_false(node);
2967 ia32_address_mode_t am;
2968 ia32_address_t *addr;
2970 assert(ia32_cg_config.use_cmov);
2971 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2975 match_arguments(&am, block, val_false, val_true, flags,
2976 match_commutative | match_am | match_16bit_am | match_mode_neutral);
/* Operand swap by the matcher inverts which value is selected. */
2978 if (am.ins_permuted)
2979 pnc = ia32_get_negated_pnc(pnc);
2981 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2982 addr->mem, am.new_op1, am.new_op2, new_flags,
2984 set_am_attributes(new_node, &am);
2986 SET_IA32_ORIG_NODE(new_node, node);
2988 new_node = fix_mem_proj(new_node, &am);
2994 * Creates a ia32 Setcc instruction.
/* Create a Setcc producing 0/1 from flags; the 8bit setcc result is
 * zero-extended with a Conv when the requested mode is wider. */
2996 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2997 ir_node *flags, pn_Cmp pnc,
3000 ir_mode *mode = get_irn_mode(orig_node);
3003 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
3004 SET_IA32_ORIG_NODE(new_node, orig_node);
3006 /* we might need to conv the result up */
3007 if (get_mode_size_bits(mode) > 8) {
3008 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
3009 nomem, new_node, mode_Bu);
3010 SET_IA32_ORIG_NODE(new_node, orig_node);
3017 * Create instruction for an unsigned Difference or Zero.
/* Create an unsigned "difference or zero": (a - b) & -(a >= b),
 * implemented as Sub producing carry, Sbb0 turning the carry into a
 * 0 / all-ones mask, and And masking the subtraction result. */
3019 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3021 ir_mode *mode = get_irn_mode(psi);
3022 ir_node *new_node, *sub, *sbb, *eflags, *block;
3026 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3027 match_mode_neutral | match_am | match_immediate | match_two_users);
3029 block = get_nodes_block(new_node);
/* gen_binop may already have wrapped the Sub in a result Proj; in any
 * case we need the Sub itself to also project its flags output. */
3031 if (is_Proj(new_node)) {
3032 sub = get_Proj_pred(new_node);
3033 assert(is_ia32_Sub(sub));
3036 set_irn_mode(sub, mode_T);
3037 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3039 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3041 dbgi = get_irn_dbg_info(psi);
/* Sbb0 yields 0 when no borrow occurred, -1 otherwise. */
3042 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3044 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3045 set_ia32_commutative(new_node);
3050 * Create an const array of two float consts.
3052 * @param c0 the first constant
3053 * @param c1 the second constant
3054 * @param new_mode IN/OUT for the mode of the constants, if NULL
3055 * smallest possible mode will be used
/* Create a static, constant two-element float array entity holding the
 * tarvals of c0 and c1. When *new_mode is NULL the smallest mode that
 * represents both constants losslessly (float, then double) is chosen
 * and written back through new_mode. Returns the created entity. */
3057 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3059 ir_mode *mode = *new_mode;
3061 ir_initializer_t *initializer;
3062 tarval *tv0 = get_Const_tarval(c0);
3063 tarval *tv1 = get_Const_tarval(c1);
3066 /* detect the best mode for the constants */
3067 mode = get_tarval_mode(tv0);
3069 if (mode != mode_F) {
3070 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3071 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3073 tv0 = tarval_convert_to(tv0, mode);
3074 tv1 = tarval_convert_to(tv1, mode);
3075 } else if (mode != mode_D) {
3076 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3077 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3079 tv0 = tarval_convert_to(tv0, mode);
3080 tv1 = tarval_convert_to(tv1, mode);
/* Build the array type and a uniquely named local constant entity. */
3087 tp = ia32_create_float_type(mode, 4);
3088 tp = ia32_create_float_array(tp);
3090 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3092 set_entity_ld_ident(ent, get_entity_ident(ent));
3093 set_entity_visibility(ent, visibility_local);
3094 set_entity_variability(ent, variability_constant);
3095 set_entity_allocation(ent, allocation_static);
3097 initializer = create_initializer_compound(2);
3099 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3100 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3102 set_entity_initializer(ent, initializer);
3109 * Transforms a Mux node into some code sequence.
3111 * @return The transformed node.
/* Transform a Mux node. Float muxes become SSE min/max where the
 * pattern allows, or a Setcc-indexed load from a two-element constant
 * array; integer muxes are checked for the unsigned Doz pattern, then
 * lowered to Setcc (for 0/1 constants) or CMov. */
3113 static ir_node *gen_Mux(ir_node *node)
3115 dbg_info *dbgi = get_irn_dbg_info(node);
3116 ir_node *block = get_nodes_block(node);
3117 ir_node *new_block = be_transform_node(block);
3118 ir_node *mux_true = get_Mux_true(node);
3119 ir_node *mux_false = get_Mux_false(node);
3120 ir_node *cond = get_Mux_sel(node);
3121 ir_mode *mode = get_irn_mode(node);
3126 assert(get_irn_mode(cond) == mode_b);
3128 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3129 if (mode_is_float(mode)) {
3130 ir_node *cmp = get_Proj_pred(cond);
3131 ir_node *cmp_left = get_Cmp_left(cmp);
3132 ir_node *cmp_right = get_Cmp_right(cmp);
3133 pn_Cmp pnc = get_Proj_proj(cond);
3135 if (ia32_cg_config.use_sse2) {
/* Recognize min/max idioms: Mux(a<=b, a, b) etc. */
3136 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3137 if (cmp_left == mux_true && cmp_right == mux_false) {
3138 /* Mux(a <= b, a, b) => MIN */
3139 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3140 match_commutative | match_am | match_two_users);
3141 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3142 /* Mux(a <= b, b, a) => MAX */
3143 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3144 match_commutative | match_am | match_two_users);
3146 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3147 if (cmp_left == mux_true && cmp_right == mux_false) {
3148 /* Mux(a >= b, a, b) => MAX */
3149 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3150 match_commutative | match_am | match_two_users);
3151 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3152 /* Mux(a >= b, b, a) => MIN */
3153 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3154 match_commutative | match_am | match_two_users);
/* Mux of two float constants: build a constant array {false, true},
 * compute index = Setcc(cond), scale it by the element size and load
 * the selected element. */
3159 if (is_Const(mux_true) && is_Const(mux_false)) {
3160 ia32_address_mode_t am;
3165 flags = get_flags_node(cond, &pnc);
3166 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3168 if (ia32_cg_config.use_sse2) {
3169 /* cannot load from different mode on SSE */
3172 /* x87 can load any mode */
3176 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* Scale the 0/1 index by the constant size: Lea doubles per step,
 * Add covers the 16-byte case (no shift-by-16 AM scale exists). */
3178 switch (get_mode_size_bytes(new_mode)) {
3188 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3189 set_ia32_am_scale(new_node, 2);
3194 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3195 set_ia32_am_scale(new_node, 1);
3198 /* arg, shift 16 NOT supported */
3200 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3203 panic("Unsupported constant size");
3206 am.ls_mode = new_mode;
3207 am.addr.base = noreg_GP;
3208 am.addr.index = new_node;
3209 am.addr.mem = nomem;
3211 am.addr.scale = scale;
3212 am.addr.use_frame = 0;
3213 am.addr.frame_entity = NULL;
3214 am.addr.symconst_sign = 0;
3215 am.mem_proj = am.addr.mem;
3216 am.op_type = ia32_AddrModeS;
3219 am.pinned = op_pin_state_floats;
3221 am.ins_permuted = 0;
3223 if (ia32_cg_config.use_sse2)
3224 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3226 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3227 set_am_attributes(load, &am);
3229 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3231 panic("cannot transform floating point Mux");
/* Integer Mux path. */
3234 assert(ia32_mode_needs_gp_reg(mode));
3236 if (is_Proj(cond)) {
3237 ir_node *cmp = get_Proj_pred(cond);
3239 ir_node *cmp_left = get_Cmp_left(cmp);
3240 ir_node *cmp_right = get_Cmp_right(cmp);
3241 pn_Cmp pnc = get_Proj_proj(cond);
3243 /* check for unsigned Doz first */
3244 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3245 is_Const_0(mux_false) && is_Sub(mux_true) &&
3246 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3247 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3248 return create_Doz(node, cmp_left, cmp_right);
3249 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3250 is_Const_0(mux_true) && is_Sub(mux_false) &&
3251 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3252 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3253 return create_Doz(node, cmp_left, cmp_right);
3258 flags = get_flags_node(cond, &pnc);
3260 if (is_Const(mux_true) && is_Const(mux_false)) {
3261 /* both are const, good */
3262 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3263 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3264 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3265 pnc = ia32_get_negated_pnc(pnc);
3266 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3268 /* Not that simple. */
3273 new_node = create_CMov(node, cond, flags, pnc);
3281 * Create a conversion from x87 state register to general purpose.
/* Convert an x87 float to a GP integer by storing it to the frame with
 * fist and loading the (lower) 32 bits back. Unsigned 32bit values are
 * stored as 64bit signed to avoid overflow, then only the low word is
 * loaded. */
3283 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3285 ir_node *block = be_transform_node(get_nodes_block(node));
3286 ir_node *op = get_Conv_op(node);
3287 ir_node *new_op = be_transform_node(op);
3288 ir_graph *irg = current_ir_graph;
3289 dbg_info *dbgi = get_irn_dbg_info(node);
3290 ir_mode *mode = get_irn_mode(node);
3291 ir_node *fist, *load, *mem;
3293 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3294 set_irn_pinned(fist, op_pin_state_floats);
3295 set_ia32_use_frame(fist);
3296 set_ia32_op_type(fist, ia32_AddrModeD);
3298 assert(get_mode_size_bits(mode) <= 32);
3299 /* exception we can only store signed 32 bit integers, so for unsigned
3300 we store a 64bit (signed) integer and load the lower bits */
3301 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3302 set_ia32_ls_mode(fist, mode_Ls);
3304 set_ia32_ls_mode(fist, mode_Is);
3306 SET_IA32_ORIG_NODE(fist, node);
/* Load the integer result back from the spill slot. */
3309 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3311 set_irn_pinned(load, op_pin_state_floats);
3312 set_ia32_use_frame(load);
3313 set_ia32_op_type(load, ia32_AddrModeS);
3314 set_ia32_ls_mode(load, mode_Is);
/* Tell the spill-slot allocator how big the stack entity must be. */
3315 if (get_ia32_ls_mode(fist) == mode_Ls) {
3316 ia32_attr_t *attr = get_ia32_attr(load);
3317 attr->data.need_64bit_stackent = 1;
3319 ia32_attr_t *attr = get_ia32_attr(load);
3320 attr->data.need_32bit_stackent = 1;
3322 SET_IA32_ORIG_NODE(load, node);
3324 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3328 * Creates a x87 strict Conv by placing a Store and a Load
/* Realize a strict x87 float conversion by forcing a round-trip
 * through memory: store in tgt_mode, then load it back. This truncates
 * the 80bit x87 precision to the target precision. */
3330 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3332 ir_node *block = get_nodes_block(node);
3333 ir_graph *irg = get_Block_irg(block);
3334 dbg_info *dbgi = get_irn_dbg_info(node);
3335 ir_node *frame = get_irg_frame(irg);
3336 ir_node *store, *load;
3339 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3340 set_ia32_use_frame(store);
3341 set_ia32_op_type(store, ia32_AddrModeD);
3342 SET_IA32_ORIG_NODE(store, node);
3344 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3345 set_ia32_use_frame(load);
3346 set_ia32_op_type(load, ia32_AddrModeS);
3347 SET_IA32_ORIG_NODE(load, node);
3349 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Create an integer-to-integer conversion, picking the 8bit variant of
 * the Conv node when the (smaller) mode is 8 bits wide. */
3353 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3354 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3356 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3358 func = get_mode_size_bits(mode) == 8 ?
3359 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3360 return func(dbgi, block, base, index, mem, val, mode);
3364 * Create a conversion from general purpose to x87 register
/* Convert a GP integer to an x87 float with fild. Uses source address
 * mode directly when the operand qualifies; otherwise widens to 32bit
 * signed if needed, spills to the frame, and filds from there.
 * Unsigned 32bit values get a 64bit spill with an extra zero store of
 * the upper word so fild reads a non-negative 64bit number. */
3366 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3368 ir_node *src_block = get_nodes_block(node);
3369 ir_node *block = be_transform_node(src_block);
3370 ir_graph *irg = get_Block_irg(block);
3371 dbg_info *dbgi = get_irn_dbg_info(node);
3372 ir_node *op = get_Conv_op(node);
3373 ir_node *new_op = NULL;
3375 ir_mode *store_mode;
3380 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3381 if (possible_int_mode_for_fp(src_mode)) {
3382 ia32_address_mode_t am;
3384 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3385 if (am.op_type == ia32_AddrModeS) {
3386 ia32_address_t *addr = &am.addr;
3388 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3389 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3391 set_am_attributes(fild, &am);
3392 SET_IA32_ORIG_NODE(fild, node);
3394 fix_mem_proj(fild, &am);
/* No source AM possible: transform the operand normally. */
3399 if (new_op == NULL) {
3400 new_op = be_transform_node(op);
3403 mode = get_irn_mode(op);
3405 /* first convert to 32 bit signed if necessary */
3406 if (get_mode_size_bits(src_mode) < 32) {
3407 if (!upper_bits_clean(new_op, src_mode)) {
3408 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3409 SET_IA32_ORIG_NODE(new_op, node);
3414 assert(get_mode_size_bits(mode) == 32);
/* Spill the integer to the frame so fild can read it. */
3417 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3419 set_ia32_use_frame(store);
3420 set_ia32_op_type(store, ia32_AddrModeD);
3421 set_ia32_ls_mode(store, mode_Iu);
3423 /* exception for 32bit unsigned, do a 64bit spill+load */
3424 if (!mode_is_signed(mode)) {
/* Zero the upper 32 bits (at frame offset +4) so the 64bit value read
 * by fild is the non-negative zero-extension of the operand. */
3427 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3429 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3430 noreg_GP, nomem, zero_const);
3432 set_ia32_use_frame(zero_store);
3433 set_ia32_op_type(zero_store, ia32_AddrModeD);
3434 add_ia32_am_offs_int(zero_store, 4);
3435 set_ia32_ls_mode(zero_store, mode_Iu);
/* Sync both stores as memory input of the fild. */
3440 store = new_rd_Sync(dbgi, block, 2, in);
3441 store_mode = mode_Ls;
3443 store_mode = mode_Is;
3447 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3449 set_ia32_use_frame(fild);
3450 set_ia32_op_type(fild, ia32_AddrModeS);
3451 set_ia32_ls_mode(fild, store_mode);
3453 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3459 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes: the result is a Conv
 * to the smaller of the two modes (the wider direction is implicit on
 * ia32). Skips the Conv entirely when the operand's upper bits are
 * already known clean. */
3461 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3462 dbg_info *dbgi, ir_node *block, ir_node *op,
3465 ir_node *new_block = be_transform_node(block);
3467 ir_mode *smaller_mode;
3468 ia32_address_mode_t am;
3469 ia32_address_t *addr = &am.addr;
3472 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3473 smaller_mode = src_mode;
3475 smaller_mode = tgt_mode;
/* Debug-only diagnostic: a Conv directly after a constant should have
 * been folded by the middleend. */
3478 #ifdef DEBUG_libfirm
3480 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3485 match_arguments(&am, block, NULL, op, NULL,
3486 match_am | match_8bit_am | match_16bit_am);
3488 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3489 /* unnecessary conv. in theory it shouldn't have been AM */
3490 assert(is_ia32_NoReg_GP(addr->base));
3491 assert(is_ia32_NoReg_GP(addr->index));
3492 assert(is_NoMem(addr->mem));
3493 assert(am.addr.offset == 0);
3494 assert(am.addr.symconst_ent == NULL);
3498 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3499 addr->mem, am.new_op2, smaller_mode);
3500 set_am_attributes(new_node, &am);
3501 /* match_arguments assume that out-mode = in-mode, this isn't true here
3503 set_ia32_ls_mode(new_node, smaller_mode);
3504 SET_IA32_ORIG_NODE(new_node, node);
3505 new_node = fix_mem_proj(new_node, &am);
3510 * Transforms a Conv node.
3512 * @return The created ia32 Conv node
/* Transform a Conv node, dispatching on source/target mode class:
 * float<->float (SSE Conv_FP2FP or x87 strict round-trip),
 * float->int (SSE Conv_FP2I or x87 fist path), int->float (SSE
 * Conv_I2FP or x87 fild path, with a strict conv when the integer
 * has more mantissa bits than the float), and int->int. */
3514 static ir_node *gen_Conv(ir_node *node)
3516 ir_node *block = get_nodes_block(node);
3517 ir_node *new_block = be_transform_node(block);
3518 ir_node *op = get_Conv_op(node);
3519 ir_node *new_op = NULL;
3520 dbg_info *dbgi = get_irn_dbg_info(node);
3521 ir_mode *src_mode = get_irn_mode(op);
3522 ir_mode *tgt_mode = get_irn_mode(node);
3523 int src_bits = get_mode_size_bits(src_mode);
3524 int tgt_bits = get_mode_size_bits(tgt_mode);
3525 ir_node *res = NULL;
3527 assert(!mode_is_int(src_mode) || src_bits <= 32);
3528 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3530 /* modeB -> X should already be lowered by the lower_mode_b pass */
3531 if (src_mode == mode_b) {
3532 panic("ConvB not lowered %+F", node);
/* Same-mode Convs are no-ops (strict ones only matter on x87). */
3535 if (src_mode == tgt_mode) {
3536 if (get_Conv_strict(node)) {
3537 if (ia32_cg_config.use_sse2) {
3538 /* when we are in SSE mode, we can kill all strict no-op conversion */
3539 return be_transform_node(op);
3542 /* this should be optimized already, but who knows... */
3543 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3544 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3545 return be_transform_node(op);
3549 if (mode_is_float(src_mode)) {
3550 new_op = be_transform_node(op);
3551 /* we convert from float ... */
3552 if (mode_is_float(tgt_mode)) {
3554 if (ia32_cg_config.use_sse2) {
3555 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3556 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3558 set_ia32_ls_mode(res, tgt_mode);
3560 if (get_Conv_strict(node)) {
3561 /* if fp_no_float_fold is not set then we assume that we
3562 * don't have any float operations in a non
3563 * mode_float_arithmetic mode and can skip strict upconvs */
3564 if (src_bits < tgt_bits
3565 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3566 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3569 res = gen_x87_strict_conv(tgt_mode, new_op);
3570 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3574 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3579 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3580 if (ia32_cg_config.use_sse2) {
3581 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3583 set_ia32_ls_mode(res, src_mode);
3585 return gen_x87_fp_to_gp(node);
3589 /* we convert from int ... */
3590 if (mode_is_float(tgt_mode)) {
3592 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3593 if (ia32_cg_config.use_sse2) {
3594 new_op = be_transform_node(op);
3595 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3597 set_ia32_ls_mode(res, tgt_mode);
/* x87: compare mantissa widths to decide whether the int value can be
 * represented exactly; otherwise force a strict rounding conv. */
3599 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3600 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3601 res = gen_x87_gp_to_fp(node, src_mode);
3603 /* we need a strict-Conv, if the int mode has more bits than the
3605 if (float_mantissa < int_mantissa) {
3606 res = gen_x87_strict_conv(tgt_mode, res);
3607 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3611 } else if (tgt_mode == mode_b) {
3612 /* mode_b lowering already took care that we only have 0/1 values */
3613 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3614 src_mode, tgt_mode));
3615 return be_transform_node(op);
3618 if (src_bits == tgt_bits) {
3619 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3620 src_mode, tgt_mode));
3621 return be_transform_node(op);
3624 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to turn the node into an ia32 Immediate (honoring the asm-style
 * constraint character); fall back to normal transformation. */
3632 static ir_node *create_immediate_or_transform(ir_node *node,
3633 char immediate_constraint_type)
3635 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3636 if (new_node == NULL) {
3637 new_node = be_transform_node(node);
3643 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr into a frame-relative Lea (the frame entity
 * supplies the offset at stack-frame finalization). */
3645 static ir_node *gen_be_FrameAddr(ir_node *node)
3647 ir_node *block = be_transform_node(get_nodes_block(node));
3648 ir_node *op = be_get_FrameAddr_frame(node);
3649 ir_node *new_op = be_transform_node(op);
3650 dbg_info *dbgi = get_irn_dbg_info(node);
3653 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3654 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3655 set_ia32_use_frame(new_node);
3657 SET_IA32_ORIG_NODE(new_node, node);
3663 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. Only special-cased when SSE2 is in use and a
 * float is returned: the calling convention returns floats on the x87
 * stack, so the XMM value is stored to the frame and re-loaded into an
 * x87 register, and the Barrier feeding the Return is rebuilt with the
 * new value/memory inputs. All other cases are plain duplication. */
3665 static ir_node *gen_be_Return(ir_node *node)
3667 ir_graph *irg = current_ir_graph;
3668 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3669 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3670 ir_entity *ent = get_irg_entity(irg);
3671 ir_type *tp = get_entity_type(ent);
3676 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3677 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3679 int pn_ret_val, pn_ret_mem, arity, i;
3681 assert(ret_val != NULL);
3682 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3683 return be_duplicate_node(node);
3686 res_type = get_method_res_type(tp, 0);
3688 if (! is_Primitive_type(res_type)) {
3689 return be_duplicate_node(node);
3692 mode = get_type_mode(res_type);
3693 if (! mode_is_float(mode)) {
3694 return be_duplicate_node(node);
3697 assert(get_method_n_ress(tp) == 1);
3699 pn_ret_val = get_Proj_proj(ret_val);
3700 pn_ret_mem = get_Proj_proj(ret_mem);
3702 /* get the Barrier */
3703 barrier = get_Proj_pred(ret_val);
3705 /* get result input of the Barrier */
3706 ret_val = get_irn_n(barrier, pn_ret_val);
3707 new_ret_val = be_transform_node(ret_val);
3709 /* get memory input of the Barrier */
3710 ret_mem = get_irn_n(barrier, pn_ret_mem);
3711 new_ret_mem = be_transform_node(ret_mem);
3713 frame = get_irg_frame(irg);
3715 dbgi = get_irn_dbg_info(barrier);
3716 block = be_transform_node(get_nodes_block(barrier));
3718 /* store xmm0 onto stack */
3719 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3720 new_ret_mem, new_ret_val);
3721 set_ia32_ls_mode(sse_store, mode);
3722 set_ia32_op_type(sse_store, ia32_AddrModeD);
3723 set_ia32_use_frame(sse_store);
3725 /* load into x87 register */
3726 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3727 set_ia32_op_type(fld, ia32_AddrModeS);
3728 set_ia32_use_frame(fld);
3730 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3731 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
/* Rebuild the Barrier: substitute the new value/memory at their input
 * positions, transform every other input unchanged. */
3733 /* create a new barrier */
3734 arity = get_irn_arity(barrier);
3735 in = ALLOCAN(ir_node*, arity);
3736 for (i = 0; i < arity; ++i) {
3739 if (i == pn_ret_val) {
3741 } else if (i == pn_ret_mem) {
3744 ir_node *in = get_irn_n(barrier, i);
3745 new_in = be_transform_node(in);
3750 new_barrier = new_ir_node(dbgi, irg, block,
3751 get_irn_op(barrier), get_irn_mode(barrier),
3753 copy_node_attr(barrier, new_barrier);
3754 be_duplicate_deps(barrier, new_barrier);
3755 be_set_transformed_node(barrier, new_barrier);
3757 /* transform normally */
3758 return be_duplicate_node(node);
/* gen_be_AddSP: a be_AddSP (reserve stack space) becomes an ia32 SubSP
 * because the ia32 stack grows downwards. */
3762 * Transform a be_AddSP into an ia32_SubSP.
3764 static ir_node *gen_be_AddSP(ir_node *node)
3766 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3767 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3769 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3770 match_am | match_immediate);
/* gen_be_SubSP: mirror of gen_be_AddSP — releasing stack space maps to
 * an ia32 AddSP (stack grows downwards). */
3774 * Transform a be_SubSP into an ia32_AddSP
3776 static ir_node *gen_be_SubSP(ir_node *node)
3778 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3779 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3781 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3782 match_am | match_immediate);
/* gen_Phi: keeps the Phi but attaches the backend register requirement
 * (gp, xmm or vfp depending on mode).  The old (untransformed) inputs
 * are kept for now because Phis may sit on loops; a fixup pass resolves
 * them later. */
3786 * Change some phi modes
3788 static ir_node *gen_Phi(ir_node *node)
3790 const arch_register_req_t *req;
3791 ir_node *block = be_transform_node(get_nodes_block(node));
3792 ir_graph *irg = current_ir_graph;
3793 dbg_info *dbgi = get_irn_dbg_info(node);
3794 ir_mode *mode = get_irn_mode(node);
3797 if (ia32_mode_needs_gp_reg(mode)) {
3798 /* we shouldn't have any 64bit stuff around anymore */
3799 assert(get_mode_size_bits(mode) <= 32);
3800 /* all integer operations are on 32bit registers now */
3802 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
3803 } else if (mode_is_float(mode)) {
3804 if (ia32_cg_config.use_sse2) {
3806 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
3809 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
3812 req = arch_no_register_req;
3815 /* phi nodes allow loops, so we use the old arguments for now
3816 * and fix this later */
3817 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3818 get_irn_in(node) + 1);
3819 copy_node_attr(node, phi);
3820 be_duplicate_deps(node, phi);
3822 arch_set_out_register_req(phi, 0, req);
/* make sure predecessors get transformed even though we kept old inputs */
3824 be_enqueue_preds(node);
/* gen_Jmp: unconditional Jmp maps 1:1 onto an ia32 Jmp. */
3829 static ir_node *gen_Jmp(ir_node *node)
3831 ir_node *block = get_nodes_block(node);
3832 ir_node *new_block = be_transform_node(block);
3833 dbg_info *dbgi = get_irn_dbg_info(node);
3836 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3837 SET_IA32_ORIG_NODE(new_node, node);
/* gen_IJmp: indirect jump.  The target may be folded into an address
 * mode or an immediate via match_arguments; a memory Proj is fixed up
 * afterwards if the AM match consumed a load. */
3845 static ir_node *gen_IJmp(ir_node *node)
3847 ir_node *block = get_nodes_block(node);
3848 ir_node *new_block = be_transform_node(block);
3849 dbg_info *dbgi = get_irn_dbg_info(node);
3850 ir_node *op = get_IJmp_target(node);
3852 ia32_address_mode_t am;
3853 ia32_address_t *addr = &am.addr;
3855 assert(get_irn_mode(op) == mode_P);
3857 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3859 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3860 addr->mem, am.new_op2);
3861 set_am_attributes(new_node, &am);
3862 SET_IA32_ORIG_NODE(new_node, node);
3864 new_node = fix_mem_proj(new_node, &am);
/* gen_Bound: only the lower==0 form (typical for Java array checks) is
 * supported: index - upper sets the flags, and an unsigned-less Jcc
 * performs the range check.  Any other Bound panics. */
3870 * Transform a Bound node.
3872 static ir_node *gen_Bound(ir_node *node)
3875 ir_node *lower = get_Bound_lower(node);
3876 dbg_info *dbgi = get_irn_dbg_info(node);
3878 if (is_Const_0(lower)) {
3879 /* typical case for Java */
3880 ir_node *sub, *res, *flags, *block;
3882 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3883 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3885 block = get_nodes_block(res);
3886 if (! is_Proj(res)) {
3888 set_irn_mode(sub, mode_T);
3889 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3891 sub = get_Proj_pred(res);
/* unsigned "index < upper" also catches negative indices */
3893 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3894 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3895 SET_IA32_ORIG_NODE(new_node, node);
3897 panic("generic Bound not supported in ia32 Backend");
/* gen_ia32_l_ShlDep: lowered shift-left with an extra dependency input;
 * maps onto a plain ia32 Shl. */
3903 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3905 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3906 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3908 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3909 match_immediate | match_mode_neutral);
/* gen_ia32_l_ShrDep: lowered logical shift-right -> ia32 Shr. */
3912 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3914 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3915 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3916 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* gen_ia32_l_SarDep: lowered arithmetic shift-right -> ia32 Sar. */
3920 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3922 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3923 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3924 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* gen_ia32_l_Add: lowered 64bit-lowering Add -> ia32 Add.  The result is
 * forced to mode_T so the carry flag can be consumed by a following Adc. */
3928 static ir_node *gen_ia32_l_Add(ir_node *node)
3930 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3931 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3932 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3933 match_commutative | match_am | match_immediate |
3934 match_mode_neutral);
3936 if (is_Proj(lowered)) {
3937 lowered = get_Proj_pred(lowered);
3939 assert(is_ia32_Add(lowered));
3940 set_irn_mode(lowered, mode_T);
/* gen_ia32_l_Adc: add-with-carry (high word of a 64bit add). */
3946 static ir_node *gen_ia32_l_Adc(ir_node *node)
3948 return gen_binop_flags(node, new_bd_ia32_Adc,
3949 match_commutative | match_am | match_immediate |
3950 match_mode_neutral);
/* gen_ia32_l_Mul: lowered unsigned widening multiply -> ia32 Mul. */
3954 * Transforms a l_MulS into a "real" MulS node.
3956 * @return the created ia32 Mul node
3958 static ir_node *gen_ia32_l_Mul(ir_node *node)
3960 ir_node *left = get_binop_left(node);
3961 ir_node *right = get_binop_right(node);
3963 return gen_binop(node, left, right, new_bd_ia32_Mul,
3964 match_commutative | match_am | match_mode_neutral);
/* gen_ia32_l_IMul: lowered signed widening multiply -> ia32 IMul1OP. */
3968 * Transforms a l_IMulS into a "real" IMul1OPS node.
3970 * @return the created ia32 IMul1OP node
3972 static ir_node *gen_ia32_l_IMul(ir_node *node)
3974 ir_node *left = get_binop_left(node);
3975 ir_node *right = get_binop_right(node);
3977 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3978 match_commutative | match_am | match_mode_neutral);
/* gen_ia32_l_Sub: lowered Sub -> ia32 Sub; mode_T so the borrow flag is
 * available to a subsequent Sbb (see gen_ia32_l_Sbb). */
3981 static ir_node *gen_ia32_l_Sub(ir_node *node)
3983 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3984 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3985 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3986 match_am | match_immediate | match_mode_neutral);
3988 if (is_Proj(lowered)) {
3989 lowered = get_Proj_pred(lowered);
3991 assert(is_ia32_Sub(lowered));
3992 set_irn_mode(lowered, mode_T);
/* gen_ia32_l_Sbb: subtract-with-borrow (high word of a 64bit sub). */
3998 static ir_node *gen_ia32_l_Sbb(ir_node *node)
4000 return gen_binop_flags(node, new_bd_ia32_Sbb,
4001 match_am | match_immediate | match_mode_neutral);
/* gen_lowered_64bit_shifts: builds an ia32 ShlD/ShrD double-precision
 * shift.  Conversions on the count are skipped because x86 only looks
 * at the low 5 bits of the shift amount anyway. */
4005 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4006 * op1 - target to be shifted
4007 * op2 - contains bits to be shifted into target
4009 * Only op3 can be an immediate.
4011 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4012 ir_node *low, ir_node *count)
4014 ir_node *block = get_nodes_block(node);
4015 ir_node *new_block = be_transform_node(block);
4016 dbg_info *dbgi = get_irn_dbg_info(node);
4017 ir_node *new_high = be_transform_node(high);
4018 ir_node *new_low = be_transform_node(low);
4022 /* the shift amount can be any mode that is bigger than 5 bits, since all
4023 * other bits are ignored anyway */
4024 while (is_Conv(count) &&
4025 get_irn_n_edges(count) == 1 &&
4026 mode_is_int(get_irn_mode(count))) {
4027 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4028 count = get_Conv_op(count);
4030 new_count = create_immediate_or_transform(count, 0);
4032 if (is_ia32_l_ShlD(node)) {
4033 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4036 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4039 SET_IA32_ORIG_NODE(new_node, node);
/* gen_ia32_l_ShlD: dispatch the l_ShlD inputs to the shared helper. */
4044 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4046 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4047 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4048 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4049 return gen_lowered_64bit_shifts(node, high, low, count);
/* gen_ia32_l_ShrD: dispatch the l_ShrD inputs to the shared helper. */
4052 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4054 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4055 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4056 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4057 return gen_lowered_64bit_shifts(node, high, low, count);
/* gen_ia32_l_LLtoFloat: converts a 64bit integer (low/high word pair) to
 * a float via the x87 fild instruction: both words are stored to a frame
 * slot, fild loads them as a signed 64bit value.  For unsigned inputs
 * with the sign bit set, fild's signed interpretation is corrected by
 * adding 2^64 (ULL_BIAS constant, selected by shifting the high word's
 * sign bit into an AM index). */
4060 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4062 ir_node *src_block = get_nodes_block(node);
4063 ir_node *block = be_transform_node(src_block);
4064 ir_graph *irg = current_ir_graph;
4065 dbg_info *dbgi = get_irn_dbg_info(node);
4066 ir_node *frame = get_irg_frame(irg);
4067 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4068 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4069 ir_node *new_val_low = be_transform_node(val_low);
4070 ir_node *new_val_high = be_transform_node(val_high);
4072 ir_node *sync, *fild, *res;
4073 ir_node *store_low, *store_high;
4075 if (ia32_cg_config.use_sse2) {
4076 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both 32bit halves to adjacent frame slots (high at offset +4) */
4080 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4082 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4084 SET_IA32_ORIG_NODE(store_low, node);
4085 SET_IA32_ORIG_NODE(store_high, node);
4087 set_ia32_use_frame(store_low);
4088 set_ia32_use_frame(store_high);
4089 set_ia32_op_type(store_low, ia32_AddrModeD);
4090 set_ia32_op_type(store_high, ia32_AddrModeD);
4091 set_ia32_ls_mode(store_low, mode_Iu);
4092 set_ia32_ls_mode(store_high, mode_Is);
4093 add_ia32_am_offs_int(store_high, 4);
4097 sync = new_rd_Sync(dbgi, block, 2, in);
4100 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4102 set_ia32_use_frame(fild);
4103 set_ia32_op_type(fild, ia32_AddrModeS);
4104 set_ia32_ls_mode(fild, mode_Ls);
4106 SET_IA32_ORIG_NODE(fild, node);
4108 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: compensate fild's signed reading when sign bit set */
4110 if (! mode_is_signed(get_irn_mode(val_high))) {
4111 ia32_address_mode_t am;
4113 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4116 am.addr.base = noreg_GP;
4117 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4118 am.addr.mem = nomem;
4121 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4122 am.addr.use_frame = 0;
4123 am.addr.frame_entity = NULL;
4124 am.addr.symconst_sign = 0;
4125 am.ls_mode = mode_F;
4126 am.mem_proj = nomem;
4127 am.op_type = ia32_AddrModeS;
4129 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4130 am.pinned = op_pin_state_floats;
4132 am.ins_permuted = 0;
4134 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4135 am.new_op1, am.new_op2, get_fpcw());
4136 set_am_attributes(fadd, &am);
4138 set_irn_mode(fadd, mode_T);
4139 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/* gen_ia32_l_FloattoLL: float -> 64bit integer via x87 fist storing a
 * 64bit result into a frame slot; the two halves are picked up later by
 * gen_Proj_l_FloattoLL. */
4144 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4146 ir_node *src_block = get_nodes_block(node);
4147 ir_node *block = be_transform_node(src_block);
4148 ir_graph *irg = get_Block_irg(block);
4149 dbg_info *dbgi = get_irn_dbg_info(node);
4150 ir_node *frame = get_irg_frame(irg);
4151 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4152 ir_node *new_val = be_transform_node(val);
4153 ir_node *fist, *mem;
4155 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4156 SET_IA32_ORIG_NODE(fist, node);
4157 set_ia32_use_frame(fist);
4158 set_ia32_op_type(fist, ia32_AddrModeD);
4159 set_ia32_ls_mode(fist, mode_Ls);
/* bad_transform: registered for opcodes that must not reach the
 * transformation phase; always panics. */
4165 * the BAD transformer.
4167 static ir_node *bad_transform(ir_node *node)
4169 panic("No transform function for %+F available.", node);
/* gen_Proj_l_FloattoLL: reads one 32bit half of the 64bit fist result
 * back from the frame slot (offset +4 for the high word).  The stack
 * entity must still be 64bit wide, hence need_64bit_stackent. */
4173 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4175 ir_node *block = be_transform_node(get_nodes_block(node));
4176 ir_graph *irg = get_Block_irg(block);
4177 ir_node *pred = get_Proj_pred(node);
4178 ir_node *new_pred = be_transform_node(pred);
4179 ir_node *frame = get_irg_frame(irg);
4180 dbg_info *dbgi = get_irn_dbg_info(node);
4181 long pn = get_Proj_proj(node);
4186 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4187 SET_IA32_ORIG_NODE(load, node);
4188 set_ia32_use_frame(load);
4189 set_ia32_op_type(load, ia32_AddrModeS);
4190 set_ia32_ls_mode(load, mode_Iu);
4191 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4192 * 32 bit from it with this particular load */
4193 attr = get_ia32_attr(load);
4194 attr->data.need_64bit_stackent = 1;
4196 if (pn == pn_ia32_l_FloattoLL_res_high) {
4197 add_ia32_am_offs_int(load, 4);
4199 assert(pn == pn_ia32_l_FloattoLL_res_low);
4202 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/* gen_Proj_be_AddSP: renumber be_AddSP Projs onto the ia32 SubSP (note
 * the AddSP->SubSP swap from gen_be_AddSP); the sp result is pinned to
 * ESP. */
4208 * Transform the Projs of an AddSP.
4210 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4212 ir_node *block = be_transform_node(get_nodes_block(node));
4213 ir_node *pred = get_Proj_pred(node);
4214 ir_node *new_pred = be_transform_node(pred);
4215 dbg_info *dbgi = get_irn_dbg_info(node);
4216 long proj = get_Proj_proj(node);
4218 if (proj == pn_be_AddSP_sp) {
4219 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4220 pn_ia32_SubSP_stack);
4221 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4223 } else if (proj == pn_be_AddSP_res) {
4224 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4225 pn_ia32_SubSP_addr);
4226 } else if (proj == pn_be_AddSP_M) {
4227 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4230 panic("No idea how to transform proj->AddSP");
/* gen_Proj_be_SubSP: renumber be_SubSP Projs onto the ia32 AddSP
 * (mirror of gen_Proj_be_AddSP); the sp result is pinned to ESP. */
4234 * Transform the Projs of a SubSP.
4236 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4238 ir_node *block = be_transform_node(get_nodes_block(node));
4239 ir_node *pred = get_Proj_pred(node);
4240 ir_node *new_pred = be_transform_node(pred);
4241 dbg_info *dbgi = get_irn_dbg_info(node);
4242 long proj = get_Proj_proj(node);
4244 if (proj == pn_be_SubSP_sp) {
4245 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4246 pn_ia32_AddSP_stack);
4247 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4249 } else if (proj == pn_be_SubSP_M) {
4250 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4253 panic("No idea how to transform proj->SubSP");
/* gen_Proj_Load: renumber Load Projs onto the transformed load.  The
 * transformed predecessor may be an ia32 Load, xLoad (SSE), vfld (x87)
 * or even a Conv that absorbed the load through source address mode;
 * exception Projs additionally mark the load with an exc label. */
4257 * Transform and renumber the Projs from a Load.
4259 static ir_node *gen_Proj_Load(ir_node *node)
4262 ir_node *block = be_transform_node(get_nodes_block(node));
4263 ir_node *pred = get_Proj_pred(node);
4264 dbg_info *dbgi = get_irn_dbg_info(node);
4265 long proj = get_Proj_proj(node);
4267 /* loads might be part of source address mode matches, so we don't
4268 * transform the ProjMs yet (with the exception of loads whose result is
4271 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4273 ir_node *old_block = get_nodes_block(node);
4275 /* this is needed, because sometimes we have loops that are only
4276 reachable through the ProjM */
4277 be_enqueue_preds(node);
4278 /* do it in 2 steps, to silence firm verifier */
4279 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4280 set_Proj_proj(res, pn_ia32_mem);
4284 /* renumber the proj */
4285 new_pred = be_transform_node(pred);
4286 if (is_ia32_Load(new_pred)) {
4289 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4291 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4292 case pn_Load_X_regular:
4293 return new_rd_Jmp(dbgi, block);
4294 case pn_Load_X_except:
4295 /* This Load might raise an exception. Mark it. */
4296 set_ia32_exc_label(new_pred, 1);
4297 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* load was folded into a Conv via source AM */
4301 } else if (is_ia32_Conv_I2I(new_pred) ||
4302 is_ia32_Conv_I2I8Bit(new_pred)) {
4303 set_irn_mode(new_pred, mode_T);
4304 if (proj == pn_Load_res) {
4305 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4306 } else if (proj == pn_Load_M) {
4307 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4309 } else if (is_ia32_xLoad(new_pred)) {
4312 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4314 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4315 case pn_Load_X_regular:
4316 return new_rd_Jmp(dbgi, block);
4317 case pn_Load_X_except:
4318 /* This Load might raise an exception. Mark it. */
4319 set_ia32_exc_label(new_pred, 1);
4320 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4324 } else if (is_ia32_vfld(new_pred)) {
4327 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4329 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4330 case pn_Load_X_regular:
4331 return new_rd_Jmp(dbgi, block);
4332 case pn_Load_X_except:
4333 /* This Load might raise an exception. Mark it. */
4334 set_ia32_exc_label(new_pred, 1);
4335 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4340 /* can happen for ProJMs when source address mode happened for the
4343 /* however it should not be the result proj, as that would mean the
4344 load had multiple users and should not have been used for
4346 if (proj != pn_Load_M) {
4347 panic("internal error: transformed node not a Load");
4349 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4352 panic("No idea how to transform proj");
/* gen_Proj_DivMod: renumber Div/Mod/DivMod Projs onto the single ia32
 * Div/IDiv node, which produces both quotient and remainder. */
4356 * Transform and renumber the Projs from a DivMod like instruction.
4358 static ir_node *gen_Proj_DivMod(ir_node *node)
4360 ir_node *block = be_transform_node(get_nodes_block(node));
4361 ir_node *pred = get_Proj_pred(node);
4362 ir_node *new_pred = be_transform_node(pred);
4363 dbg_info *dbgi = get_irn_dbg_info(node);
4364 long proj = get_Proj_proj(node);
4366 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4368 switch (get_irn_opcode(pred)) {
4372 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4374 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4375 case pn_Div_X_regular:
4376 return new_rd_Jmp(dbgi, block);
4377 case pn_Div_X_except:
4378 set_ia32_exc_label(new_pred, 1);
4379 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4387 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4389 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4390 case pn_Mod_X_except:
4391 set_ia32_exc_label(new_pred, 1);
4392 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4400 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4401 case pn_DivMod_res_div:
4402 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4403 case pn_DivMod_res_mod:
4404 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4405 case pn_DivMod_X_regular:
4406 return new_rd_Jmp(dbgi, block);
4407 case pn_DivMod_X_except:
4408 set_ia32_exc_label(new_pred, 1);
4409 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4418 panic("No idea how to transform proj->DivMod");
/* gen_Proj_CopyB: renumber the memory Proj of a CopyB onto the
 * immediate (CopyB_i) or generic CopyB ia32 node. */
4422 * Transform and renumber the Projs from a CopyB.
4424 static ir_node *gen_Proj_CopyB(ir_node *node)
4426 ir_node *block = be_transform_node(get_nodes_block(node));
4427 ir_node *pred = get_Proj_pred(node);
4428 ir_node *new_pred = be_transform_node(pred);
4429 dbg_info *dbgi = get_irn_dbg_info(node);
4430 long proj = get_Proj_proj(node);
4433 case pn_CopyB_M_regular:
4434 if (is_ia32_CopyB_i(new_pred)) {
4435 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4436 } else if (is_ia32_CopyB(new_pred)) {
4437 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4444 panic("No idea how to transform proj->CopyB");
/* gen_Proj_Quot: renumber Quot (float division) Projs onto either the
 * SSE xDiv or the x87 vfdiv, depending on which was produced. */
4448 * Transform and renumber the Projs from a Quot.
4450 static ir_node *gen_Proj_Quot(ir_node *node)
4452 ir_node *block = be_transform_node(get_nodes_block(node));
4453 ir_node *pred = get_Proj_pred(node);
4454 ir_node *new_pred = be_transform_node(pred);
4455 dbg_info *dbgi = get_irn_dbg_info(node);
4456 long proj = get_Proj_proj(node);
4460 if (is_ia32_xDiv(new_pred)) {
4461 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4462 } else if (is_ia32_vfdiv(new_pred)) {
4463 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4467 if (is_ia32_xDiv(new_pred)) {
4468 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4469 } else if (is_ia32_vfdiv(new_pred)) {
4470 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4473 case pn_Quot_X_regular:
4474 case pn_Quot_X_except:
4479 panic("No idea how to transform proj->Quot");
/* gen_be_Call: lowers an indirect be_Call to an ia32 Call.  Collects the
 * up-to-three GP register parameters (eax/ecx/edx), matches the call
 * target for address-mode/immediate folding (with PIC adjustment
 * temporarily suppressed for trampoline calls), and flags the x87
 * simulator when a float result is returned.  SSE2 calls are collected
 * in call_list/call_types for post-processing. */
4482 static ir_node *gen_be_Call(ir_node *node)
4484 dbg_info *const dbgi = get_irn_dbg_info(node);
4485 ir_node *const src_block = get_nodes_block(node);
4486 ir_node *const block = be_transform_node(src_block);
4487 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4488 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4489 ir_node *const sp = be_transform_node(src_sp);
4490 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4491 ia32_address_mode_t am;
4492 ia32_address_t *const addr = &am.addr;
4497 ir_node * eax = noreg_GP;
4498 ir_node * ecx = noreg_GP;
4499 ir_node * edx = noreg_GP;
4500 unsigned const pop = be_Call_get_pop(node);
4501 ir_type *const call_tp = be_Call_get_type(node);
4502 int old_no_pic_adjust;
4504 /* Run the x87 simulator if the call returns a float value */
4505 if (get_method_n_ress(call_tp) > 0) {
4506 ir_type *const res_type = get_method_res_type(call_tp, 0);
4507 ir_mode *const res_mode = get_type_mode(res_type);
4509 if (res_mode != NULL && mode_is_float(res_mode)) {
4510 env_cg->do_x87_sim = 1;
4514 /* We do not want be_Call direct calls */
4515 assert(be_Call_get_entity(node) == NULL);
4517 /* special case for PIC trampoline calls */
4518 old_no_pic_adjust = no_pic_adjust;
4519 no_pic_adjust = env_cg->birg->main_env->options->pic;
4521 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4522 match_am | match_immediate);
4524 no_pic_adjust = old_no_pic_adjust;
/* walk register parameters back-to-front; last input is the fpcw */
4526 i = get_irn_arity(node) - 1;
4527 fpcw = be_transform_node(get_irn_n(node, i--));
4528 for (; i >= be_pos_Call_first_arg; --i) {
4529 arch_register_req_t const *const req = arch_get_register_req(node, i);
4530 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4532 assert(req->type == arch_register_req_type_limited);
4533 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4535 switch (*req->limited) {
4536 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4537 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4538 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4539 default: panic("Invalid GP register for register parameter");
4543 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4544 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4545 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4546 set_am_attributes(call, &am);
4547 call = fix_mem_proj(call, &am);
4549 if (get_irn_pinned(node) == op_pin_state_pinned)
4550 set_irn_pinned(call, op_pin_state_pinned);
4552 SET_IA32_ORIG_NODE(call, node);
4554 if (ia32_cg_config.use_sse2) {
4555 /* remember this call for post-processing */
4556 ARR_APP1(ir_node *, call_list, call);
4557 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
/* gen_trap: __builtin_trap -> ud2 instruction. */
4564 * Transform Builtin trap
4566 static ir_node *gen_trap(ir_node *node) {
4567 dbg_info *dbgi = get_irn_dbg_info(node);
4568 ir_node *block = be_transform_node(get_nodes_block(node));
4569 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4571 return new_bd_ia32_UD2(dbgi, block, mem);
/* gen_debugbreak: debug-break builtin -> int3 breakpoint instruction. */
4575 * Transform Builtin debugbreak
4577 static ir_node *gen_debugbreak(ir_node *node) {
4578 dbg_info *dbgi = get_irn_dbg_info(node);
4579 ir_node *block = be_transform_node(get_nodes_block(node));
4580 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4582 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/* gen_return_address: __builtin_return_address(level).  For level > 0 a
 * ClimbFrame walks up 'value' frames first; then the return address is
 * loaded from the (possibly climbed) frame at the return-address
 * entity.  NOTE(review): the elided lines presumably guard the
 * ClimbFrame with a value==0 check — confirm in the full source. */
4588 * Transform Builtin return_address
4590 static ir_node *gen_return_address(ir_node *node) {
4591 ir_node *param = get_Builtin_param(node, 0);
4592 ir_node *frame = get_Builtin_param(node, 1);
4593 dbg_info *dbgi = get_irn_dbg_info(node);
4594 tarval *tv = get_Const_tarval(param);
4595 unsigned long value = get_tarval_long(tv);
4597 ir_node *block = be_transform_node(get_nodes_block(node));
4598 ir_node *ptr = be_transform_node(frame);
4600 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4601 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4602 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4605 /* load the return address from this frame */
4606 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4608 set_irn_pinned(load, get_irn_pinned(node));
4609 set_ia32_op_type(load, ia32_AddrModeS);
4610 set_ia32_ls_mode(load, mode_Iu);
4612 set_ia32_am_offs_int(load, 0);
4613 set_ia32_use_frame(load);
4614 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4616 if (get_irn_pinned(node) == op_pin_state_floats) {
4617 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4618 && pn_ia32_vfld_res == pn_ia32_Load_res
4619 && pn_ia32_Load_res == pn_ia32_res);
4620 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4623 SET_IA32_ORIG_NODE(load, node);
4624 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/* gen_frame_address: __builtin_frame_address(level) — same climbing
 * scheme as gen_return_address, but loads through the frame-address
 * entity. */
4628 * Transform Builtin frame_address
4630 static ir_node *gen_frame_address(ir_node *node) {
4631 ir_node *param = get_Builtin_param(node, 0);
4632 ir_node *frame = get_Builtin_param(node, 1);
4633 dbg_info *dbgi = get_irn_dbg_info(node);
4634 tarval *tv = get_Const_tarval(param);
4635 unsigned long value = get_tarval_long(tv);
4637 ir_node *block = be_transform_node(get_nodes_block(node));
4638 ir_node *ptr = be_transform_node(frame);
4643 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4644 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4645 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4648 /* load the frame address from this frame */
4649 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4651 set_irn_pinned(load, get_irn_pinned(node));
4652 set_ia32_op_type(load, ia32_AddrModeS);
4653 set_ia32_ls_mode(load, mode_Iu);
4655 ent = ia32_get_frame_address_entity();
4657 set_ia32_am_offs_int(load, 0);
4658 set_ia32_use_frame(load);
4659 set_ia32_frame_ent(load, ent);
4661 /* will fail anyway, but gcc does this: */
4662 set_ia32_am_offs_int(load, 0);
4665 if (get_irn_pinned(node) == op_pin_state_floats) {
4666 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4667 && pn_ia32_vfld_res == pn_ia32_Load_res
4668 && pn_ia32_Load_res == pn_ia32_res);
4669 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4672 SET_IA32_ORIG_NODE(load, node);
4673 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/* gen_prefetch: prefetch builtin.  Without any prefetch support the
 * memory edge is just routed through.  Otherwise selects PrefetchW
 * (3DNow!, write hint), one of the SSE Prefetch0/1/2/NTA variants by
 * locality, or the plain 3DNow! Prefetch as fallback.
 * NOTE(review): the doc line below says "frame_address" — stale
 * copy-paste in the original; this transforms the prefetch builtin. */
4677 * Transform Builtin frame_address
4679 static ir_node *gen_prefetch(ir_node *node) {
4681 ir_node *ptr, *block, *mem, *base, *index;
4682 ir_node *param, *new_node;
4685 ia32_address_t addr;
4687 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4688 /* no prefetch at all, route memory */
4689 return be_transform_node(get_Builtin_mem(node));
4692 param = get_Builtin_param(node, 1);
4693 tv = get_Const_tarval(param);
4694 rw = get_tarval_long(tv);
4696 /* construct load address */
4697 memset(&addr, 0, sizeof(addr));
4698 ptr = get_Builtin_param(node, 0);
4699 ia32_create_address_mode(&addr, ptr, 0);
4706 base = be_transform_node(base);
4709 if (index == NULL) {
4712 index = be_transform_node(index);
4715 dbgi = get_irn_dbg_info(node);
4716 block = be_transform_node(get_nodes_block(node));
4717 mem = be_transform_node(get_Builtin_mem(node));
4719 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4720 /* we have 3DNow!, this was already checked above */
4721 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4722 } else if (ia32_cg_config.use_sse_prefetch) {
4723 /* note: rw == 1 is IGNORED in that case */
4724 param = get_Builtin_param(node, 2);
4725 tv = get_Const_tarval(param);
4726 locality = get_tarval_long(tv);
4728 /* SSE style prefetch */
4731 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4734 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4737 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4740 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4744 assert(ia32_cg_config.use_3dnow_prefetch);
4745 /* 3DNow! style prefetch */
4746 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4749 set_irn_pinned(new_node, get_irn_pinned(node));
4750 set_ia32_op_type(new_node, ia32_AddrModeS);
4751 set_ia32_ls_mode(new_node, mode_Bu);
4752 set_address(new_node, &addr);
4754 SET_IA32_ORIG_NODE(new_node, node);
4756 be_dep_on_frame(new_node);
4757 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
/* gen_unop_AM: shared helper for bsf/bsr/popcnt-style builtins — one
 * operand, address-mode capable, result width = operand mode. */
4761 * Transform bsf like node
4763 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4765 ir_node *param = get_Builtin_param(node, 0);
4766 dbg_info *dbgi = get_irn_dbg_info(node);
4768 ir_node *block = get_nodes_block(node);
4769 ir_node *new_block = be_transform_node(block);
4771 ia32_address_mode_t am;
4772 ia32_address_t *addr = &am.addr;
4775 match_arguments(&am, block, NULL, param, NULL, match_am);
4777 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4778 set_am_attributes(cnt, &am);
4779 set_ia32_ls_mode(cnt, get_irn_mode(param));
4781 SET_IA32_ORIG_NODE(cnt, node);
4782 return fix_mem_proj(cnt, &am);
/* gen_ffs: builtin ffs via bsf.  bsf leaves the result undefined for a
 * zero input, so the ZF-based Setcc/Conv/Neg/Or sequence forces the
 * result to all-ones in that case, and the final +1 yields ffs()'s
 * 1-based index (0 for input 0). */
4786 * Transform builtin ffs.
4788 static ir_node *gen_ffs(ir_node *node)
4790 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4791 ir_node *real = skip_Proj(bsf);
4792 dbg_info *dbgi = get_irn_dbg_info(real);
4793 ir_node *block = get_nodes_block(real);
4794 ir_node *flag, *set, *conv, *neg, *or;
4797 if (get_irn_mode(real) != mode_T) {
4798 set_irn_mode(real, mode_T);
4799 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4802 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
4805 set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
4806 SET_IA32_ORIG_NODE(set, node);
4809 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4810 SET_IA32_ORIG_NODE(conv, node);
4813 neg = new_bd_ia32_Neg(dbgi, block, conv);
4816 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4817 set_ia32_commutative(or);
4820 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
/* gen_clz: builtin clz via bsr.  bsr gives the index of the highest set
 * bit; xor with 31 converts it to the leading-zero count. */
4824 * Transform builtin clz.
4826 static ir_node *gen_clz(ir_node *node)
4828 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4829 ir_node *real = skip_Proj(bsr);
4830 dbg_info *dbgi = get_irn_dbg_info(real);
4831 ir_node *block = get_nodes_block(real);
4832 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4834 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
/* gen_ctz: builtin ctz is exactly bsf (trailing-zero count). */
4838 * Transform builtin ctz.
4840 static ir_node *gen_ctz(ir_node *node)
4842 return gen_unop_AM(node, new_bd_ia32_Bsf);
/* gen_parity: builtin parity.  A Cmp against 0 sets the parity flag,
 * which a Setcc with ia32_pn_Cmp_parity materializes; the final Conv
 * zero-extends the 8bit Setcc result. */
4846 * Transform builtin parity.
4848 static ir_node *gen_parity(ir_node *node)
4850 ir_node *param = get_Builtin_param(node, 0);
4851 dbg_info *dbgi = get_irn_dbg_info(node);
4853 ir_node *block = get_nodes_block(node);
4855 ir_node *new_block = be_transform_node(block);
4856 ir_node *imm, *cmp, *new_node;
4858 ia32_address_mode_t am;
4859 ia32_address_t *addr = &am.addr;
4863 match_arguments(&am, block, NULL, param, NULL, match_am);
4864 imm = ia32_create_Immediate(NULL, 0, 0);
4865 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4866 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4867 set_am_attributes(cmp, &am);
4868 set_ia32_ls_mode(cmp, mode_Iu);
4870 SET_IA32_ORIG_NODE(cmp, node);
4872 cmp = fix_mem_proj(cmp, &am);
4875 new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
4876 SET_IA32_ORIG_NODE(new_node, node);
4879 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4880 nomem, new_node, mode_Bu);
4881 SET_IA32_ORIG_NODE(new_node, node);
4886 * Transform builtin popcount
/**
 * Transform builtin popcount.
 * Uses the hardware popcnt instruction when available (SSE4.2/SSE4a),
 * otherwise emits the classic SWAR bit-counting sequence: pairwise sums of
 * 1-bit fields, then 2-, 4-, 8- and 16-bit fields. All five shifts must be
 * RIGHT shifts (x >> 1, >> 2, >> 4, >> 8, >> 16), as the comments state —
 * the previous code wrongly used Shl, which computes garbage.
 * Lea is used as a flag-preserving 2-operand add.
 */
4888 static ir_node *gen_popcount(ir_node *node) {
4889 ir_node *param = get_Builtin_param(node, 0);
4890 dbg_info *dbgi = get_irn_dbg_info(node);
4892 ir_node *block = get_nodes_block(node);
4893 ir_node *new_block = be_transform_node(block);
4896 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4898 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4899 if (ia32_cg_config.use_popcnt) {
4900 ia32_address_mode_t am;
4901 ia32_address_t *addr = &am.addr;
4904 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4906 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4907 set_am_attributes(cnt, &am);
4908 set_ia32_ls_mode(cnt, get_irn_mode(param));
4910 SET_IA32_ORIG_NODE(cnt, node);
4911 return fix_mem_proj(cnt, &am);
4914 new_param = be_transform_node(param);
4916 /* do the standard popcount algo */
4918 /* m1 = x & 0x55555555 */
4919 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4920 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
/* s1 = x >> 1 */
4923 simm = ia32_create_Immediate(NULL, 0, 1);
4924 s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
4926 /* m2 = s1 & 0x55555555 */
4927 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
/* m3 = m1 + m2 (per-2bit population counts) */
4930 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4932 /* m4 = m3 & 0x33333333 */
4933 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4934 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
/* s2 = m3 >> 2 */
4937 simm = ia32_create_Immediate(NULL, 0, 2);
4938 s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
4940 /* m5 = s2 & 0x33333333 */
4941 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
/* m6 = m4 + m5 (per-4bit population counts) */
4944 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4946 /* m7 = m6 & 0x0F0F0F0F */
4947 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4948 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
/* s3 = m6 >> 4 */
4951 simm = ia32_create_Immediate(NULL, 0, 4);
4952 s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
4954 /* m8 = s3 & 0x0F0F0F0F */
4955 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
/* m9 = m7 + m8 (per-byte population counts) */
4958 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4960 /* m10 = m9 & 0x00FF00FF */
4961 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4962 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
/* s4 = m9 >> 8 */
4965 simm = ia32_create_Immediate(NULL, 0, 8);
4966 s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
4968 /* m11 = s4 & 0x00FF00FF */
4969 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4971 /* m12 = m10 + m11 */
4972 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4974 /* m13 = m12 & 0x0000FFFF */
4975 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4976 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4978 /* s5 = m12 >> 16 */
4979 simm = ia32_create_Immediate(NULL, 0, 16);
4980 s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
4982 /* res = m13 + s5 */
4983 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4987 * Transform builtin byte swap.
/**
 * Transform builtin byte swap.
 * Uses the bswap instruction when targeting i486+, otherwise builds the
 * 32bit swap with shifts, ANDs and Leas (Lea serving as a 2-operand add):
 *   res = (x << 24) + ((x << 8) & 0xFF0000) + ((x >> 8) & 0xFF00) + (x >> 24)
 * The two AND masks were previously swapped (x<<8 masked with 0xFF00 and
 * x>>8 with 0xFF0000), which leaves bytes 1 and 2 holding the wrong data;
 * (x<<8)&0xFF0000 moves byte 1 to byte 2 and (x>>8)&0xFF00 moves byte 2
 * to byte 1, as required. 16bit values use Bswap16 (rol by 8).
 */
4989 static ir_node *gen_bswap(ir_node *node) {
4990 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4991 dbg_info *dbgi = get_irn_dbg_info(node);
4993 ir_node *block = get_nodes_block(node);
4994 ir_node *new_block = be_transform_node(block);
4995 ir_mode *mode = get_irn_mode(param);
4996 unsigned size = get_mode_size_bits(mode);
4997 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5001 if (ia32_cg_config.use_i486) {
5002 /* swap available */
5003 return new_bd_ia32_Bswap(dbgi, new_block, param);
/* s1 = x << 24: byte 0 -> byte 3 */
5005 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
/* m1 = (x << 8) & 0xFF0000: byte 1 -> byte 2 */
5006 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5008 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF0000));
5009 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
/* m3 = (x >> 8) & 0xFF00: byte 2 -> byte 1 */
5011 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5013 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF00));
5014 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
/* s4 = x >> 24: byte 3 -> byte 0 */
5016 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5017 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5020 /* swap16 always available */
5021 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5024 panic("Invalid bswap size (%d)", size);
5029 * Transform builtin outport.
/**
 * Transform builtin outport (write a value to an x86 I/O port).
 * The port number is encoded as an immediate where possible; the store
 * width is taken from the mode of the value being written.
 */
5031 static ir_node *gen_outport(ir_node *node) {
5032 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5033 ir_node *oldv = get_Builtin_param(node, 1);
5034 ir_mode *mode = get_irn_mode(oldv);
5035 ir_node *value = be_transform_node(oldv);
5036 ir_node *block = be_transform_node(get_nodes_block(node));
5037 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5038 dbg_info *dbgi = get_irn_dbg_info(node);
5040 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* ls_mode selects the access width of the out instruction */
5041 set_ia32_ls_mode(res, mode);
5046 * Transform builtin inport.
/**
 * Transform builtin inport (read a value from an x86 I/O port).
 * The result width is derived from the builtin's method type; the port
 * number is encoded as an immediate where possible.
 */
5048 static ir_node *gen_inport(ir_node *node) {
5049 ir_type *tp = get_Builtin_type(node);
5050 ir_type *rstp = get_method_res_type(tp, 0);
5051 ir_mode *mode = get_type_mode(rstp);
5052 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5053 ir_node *block = be_transform_node(get_nodes_block(node));
5054 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5055 dbg_info *dbgi = get_irn_dbg_info(node);
5057 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
/* ls_mode selects the access width of the in instruction */
5058 set_ia32_ls_mode(res, mode);
5060 /* check for missing Result Proj */
5065 * Transform a builtin inner trampoline
/**
 * Transform a builtin inner trampoline.
 * Writes a small code stub to memory at <ptr>:
 *     B9 <env>        mov ecx, <env>       (1 opcode byte + 4 byte imm)
 *     E9 <rel>        jmp rel32 <callee>   (1 opcode byte + 4 byte rel)
 * i.e. 10 bytes total; the jump displacement is relative to the end of
 * the stub, hence the -10 adjustment below. Each opcode byte / operand is
 * emitted as a separate Store through the same ia32_address_t.
 * NOTE(review): the address-offset advancement between the stores is not
 * visible in this chunk — presumably addr.offset is bumped after each
 * store; confirm against the full source.
 */
5067 static ir_node *gen_inner_trampoline(ir_node *node) {
5068 ir_node *ptr = get_Builtin_param(node, 0);
5069 ir_node *callee = get_Builtin_param(node, 1);
5070 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5071 ir_node *mem = get_Builtin_mem(node);
5072 ir_node *block = get_nodes_block(node);
5073 ir_node *new_block = be_transform_node(block);
5077 ir_node *trampoline;
5079 dbg_info *dbgi = get_irn_dbg_info(node);
5080 ia32_address_t addr;
5082 /* construct store address */
5083 memset(&addr, 0, sizeof(addr));
5084 ia32_create_address_mode(&addr, ptr, 0);
5086 if (addr.base == NULL) {
5087 addr.base = noreg_GP;
5089 addr.base = be_transform_node(addr.base);
5092 if (addr.index == NULL) {
5093 addr.index = noreg_GP;
5095 addr.index = be_transform_node(addr.index);
5097 addr.mem = be_transform_node(mem);
5099 /* mov ecx, <env> */
/* opcode byte 0xB9 = mov ecx, imm32 */
5100 val = ia32_create_Immediate(NULL, 0, 0xB9);
5101 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5102 addr.index, addr.mem, val);
5103 set_irn_pinned(store, get_irn_pinned(node));
5104 set_ia32_op_type(store, ia32_AddrModeD);
5105 set_ia32_ls_mode(store, mode_Bu);
5106 set_address(store, &addr);
/* store the 32bit immediate operand: the environment pointer */
5110 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5111 addr.index, addr.mem, env);
5112 set_irn_pinned(store, get_irn_pinned(node));
5113 set_ia32_op_type(store, ia32_AddrModeD);
5114 set_ia32_ls_mode(store, mode_Iu);
5115 set_address(store, &addr);
5119 /* jmp rel <callee> */
/* opcode byte 0xE9 = jmp rel32 */
5120 val = ia32_create_Immediate(NULL, 0, 0xE9);
5121 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5122 addr.index, addr.mem, val);
5123 set_irn_pinned(store, get_irn_pinned(node));
5124 set_ia32_op_type(store, ia32_AddrModeD);
5125 set_ia32_ls_mode(store, mode_Bu);
5126 set_address(store, &addr);
5130 trampoline = be_transform_node(ptr);
5132 /* the callee is typically an immediate */
5133 if (is_SymConst(callee)) {
/* rel = &callee - 10, then subtract the trampoline address below */
5134 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5136 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
5138 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the 32bit jump displacement */
5140 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5141 addr.index, addr.mem, rel);
5142 set_irn_pinned(store, get_irn_pinned(node));
5143 set_ia32_op_type(store, ia32_AddrModeD);
5144 set_ia32_ls_mode(store, mode_Iu);
5145 set_address(store, &addr);
/* result tuple: [memory, trampoline address] — see gen_Proj_Builtin */
5150 return new_r_Tuple(new_block, 2, in);
5154 * Transform Builtin node.
/**
 * Transform a Builtin node by dispatching on its kind to the specific
 * gen_* transformer. Unhandled kinds are a hard error.
 */
5156 static ir_node *gen_Builtin(ir_node *node) {
5157 ir_builtin_kind kind = get_Builtin_kind(node);
5161 return gen_trap(node);
5162 case ir_bk_debugbreak:
5163 return gen_debugbreak(node);
5164 case ir_bk_return_address:
5165 return gen_return_address(node);
5166 case ir_bk_frame_address:
5167 return gen_frame_address(node);
5168 case ir_bk_prefetch:
5169 return gen_prefetch(node);
5171 return gen_ffs(node);
5173 return gen_clz(node);
5175 return gen_ctz(node);
5177 return gen_parity(node);
5178 case ir_bk_popcount:
5179 return gen_popcount(node);
5181 return gen_bswap(node);
5183 return gen_outport(node);
5185 return gen_inport(node);
5186 case ir_bk_inner_trampoline:
5187 return gen_inner_trampoline(node);
5189 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5193 * Transform Proj(Builtin) node.
/**
 * Transform Proj(Builtin) node.
 * Most builtins produce a single value, so their result Proj is simply the
 * transformed builtin itself. Inport keeps explicit result/memory Projs;
 * inner_trampoline returns a Tuple (see gen_inner_trampoline) whose
 * element 1 is the result and element 0 the memory.
 */
5195 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5196 ir_node *node = get_Proj_pred(proj);
5197 ir_node *new_node = be_transform_node(node);
5198 ir_builtin_kind kind = get_Builtin_kind(node);
5201 case ir_bk_return_address:
5202 case ir_bk_frame_address:
5207 case ir_bk_popcount:
5209 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5212 case ir_bk_debugbreak:
5213 case ir_bk_prefetch:
5215 assert(get_Proj_proj(proj) == pn_Builtin_M);
5218 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5219 return new_r_Proj(get_nodes_block(new_node),
5220 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5222 assert(get_Proj_proj(proj) == pn_Builtin_M);
5223 return new_r_Proj(get_nodes_block(new_node),
5224 new_node, mode_M, pn_ia32_Inport_M);
5226 case ir_bk_inner_trampoline:
5227 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5228 return get_Tuple_pred(new_node, 1);
5230 assert(get_Proj_proj(proj) == pn_Builtin_M);
5231 return get_Tuple_pred(new_node, 0);
5234 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform a be_IncSP node: duplicate it and mark it as modifying the
 * flags, since it is implemented with add/sub on esp.
 */
5237 static ir_node *gen_be_IncSP(ir_node *node)
5239 ir_node *res = be_duplicate_node(node);
5240 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5246 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call into Projs of the ia32_Call.
 * Maps the generic be_Call proj numbers onto the ia32_Call output slots;
 * for register-constrained results it searches the ia32_Call outputs for
 * the one with the matching limited register requirement. Finally the
 * well-known outputs (stack pointer, fpcw) get their fixed registers
 * assigned on the new Proj.
 */
5248 static ir_node *gen_Proj_be_Call(ir_node *node)
5250 ir_node *block = be_transform_node(get_nodes_block(node));
5251 ir_node *call = get_Proj_pred(node);
5252 ir_node *new_call = be_transform_node(call);
5253 dbg_info *dbgi = get_irn_dbg_info(node);
5254 long proj = get_Proj_proj(node);
5255 ir_mode *mode = get_irn_mode(node);
5258 if (proj == pn_be_Call_M_regular) {
5259 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5261 /* transform call modes */
5262 if (mode_is_data(mode)) {
5263 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5267 /* Map from be_Call to ia32_Call proj number */
5268 if (proj == pn_be_Call_sp) {
5269 proj = pn_ia32_Call_stack;
5270 } else if (proj == pn_be_Call_M_regular) {
5271 proj = pn_ia32_Call_M;
5273 arch_register_req_t const *const req = arch_get_register_req_out(node);
5274 int const n_outs = arch_irn_get_n_outs(new_call);
5277 assert(proj >= pn_be_Call_first_res);
5278 assert(req->type & arch_register_req_type_limited);
/* find the ia32_Call output constrained to the same single register */
5280 for (i = 0; i < n_outs; ++i) {
5281 arch_register_req_t const *const new_req
5282 = arch_get_out_register_req(new_call, i);
5284 if (!(new_req->type & arch_register_req_type_limited) ||
5285 new_req->cls != req->cls ||
5286 *new_req->limited != *req->limited)
5295 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5297 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5299 case pn_ia32_Call_stack:
5300 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5303 case pn_ia32_Call_fpcw:
5304 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5312 * Transform the Projs from a Cmp.
/**
 * Transform the Projs from a Cmp.
 * Cmp results must have been lowered away before the backend runs; seeing
 * one here is an error, not a transformable case.
 */
5314 static ir_node *gen_Proj_Cmp(ir_node *node)
5316 /* this probably means not all mode_b nodes were lowered... */
5317 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5322 * Transform the Projs from a Bound.
/**
 * Transform the Projs from a Bound.
 * The Bound itself becomes a conditional jump (Jcc); its X_regular /
 * X_except exits map to the Jcc true/false outputs. Memory and index
 * Projs pass through to the transformed operands.
 */
5324 static ir_node *gen_Proj_Bound(ir_node *node)
5326 ir_node *new_node, *block;
5327 ir_node *pred = get_Proj_pred(node);
5329 switch (get_Proj_proj(node)) {
5331 return be_transform_node(get_Bound_mem(pred));
5332 case pn_Bound_X_regular:
5333 new_node = be_transform_node(pred);
5334 block = get_nodes_block(new_node);
5335 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5336 case pn_Bound_X_except:
5337 new_node = be_transform_node(pred);
5338 block = get_nodes_block(new_node);
5339 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
5341 return be_transform_node(get_Bound_index(pred));
5343 panic("unsupported Proj from Bound");
/**
 * Transform a Proj from an ASM node.
 * The memory output is renumbered to the last output of the transformed
 * ASM node; integer/reference and float outputs keep a position derived
 * from the original proj number (adjustment lines elided in this chunk).
 * Other modes are rejected.
 */
5347 static ir_node *gen_Proj_ASM(ir_node *node)
5349 ir_mode *mode = get_irn_mode(node);
5350 ir_node *pred = get_Proj_pred(node);
5351 ir_node *new_pred = be_transform_node(pred);
5352 ir_node *block = get_nodes_block(new_pred);
5353 long pos = get_Proj_proj(node);
5355 if (mode == mode_M) {
/* memory is always the last output of the ia32 ASM node */
5356 pos = arch_irn_get_n_outs(new_pred)-1;
5357 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5359 } else if (mode_is_float(mode)) {
5362 panic("unexpected proj mode at ASM");
5365 return new_r_Proj(block, new_pred, mode, pos);
5369 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes.
 * Dispatches on the opcode of the Proj's predecessor to the specialized
 * gen_Proj_* transformers. Store results collapse into the transformed
 * Store; Proj X_initial_exec from Start becomes an explicit Jmp; anything
 * needing a gp register gets a renumbered Proj in mode_Iu, and the rest
 * is duplicated as-is.
 */
5371 static ir_node *gen_Proj(ir_node *node)
5373 ir_node *pred = get_Proj_pred(node);
5376 switch (get_irn_opcode(pred)) {
5378 proj = get_Proj_proj(node);
5379 if (proj == pn_Store_M) {
/* a Store has only the memory result: return the Store itself */
5380 return be_transform_node(pred);
5382 panic("No idea how to transform proj->Store");
5385 return gen_Proj_Load(node);
5387 return gen_Proj_ASM(node);
5389 return gen_Proj_Builtin(node);
5393 return gen_Proj_DivMod(node);
5395 return gen_Proj_CopyB(node);
5397 return gen_Proj_Quot(node);
5399 return gen_Proj_be_SubSP(node);
5401 return gen_Proj_be_AddSP(node);
5403 return gen_Proj_be_Call(node);
5405 return gen_Proj_Cmp(node);
5407 return gen_Proj_Bound(node);
5409 proj = get_Proj_proj(node);
5411 case pn_Start_X_initial_exec: {
5412 ir_node *block = get_nodes_block(pred);
5413 ir_node *new_block = be_transform_node(block);
5414 dbg_info *dbgi = get_irn_dbg_info(node);
5415 /* we exchange the ProjX with a jump */
5416 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5421 case pn_Start_P_tls:
5422 return gen_Proj_tls(node);
5427 if (is_ia32_l_FloattoLL(pred)) {
5428 return gen_Proj_l_FloattoLL(node);
5430 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5434 ir_mode *mode = get_irn_mode(node);
5435 if (ia32_mode_needs_gp_reg(mode)) {
5436 ir_node *new_pred = be_transform_node(pred);
5437 ir_node *block = be_transform_node(get_nodes_block(node));
5438 ir_node *new_proj = new_r_Proj(block, new_pred,
5439 mode_Iu, get_Proj_proj(node));
/* keep the old node number for debugging */
5440 new_proj->node_nr = node->node_nr;
5445 return be_duplicate_node(node);
5449 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic op pointer, so that
 * be_transform_graph() can dispatch per opcode. GEN(x) wires gen_x as the
 * transformer for op_x; BAD(x) installs bad_transform for nodes that must
 * not appear anymore at this stage. (The GEN/BAD table itself is elided
 * in this chunk.)
 */
5451 static void register_transformers(void)
5453 /* first clear the generic function pointer for all ops */
5454 clear_irp_opcodes_generic_func();
5456 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5457 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5497 /* transform ops from intrinsic lowering */
5509 GEN(ia32_l_LLtoFloat);
5510 GEN(ia32_l_FloattoLL);
5516 /* we should never see these nodes */
5531 /* handle builtins */
5534 /* handle generic backend nodes */
5548 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes.
 * These singleton placeholder nodes are transformed up front so later
 * transformers can reference the results; also caches the NoMem node and
 * the gp NoReg in the file-local nomem/noreg_GP variables.
 */
5550 static void ia32_pretransform_node(void)
5552 ia32_code_gen_t *cg = env_cg;
5554 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5555 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5556 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5557 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5558 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5559 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5561 nomem = get_irg_no_mem(current_ir_graph);
5562 noreg_GP = ia32_new_NoReg_gp(cg);
5568 * Walker, checks if all ia32 nodes producing more than one result have their
5569 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/**
 * Walker: checks if all ia32 nodes producing more than one result have
 * their Projs, otherwise creates the missing Projs and keeps them alive
 * with a be_Keep node. Flag outputs are deliberately not kept. The set of
 * present Projs is tracked as a bitmask over output numbers (hence the
 * assert that n_outs fits into an unsigned).
 */
5571 static void add_missing_keep_walker(ir_node *node, void *data)
5574 unsigned found_projs = 0;
5575 const ir_edge_t *edge;
5576 ir_mode *mode = get_irn_mode(node);
5581 if (!is_ia32_irn(node))
5584 n_outs = arch_irn_get_n_outs(node);
/* SwitchJmp outputs are control flow, not values — skip it */
5587 if (is_ia32_SwitchJmp(node))
5590 assert(n_outs < (int) sizeof(unsigned) * 8);
5591 foreach_out_edge(node, edge) {
5592 ir_node *proj = get_edge_src_irn(edge);
5595 /* The node could be kept */
5599 if (get_irn_mode(proj) == mode_M)
5602 pn = get_Proj_proj(proj);
5603 assert(pn < n_outs);
5604 found_projs |= 1 << pn;
5608 /* are keeps missing? */
5610 for (i = 0; i < n_outs; ++i) {
5613 const arch_register_req_t *req;
5614 const arch_register_class_t *cls;
5616 if (found_projs & (1 << i)) {
5620 req = arch_get_out_register_req(node, i);
/* do not keep flag outputs — they may be clobbered freely */
5625 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5629 block = get_nodes_block(node);
5630 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
/* reuse one Keep per node, appending further outputs to it */
5631 if (last_keep != NULL) {
5632 be_Keep_add_node(last_keep, cls, in[0]);
5634 last_keep = be_new_Keep(block, 1, in);
5635 if (sched_is_scheduled(node)) {
5636 sched_add_after(node, last_keep);
5643 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/**
 * Adds missing keeps to nodes: walks the whole graph with
 * add_missing_keep_walker, adding Proj+Keep for unused outputs.
 */
5646 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5648 ir_graph *irg = be_get_birg_irg(cg->birg);
5649 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5653 * Post-process all calls if we are in SSE mode.
5654 * The ABI requires that the results are in st0, copy them
5655 * to a xmm register.
/**
 * Post-process all calls if we are in SSE mode.
 * The x87 ABI returns floating point results in st0, but SSE code expects
 * them in xmm registers. For every float result of every recorded call:
 * an xStore user can be patched directly into a vfst (store from st0);
 * all other users are rerouted through a stack round-trip — vfst st0 to
 * the frame, then xLoad it back into an SSE register — created lazily on
 * the first such user.
 */
5657 static void postprocess_fp_call_results(void) {
5660 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5661 ir_node *call = call_list[i];
5662 ir_type *mtp = call_types[i];
5665 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5666 ir_type *res_tp = get_method_res_type(mtp, j);
5667 ir_node *res, *new_res;
5668 const ir_edge_t *edge, *next;
5671 if (! is_atomic_type(res_tp)) {
5672 /* no floating point return */
5675 mode = get_type_mode(res_tp);
5676 if (! mode_is_float(mode)) {
5677 /* no floating point return */
5681 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5684 /* now patch the users */
5685 foreach_out_edge_safe(res, edge, next) {
5686 ir_node *succ = get_edge_src_irn(edge);
/* Keeps may stay on the x87 value */
5689 if (be_is_Keep(succ))
5692 if (is_ia32_xStore(succ)) {
5693 /* an xStore can be patched into an vfst */
5694 dbg_info *db = get_irn_dbg_info(succ);
5695 ir_node *block = get_nodes_block(succ);
5696 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5697 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5698 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5699 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5700 ir_mode *mode = get_ia32_ls_mode(succ);
5702 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5703 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5704 if (is_ia32_use_frame(succ))
5705 set_ia32_use_frame(st);
5706 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5707 set_irn_pinned(st, get_irn_pinned(succ));
5708 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st0 -> stack -> xmm round-trip once per result */
5712 if (new_res == NULL) {
5713 dbg_info *db = get_irn_dbg_info(call);
5714 ir_node *block = get_nodes_block(call);
5715 ir_node *frame = get_irg_frame(current_ir_graph);
5716 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5717 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5718 ir_node *vfst, *xld, *new_mem;
5720 /* store st(0) on stack */
5721 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5722 set_ia32_op_type(vfst, ia32_AddrModeD);
5723 set_ia32_use_frame(vfst);
5725 /* load into SSE register */
5726 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5727 set_ia32_op_type(xld, ia32_AddrModeS);
5728 set_ia32_use_frame(xld);
5730 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5731 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
/* serialize the new memory chain after the old call memory */
5733 if (old_mem != NULL) {
5734 edges_reroute(old_mem, new_mem, current_ir_graph);
5738 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5745 /* do the transformation */
/**
 * Run the firm -> ia32 transformation on the given code generator's graph.
 * Registers the transformers, computes the heights and non-address-mode
 * information, temporarily disables CSE (node attributes are set after
 * creation, which is unsafe under CSE), runs be_transform_graph() and, in
 * SSE2 mode, fixes up x87 float call results afterwards. The call_list /
 * call_types arrays collect calls for that SSE post-processing.
 */
5746 void ia32_transform_graph(ia32_code_gen_t *cg)
5750 register_transformers();
5752 initial_fpcw = NULL;
5755 be_timer_push(T_HEIGHTS);
5756 heights = heights_new(cg->irg);
5757 be_timer_pop(T_HEIGHTS);
5758 ia32_calculate_non_address_mode_nodes(cg->birg);
5760 /* the transform phase is not safe for CSE (yet) because several nodes get
5761 * attributes set after their creation */
5762 cse_last = get_opt_cse();
5765 call_list = NEW_ARR_F(ir_node *, 0);
5766 call_types = NEW_ARR_F(ir_type *, 0);
5767 be_transform_graph(cg->birg, ia32_pretransform_node);
5769 if (ia32_cg_config.use_sse2)
5770 postprocess_fp_call_results();
5771 DEL_ARR_F(call_types);
5772 DEL_ARR_F(call_list);
/* restore the CSE setting we saved above */
5774 set_opt_cse(cse_last);
5776 ia32_free_non_address_mode_nodes();
5777 heights_free(heights);
5781 void ia32_init_transform(void)
5783 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");