2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* Bit patterns (as hex strings) used to build the sign/abs mask constants
 * for float negate/abs, plus 2^63-1 and the 2^64 bias used when converting
 * unsigned long long to floating point. */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
82 #define ULL_BIAS "18446744073709551616"
/* Linker-local label names of the entities that hold the constants above. */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* Shortcuts for the modes of the two FP register classes (x87 vfp / SSE xmm). */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* NOTE(review): this extract is missing some lines (dropped during
 * extraction); the code below is kept verbatim, only comments were added. */
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* the transformed node of the initial fpu control word (lazily created) */
95 static ir_node *initial_fpcw = NULL;
/* Constructor-function typedefs so generic helpers (gen_binop & friends)
 * can be parameterized with the generated new_bd_ia32_* constructors. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations of helpers defined later in this file */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
128 /* it's enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/* NOTE(review): branch bodies and return statements of this function were
 * dropped by the extraction; code kept verbatim, comments only added. */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double from the tarval bytes
 * (little-endian byte order) */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
/* NOTE(review): several lines of this function (braces, local declarations,
 * some statements) were dropped by the extraction; code kept verbatim,
 * comments only added. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
/* --- float constants --- */
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
/* 0.0: materialize with xorps (xZero), no memory access needed */
217 if (tarval_is_null(tv)) {
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build from all-ones via shift left then shift right */
222 } else if (tarval_is_one(tv)) {
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
/* shift the upper 32 bits into place to form the double */
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general SSE case: load the constant from a global entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0 and 1.0, otherwise load from entity */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so it fits an ia32 immediate */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
/* NOTE(review): braces/else lines and some declarations were dropped by the
 * extraction; code kept verbatim, comments only added. */
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
/* float SymConst: emit a load (SSE xLoad or x87 vfld) from the entity */
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* integer/address SymConst: only entity addresses are supported */
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 int_Iu[align] = tp = new_type_primitive(mode);
384 /* set the specified alignment */
385 set_type_alignment_bytes(tp, align);
387 return int_Iu[align];
388 } else if (mode == mode_Lu) {
389 static ir_type *int_Lu[16] = {NULL, };
391 if (int_Lu[align] == NULL) {
392 int_Lu[align] = tp = new_type_primitive(mode);
393 /* set the specified alignment */
394 set_type_alignment_bytes(tp, align);
396 return int_Lu[align];
397 } else if (mode == mode_F) {
398 static ir_type *float_F[16] = {NULL, };
400 if (float_F[align] == NULL) {
401 float_F[align] = tp = new_type_primitive(mode);
402 /* set the specified alignment */
403 set_type_alignment_bytes(tp, align);
405 return float_F[align];
406 } else if (mode == mode_D) {
407 static ir_type *float_D[16] = {NULL, };
409 if (float_D[align] == NULL) {
410 float_D[align] = tp = new_type_primitive(mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return float_D[align];
416 static ir_type *float_E[16] = {NULL, };
418 if (float_E[align] == NULL) {
419 float_E[align] = tp = new_type_primitive(mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, align);
423 return float_E[align];
428 * Create a float[2] array type for the given atomic type.
430 * @param tp the atomic type
432 static ir_type *ia32_create_float_array(ir_type *tp)
434 ir_mode *mode = get_type_mode(tp);
435 unsigned align = get_type_alignment_bytes(tp);
440 if (mode == mode_F) {
441 static ir_type *float_F[16] = {NULL, };
443 if (float_F[align] != NULL)
444 return float_F[align];
445 arr = float_F[align] = new_type_array(1, tp);
446 } else if (mode == mode_D) {
447 static ir_type *float_D[16] = {NULL, };
449 if (float_D[align] != NULL)
450 return float_D[align];
451 arr = float_D[align] = new_type_array(1, tp);
453 static ir_type *float_E[16] = {NULL, };
455 if (float_E[align] != NULL)
456 return float_E[align];
457 arr = float_E[align] = new_type_array(1, tp);
459 set_type_alignment_bytes(arr, align);
460 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
461 set_type_state(arr, layout_fixed);
/* NOTE(review): some struct-field declarations, local declarations and
 * closing braces were dropped by the extraction; code kept verbatim,
 * comments only added. */
465 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
466 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
/* table of (entity name, constant string, mode selector, alignment)
 * for each known constant kind */
468 static const struct {
469 const char *ent_name;
470 const char *cnst_str;
473 } names [ia32_known_const_max] = {
474 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
475 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
476 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
477 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
478 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
/* lazily-created entity per known-constant kind */
480 static ir_entity *ent_cache[ia32_known_const_max];
482 const char *ent_name, *cnst_str;
488 ent_name = names[kct].ent_name;
489 if (! ent_cache[kct]) {
490 cnst_str = names[kct].cnst_str;
/* mode selector: 0 -> Iu (32bit mask), 1 -> Lu (64bit mask), else float */
492 switch (names[kct].mode) {
493 case 0: mode = mode_Iu; break;
494 case 1: mode = mode_Lu; break;
495 default: mode = mode_F; break;
497 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
498 tp = ia32_create_float_type(mode, names[kct].align);
/* the ULL bias is a 2-element array: { 0, bias } */
500 if (kct == ia32_ULLBIAS)
501 tp = ia32_create_float_array(tp);
502 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
504 set_entity_ld_ident(ent, get_entity_ident(ent));
505 set_entity_visibility(ent, visibility_local);
506 set_entity_variability(ent, variability_constant);
507 set_entity_allocation(ent, allocation_static);
509 if (kct == ia32_ULLBIAS) {
510 ir_initializer_t *initializer = create_initializer_compound(2);
512 set_initializer_compound_value(initializer, 0,
513 create_initializer_tarval(get_tarval_null(mode)));
514 set_initializer_compound_value(initializer, 1,
515 create_initializer_tarval(tv));
517 set_entity_initializer(ent, initializer);
519 set_entity_initializer(ent, create_initializer_tarval(tv));
522 /* cache the entry */
523 ent_cache[kct] = ent;
526 return ent_cache[kct];
/* NOTE(review): return statements and local declarations were dropped by the
 * extraction; code kept verbatim, comments only added. */
530 * return true if the node is a Proj(Load) and could be used in source address
531 * mode for another node. Will return only true if the @p other node is not
532 * dependent on the memory of the Load (for binary operations use the other
533 * input here, for unary operations use NULL).
535 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
536 ir_node *other, ir_node *other2, match_flags_t flags)
541 /* float constants are always available */
542 if (is_Const(node)) {
543 ir_mode *mode = get_irn_mode(node);
544 if (mode_is_float(mode)) {
545 if (ia32_cg_config.use_sse2) {
546 if (is_simple_sse_Const(node))
549 if (is_simple_x87_Const(node))
552 if (get_irn_n_edges(node) > 1)
/* node must be the result Proj of a Load in the same block */
560 load = get_Proj_pred(node);
561 pn = get_Proj_proj(node);
562 if (!is_Load(load) || pn != pn_Load_res)
564 if (get_nodes_block(load) != block)
566 /* we only use address mode if we're the only user of the load */
567 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
569 /* in some edge cases with address mode we might reach the load normally
570 * and through some AM sequence, if it is already materialized then we
571 * can't create an AM node from it */
572 if (be_is_transformed(node))
575 /* don't do AM if other node inputs depend on the load (via mem-proj) */
576 if (other != NULL && prevents_AM(block, load, other))
579 if (other2 != NULL && prevents_AM(block, load, other2))
/* NOTE(review): several struct fields were dropped by the extraction; code
 * kept verbatim, comments only added. Describes a matched ia32 operand
 * configuration: address, operand kind and permutation/commutativity flags. */
585 typedef struct ia32_address_mode_t ia32_address_mode_t;
586 struct ia32_address_mode_t {
/* whether the operation uses normal operands or source address mode */
591 ia32_op_type_t op_type;
595 unsigned commutative : 1;
/* set when the two operands were swapped during matching */
596 unsigned ins_permuted : 1;
599 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
601 /* construct load address */
602 memset(addr, 0, sizeof(addr[0]));
603 ia32_create_address_mode(addr, ptr, 0);
605 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
606 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
607 addr->mem = be_transform_node(mem);
/* NOTE(review): braces, local declarations and some statements were dropped
 * by the extraction; code kept verbatim, comments only added.
 * Fills @p am with a source-address-mode description of @p node, which is
 * either a float Const (loaded from a constant entity) or a Proj(Load). */
610 static void build_address(ia32_address_mode_t *am, ir_node *node,
611 ia32_create_am_flags_t flags)
613 ia32_address_t *addr = &am->addr;
/* float Const: address the generated constant entity directly */
619 if (is_Const(node)) {
620 ir_entity *entity = create_float_const_entity(node);
621 addr->base = noreg_GP;
622 addr->index = noreg_GP;
624 addr->symconst_ent = entity;
626 am->ls_mode = get_type_mode(get_entity_type(entity));
627 am->pinned = op_pin_state_floats;
/* Proj(Load): take address, memory and mode from the Load */
631 load = get_Proj_pred(node);
632 ptr = get_Load_ptr(load);
633 mem = get_Load_mem(load);
634 new_mem = be_transform_node(mem);
635 am->pinned = get_irn_pinned(load);
636 am->ls_mode = get_Load_mode(load);
637 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
640 /* construct load address */
641 ia32_create_address_mode(addr, ptr, flags);
643 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
644 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* NOTE(review): braces and a guarding if-line were dropped by the
 * extraction; code kept verbatim, comments only added.
 * Copies the fields of an ia32_address_t into the node's attributes. */
648 static void set_address(ir_node *node, const ia32_address_t *addr)
650 set_ia32_am_scale(node, addr->scale)ᅠ;
651 set_ia32_am_sc(node, addr->symconst_ent);
652 set_ia32_am_offs_int(node, addr->offset);
653 if (addr->symconst_sign)
654 set_ia32_am_sc_sign(node);
656 set_ia32_use_frame(node);
657 set_ia32_frame_ent(node, addr->frame_entity);
/* NOTE(review): braces and guard lines were dropped by the extraction; code
 * kept verbatim, comments only added. */
661 * Apply attributes of a given address mode to a node.
663 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
665 set_address(node, &am->addr);
667 set_ia32_op_type(node, am->op_type);
668 set_ia32_ls_mode(node, am->ls_mode);
/* propagate pinned state from the matched load to the new node */
669 if (am->pinned == op_pin_state_pinned) {
670 /* beware: some nodes are already pinned and did not allow to change the state */
671 if (get_irn_pinned(node) != op_pin_state_pinned)
672 set_irn_pinned(node, op_pin_state_pinned);
675 set_ia32_commutative(node);
/* NOTE(review): the is_Conv() guard, local declarations and the start of the
 * return expression were dropped by the extraction; code kept verbatim,
 * comments only added. */
679 * Check, if a given node is a Down-Conv, ie. a integer Conv
680 * from a mode with more bits to a mode with lesser bits.
681 * Moreover, we return only true if the node has not more than 1 user.
683 * @param node the node
684 * @return non-zero if node is a Down-Conv
686 static int is_downconv(const ir_node *node)
694 /* we only want to skip the conv when we're the only user
695 * (not optimal but for now...)
697 if (get_irn_n_edges(node) > 1)
700 src_mode = get_irn_mode(get_Conv_op(node));
701 dest_mode = get_irn_mode(node);
/* both modes must live in GP registers and the target must be narrower */
703 ia32_mode_needs_gp_reg(src_mode) &&
704 ia32_mode_needs_gp_reg(dest_mode) &&
705 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
708 /* Skip all Down-Conv's on a given node and return the resulting node. */
709 ir_node *ia32_skip_downconv(ir_node *node)
711 while (is_downconv(node))
712 node = get_Conv_op(node);
/* NOTE(review): local declarations and the tgt_mode assignments were dropped
 * by the extraction; code kept verbatim, comments only added.
 * Widens @p node to a 32bit mode via an I2I Conv (sign- or zero-extending
 * depending on the signedness of its mode). */
717 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
719 ir_mode *mode = get_irn_mode(node);
724 if (mode_is_signed(mode)) {
729 block = get_nodes_block(node);
730 dbgi = get_irn_dbg_info(node);
732 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): many lines (braces, declarations, else-branches, returns)
 * were dropped by the extraction; code kept verbatim, comments only added. */
736 * matches operands of a node into ia32 addressing/operand modes. This covers
737 * usage of source address mode, immediates, operations with non 32-bit modes,
739 * The resulting data is filled into the @p am struct. block is the block
740 * of the node whose arguments are matched. op1, op2 are the first and second
741 * input that are matched (op1 may be NULL). other_op is another unrelated
742 * input that is not matched! but which is needed sometimes to check if AM
743 * for op1/op2 is legal.
744 * @p flags describes the supported modes of the operation in detail.
746 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
747 ir_node *op1, ir_node *op2, ir_node *other_op,
750 ia32_address_t *addr = &am->addr;
751 ir_mode *mode = get_irn_mode(op2);
752 int mode_bits = get_mode_size_bits(mode);
753 ir_node *new_op1, *new_op2;
755 unsigned commutative;
756 int use_am_and_immediates;
759 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
761 commutative = (flags & match_commutative) != 0;
762 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
763 use_am = (flags & match_am) != 0;
764 use_immediate = (flags & match_immediate) != 0;
765 assert(!use_am_and_immediates || use_immediate);
768 assert(!commutative || op1 != NULL);
769 assert(use_am || !(flags & match_8bit_am));
770 assert(use_am || !(flags & match_16bit_am));
/* small modes without a matching 8/16bit AM flag disable address mode */
772 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
773 (mode_bits == 16 && !(flags & match_16bit_am))) {
777 /* we can simply skip downconvs for mode neutral nodes: the upper bits
778 * can be random for these operations */
779 if (flags & match_mode_neutral) {
780 op2 = ia32_skip_downconv(op2);
782 op1 = ia32_skip_downconv(op1);
786 /* match immediates. firm nodes are normalized: constants are always on the
789 if (!(flags & match_try_am) && use_immediate) {
790 new_op2 = try_create_Immediate(op2, 0);
/* try source address mode for op2 first */
793 if (new_op2 == NULL &&
794 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
795 build_address(am, op2, 0);
796 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
797 if (mode_is_float(mode)) {
798 new_op2 = ia32_new_NoReg_vfp(env_cg);
802 am->op_type = ia32_AddrModeS;
/* commutative ops may instead use op1 as the memory operand */
803 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
805 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
807 build_address(am, op1, 0);
809 if (mode_is_float(mode)) {
810 noreg = ia32_new_NoReg_vfp(env_cg);
815 if (new_op2 != NULL) {
818 new_op1 = be_transform_node(op2);
820 am->ins_permuted = 1;
822 am->op_type = ia32_AddrModeS;
/* no AM possible: fall back to normal operands */
825 am->op_type = ia32_Normal;
827 if (flags & match_try_am) {
833 mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation requires 32bit inputs */
834 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
835 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
837 new_op2 = create_upconv(op2, NULL);
838 am->ls_mode = mode_Iu;
840 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
842 new_op2 = be_transform_node(op2);
843 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* fill in defaults for unused address parts */
846 if (addr->base == NULL)
847 addr->base = noreg_GP;
848 if (addr->index == NULL)
849 addr->index = noreg_GP;
850 if (addr->mem == NULL)
853 am->new_op1 = new_op1;
854 am->new_op2 = new_op2;
855 am->commutative = commutative;
/* NOTE(review): braces, declarations and the no-mem-proj return were dropped
 * by the extraction; code kept verbatim, comments only added. */
859 * "Fixes" a node that uses address mode by turning it into mode_T
860 * and returning a pn_ia32_res Proj.
862 * @param node the node
863 * @param am its address mode
865 * @return a Proj(pn_ia32_res) if a memory address mode is used,
868 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
873 if (am->mem_proj == NULL)
876 /* we have to create a mode_T so the old MemProj can attach to us */
877 mode = get_irn_mode(node);
878 load = get_Proj_pred(am->mem_proj);
/* redirect later lookups of the consumed Load to the new node */
880 be_set_transformed_node(load, node);
882 if (mode != mode_T) {
883 set_irn_mode(node, mode_T);
884 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
/* NOTE(review): braces, the dbgi declaration and the final return were
 * dropped by the extraction; code kept verbatim, comments only added. */
891 * Construct a standard binary operation, set AM and immediate if required.
893 * @param node The original node for which the binop is created
894 * @param op1 The first operand
895 * @param op2 The second operand
896 * @param func The node constructor function
897 * @return The constructed ia32 node.
899 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
900 construct_binop_func *func, match_flags_t flags)
903 ir_node *block, *new_block, *new_node;
904 ia32_address_mode_t am;
905 ia32_address_t *addr = &am.addr;
907 block = get_nodes_block(node);
908 match_arguments(&am, block, op1, op2, NULL, flags);
910 dbgi = get_irn_dbg_info(node);
911 new_block = be_transform_node(block);
912 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
913 am.new_op1, am.new_op2);
914 set_am_attributes(new_node, &am);
915 /* we can't use source address mode anymore when using immediates */
916 if (!(flags & match_am_and_immediates) &&
917 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
918 set_ia32_am_support(new_node, ia32_am_none);
919 SET_IA32_ORIG_NODE(new_node, node);
921 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): the enum header/closing lines were dropped by the
 * extraction; code kept verbatim, comments only added. */
927 * Generic names for the inputs of an ia32 binary op.
930 n_ia32_l_binop_left, /**< ia32 left input */
931 n_ia32_l_binop_right, /**< ia32 right input */
932 n_ia32_l_binop_eflags /**< ia32 eflags input */
/* ensure the generic input indices line up with the generated Adc/Sbb
 * node layouts, so gen_binop_flags can use them interchangeably */
934 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
935 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
936 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
937 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
938 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
939 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* NOTE(review): braces, the dbgi declaration and the final return were
 * dropped by the extraction; code kept verbatim, comments only added. */
942 * Construct a binary operation which also consumes the eflags.
944 * @param node The node to transform
945 * @param func The node constructor function
946 * @param flags The match flags
947 * @return The constructor ia32 node
949 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
952 ir_node *src_block = get_nodes_block(node);
953 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
954 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
955 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
957 ir_node *block, *new_node, *new_eflags;
958 ia32_address_mode_t am;
959 ia32_address_t *addr = &am.addr;
/* eflags is passed as other_op so AM matching respects its dependencies */
961 match_arguments(&am, src_block, op1, op2, eflags, flags);
963 dbgi = get_irn_dbg_info(node);
964 block = be_transform_node(src_block);
965 new_eflags = be_transform_node(eflags);
966 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
967 am.new_op1, am.new_op2, new_eflags);
968 set_am_attributes(new_node, &am);
969 /* we can't use source address mode anymore when using immediates */
970 if (!(flags & match_am_and_immediates) &&
971 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
972 set_ia32_am_support(new_node, ia32_am_none);
973 SET_IA32_ORIG_NODE(new_node, node);
975 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): braces, the fpcw declaration and return statements were
 * dropped by the extraction; code kept verbatim, comments only added.
 * Returns the (lazily transformed) initial x87 FPU control word node. */
980 static ir_node *get_fpcw(void)
/* already created: reuse the cached node */
983 if (initial_fpcw != NULL)
986 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
987 &ia32_fp_cw_regs[REG_FPCW]);
988 initial_fpcw = be_transform_node(fpcw);
/* NOTE(review): braces, declarations, a mode_is_float/is_Div guard and the
 * final return were dropped by the extraction; code kept verbatim, comments
 * only added. */
994 * Construct a standard binary operation, set AM and immediate if required.
996 * @param op1 The first operand
997 * @param op2 The second operand
998 * @param func The node constructor function
999 * @return The constructed ia32 node.
1001 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1002 construct_binop_float_func *func)
1004 ir_mode *mode = get_irn_mode(node);
1006 ir_node *block, *new_block, *new_node;
1007 ia32_address_mode_t am;
1008 ia32_address_t *addr = &am.addr;
1009 ia32_x87_attr_t *attr;
1010 /* All operations are considered commutative, because there are reverse
1012 match_flags_t flags = match_commutative;
1014 /* happens for div nodes... */
1016 mode = get_divop_resmod(node);
1018 /* cannot use address mode with long double on x87 */
1019 if (get_mode_size_bits(mode) <= 64)
1022 block = get_nodes_block(node);
1023 match_arguments(&am, block, op1, op2, NULL, flags);
1025 dbgi = get_irn_dbg_info(node);
1026 new_block = be_transform_node(block);
/* x87 binops additionally take the FPU control word as input */
1027 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1028 am.new_op1, am.new_op2, get_fpcw());
1029 set_am_attributes(new_node, &am);
/* record operand swapping so emission can pick the reversed opcode */
1031 attr = get_ia32_x87_attr(new_node);
1032 attr->attr.data.ins_permuted = am.ins_permuted;
1034 SET_IA32_ORIG_NODE(new_node, node);
1036 new_node = fix_mem_proj(new_node, &am);
/* NOTE(review): braces, else-lines, the dbgi declaration and the final
 * return were dropped by the extraction; code kept verbatim, comments only
 * added. */
1042 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1044 * @param op1 The first operand
1045 * @param op2 The second operand
1046 * @param func The node constructor function
1047 * @return The constructed ia32 node.
1049 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1050 construct_shift_func *func,
1051 match_flags_t flags)
1054 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1056 assert(! mode_is_float(get_irn_mode(node)));
1057 assert(flags & match_immediate);
1058 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* prepare the shifted value: skip downconvs or widen to 32bit */
1060 if (flags & match_mode_neutral) {
1061 op1 = ia32_skip_downconv(op1);
1062 new_op1 = be_transform_node(op1);
1063 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1064 new_op1 = create_upconv(op1, node);
1066 new_op1 = be_transform_node(op1);
1069 /* the shift amount can be any mode that is bigger than 5 bits, since all
1070 * other bits are ignored anyway */
1071 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1072 ir_node *const op = get_Conv_op(op2);
1073 if (mode_is_float(get_irn_mode(op)))
1076 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1078 new_op2 = create_immediate_or_transform(op2, 0);
1080 dbgi = get_irn_dbg_info(node);
1081 block = get_nodes_block(node);
1082 new_block = be_transform_node(block);
1083 new_node = func(dbgi, new_block, new_op1, new_op2);
1084 SET_IA32_ORIG_NODE(new_node, node);
1086 /* lowered shift instruction may have a dependency operand, handle it here */
1087 if (get_irn_arity(node) == 3) {
1088 /* we have a dependency */
1089 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1090 add_irn_dep(new_node, new_dep);
/* NOTE(review): braces, the dbgi declaration and the final return were
 * dropped by the extraction; code kept verbatim, comments only added. */
1098 * Construct a standard unary operation, set AM and immediate if required.
1100 * @param op The operand
1101 * @param func The node constructor function
1102 * @return The constructed ia32 node.
1104 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1105 match_flags_t flags)
1108 ir_node *block, *new_block, *new_op, *new_node;
/* only "no flags" or mode-neutral are meaningful for unary ops */
1110 assert(flags == 0 || flags == match_mode_neutral);
1111 if (flags & match_mode_neutral) {
1112 op = ia32_skip_downconv(op);
1115 new_op = be_transform_node(op);
1116 dbgi = get_irn_dbg_info(node);
1117 block = get_nodes_block(node);
1118 new_block = be_transform_node(block);
1119 new_node = func(dbgi, new_block, new_op);
1121 SET_IA32_ORIG_NODE(new_node, node);
/* NOTE(review): braces, NULL-fallback lines and the return were dropped by
 * the extraction; code kept verbatim, comments only added.
 * Builds an ia32 Lea node computing the effective address described by
 * @p addr. */
1126 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1127 ia32_address_t *addr)
1129 ir_node *base, *index, *res;
1135 base = be_transform_node(base);
1138 index = addr->index;
1139 if (index == NULL) {
1142 index = be_transform_node(index);
1145 res = new_bd_ia32_Lea(dbgi, block, base, index);
1146 set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
/* NOTE(review): many lines (braces, declarations, else-branches, returns)
 * were dropped by the extraction; code kept verbatim, comments only added. */
1162 * Creates an ia32 Add.
1164 * @return the created ia32 Add node
1166 static ir_node *gen_Add(ir_node *node)
1168 ir_mode *mode = get_irn_mode(node);
1169 ir_node *op1 = get_Add_left(node);
1170 ir_node *op2 = get_Add_right(node);
1172 ir_node *block, *new_block, *new_node, *add_immediate_op;
1173 ia32_address_t addr;
1174 ia32_address_mode_t am;
/* float adds go through the generic SSE/x87 binop helpers */
1176 if (mode_is_float(mode)) {
1177 if (ia32_cg_config.use_sse2)
1178 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1179 match_commutative | match_am);
1181 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1184 ia32_mark_non_am(node);
1186 op2 = ia32_skip_downconv(op2);
1187 op1 = ia32_skip_downconv(op1);
/* transformation strategy, tried in order: */
1191 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1192 * 1. Add with immediate -> Lea
1193 * 2. Add with possible source address mode -> Add
1194 * 3. Otherwise -> Lea
1196 memset(&addr, 0, sizeof(addr));
1197 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1198 add_immediate_op = NULL;
1200 dbgi = get_irn_dbg_info(node);
1201 block = get_nodes_block(node);
1202 new_block = be_transform_node(block);
/* case 0: the whole Add folded into pure immediates -> single Const */
1205 if (addr.base == NULL && addr.index == NULL) {
1206 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1207 addr.symconst_sign, 0, addr.offset);
1208 be_dep_on_frame(new_node);
1209 SET_IA32_ORIG_NODE(new_node, node);
1212 /* add with immediate? */
1213 if (addr.index == NULL) {
1214 add_immediate_op = addr.base;
1215 } else if (addr.base == NULL && addr.scale == 0) {
1216 add_immediate_op = addr.index;
1219 if (add_immediate_op != NULL) {
/* Add x,0: nothing to compute at all */
1220 if (!am_has_immediates(&addr)) {
1221 #ifdef DEBUG_libfirm
1222 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1225 return be_transform_node(add_immediate_op);
/* case 1: one register plus immediates -> Lea */
1228 new_node = create_lea_from_address(dbgi, new_block, &addr);
1229 SET_IA32_ORIG_NODE(new_node, node);
1233 /* test if we can use source address mode */
1234 match_arguments(&am, block, op1, op2, NULL, match_commutative
1235 | match_mode_neutral | match_am | match_immediate | match_try_am);
1237 /* construct an Add with source address mode */
1238 if (am.op_type == ia32_AddrModeS) {
1239 ia32_address_t *am_addr = &am.addr;
1240 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1241 am_addr->index, am_addr->mem, am.new_op1,
1243 set_am_attributes(new_node, &am);
1244 SET_IA32_ORIG_NODE(new_node, node);
1246 new_node = fix_mem_proj(new_node, &am);
1251 /* otherwise construct a lea */
1252 new_node = create_lea_from_address(dbgi, new_block, &addr);
1253 SET_IA32_ORIG_NODE(new_node, node);
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
/* Transforms a firm Mul: xMul (SSE2) or vfmul (x87) for floats,
 * otherwise an IMul with the full set of matching options (source
 * address mode, immediates, both at once, mode-neutral). */
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
/* Emits a one-operand widening multiply (IMul1OP for signed, Mul for
 * unsigned) and returns the Proj selecting the high 32 bits of the
 * 64-bit result. Only 32-bit modes are supported. */
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (get_mode_size_bits(mode) != 32) {
1299 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1302 if (mode_is_signed(mode)) {
1303 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1304 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1306 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1307 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1309 return proj_res_high;
1313 * Creates an ia32 And.
1315 * @return The created ia32 And node
/* Transforms a firm And. An And with constant 0xFF or 0xFFFF is really
 * a zero-extension and is turned into an I2I Conv instead of an And;
 * everything else becomes an ia32 And (commutative, AM, immediate). */
1317 static ir_node *gen_And(ir_node *node)
1319 ir_node *op1 = get_And_left(node);
1320 ir_node *op2 = get_And_right(node);
1321 assert(! mode_is_float(get_irn_mode(node)));
1323 /* is it a zero extension? */
1324 if (is_Const(op2)) {
1325 tarval *tv = get_Const_tarval(op2);
1326 long v = get_tarval_long(tv);
1328 if (v == 0xFF || v == 0xFFFF) {
1329 dbg_info *dbgi = get_irn_dbg_info(node);
1330 ir_node *block = get_nodes_block(node);
/* NOTE(review): the elided lines presumably set src_mode to mode_Bu
 * for the 0xFF case — only the 0xFFFF branch is visible here. */
1337 assert(v == 0xFFFF);
1340 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1345 return gen_binop(node, op1, op2, new_bd_ia32_And,
1346 match_commutative | match_mode_neutral | match_am | match_immediate);
1352 * Creates an ia32 Or.
1354 * @return The created ia32 Or node
/* Transforms a firm Or into an ia32 Or (integer only). */
1356 static ir_node *gen_Or(ir_node *node)
1358 ir_node *op1 = get_Or_left(node);
1359 ir_node *op2 = get_Or_right(node);
1361 assert (! mode_is_float(get_irn_mode(node)));
1362 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1363 | match_mode_neutral | match_am | match_immediate);
1369 * Creates an ia32 Eor.
1371 * @return The created ia32 Eor node
/* Transforms a firm Eor (exclusive or) into an ia32 Xor (integer only). */
1373 static ir_node *gen_Eor(ir_node *node)
1375 ir_node *op1 = get_Eor_left(node);
1376 ir_node *op2 = get_Eor_right(node);
1378 assert(! mode_is_float(get_irn_mode(node)));
1379 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1380 | match_mode_neutral | match_am | match_immediate);
1385 * Creates an ia32 Sub.
1387 * @return The created ia32 Sub node
/* Transforms a firm Sub: xSub (SSE2) or vfsub (x87) for floats,
 * otherwise an ia32 Sub. A Sub with a constant right operand should
 * have been normalized to an Add by the middleend; it is only warned
 * about here, not rewritten. */
1389 static ir_node *gen_Sub(ir_node *node)
1391 ir_node *op1 = get_Sub_left(node);
1392 ir_node *op2 = get_Sub_right(node);
1393 ir_mode *mode = get_irn_mode(node);
1395 if (mode_is_float(mode)) {
1396 if (ia32_cg_config.use_sse2)
1397 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1399 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1402 if (is_Const(op2)) {
1403 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1407 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1408 | match_am | match_immediate);
/* Computes the memory input for a node that consumed a load via address
 * mode. Returns the transformed src_mem, but takes care not to create a
 * memory self-loop: if src_mem is (or, inside a Sync, contains) the mem
 * Proj of the very load that was folded in (identified via src_val's
 * predecessor), that predecessor is dropped and am_mem used instead. */
1411 static ir_node *transform_AM_mem(ir_node *const block,
1412 ir_node *const src_val,
1413 ir_node *const src_mem,
1414 ir_node *const am_mem)
1416 if (is_NoMem(am_mem)) {
1417 return be_transform_node(src_mem);
1418 } else if (is_Proj(src_val) &&
1420 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1421 /* avoid memory loop */
1423 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1424 ir_node *const ptr_pred = get_Proj_pred(src_val);
1425 int const arity = get_Sync_n_preds(src_mem);
/* Rebuild the Sync with room for am_mem, skipping the folded load. */
1430 NEW_ARR_A(ir_node*, ins, arity + 1);
1432 /* NOTE: This sometimes produces dead-code because the old sync in
1433 * src_mem might not be used anymore, we should detect this case
1434 * and kill the sync... */
1435 for (i = arity - 1; i >= 0; --i) {
1436 ir_node *const pred = get_Sync_pred(src_mem, i);
1438 /* avoid memory loop */
1439 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1442 ins[n++] = be_transform_node(pred);
1447 return new_r_Sync(block, n, ins);
/* Default: sync the transformed src_mem with the AM memory. */
1451 ins[0] = be_transform_node(src_mem);
1453 return new_r_Sync(block, 2, ins);
1458 * Create a 32bit to 64bit signed extension.
1460 * @param dbgi debug info
1461 * @param block the block where node nodes should be placed
1462 * @param val the value to extend
1463 * @param orig the original node
/* Produces the high 32 bits of the sign extension of val: either a Cltd
 * (cdq, short encoding but clobbers eax/edx — fed by a ProduceVal to
 * satisfy the register allocator) or a Sar by 31. */
1465 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1466 ir_node *val, const ir_node *orig)
1471 if (ia32_cg_config.use_short_sex_eax) {
1472 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1473 be_dep_on_frame(pval);
1474 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1476 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1477 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1479 SET_IA32_ORIG_NODE(res, orig);
1484 * Generates an ia32 DivMod with additional infrastructure for the
1485 * register allocator if needed.
/* Common transformation for firm Div, Mod and DivMod nodes. Emits an
 * IDiv (signed, with a 32->64 sign extension feeding edx) or a Div
 * (unsigned, edx zeroed via a Const). The divisor may use source
 * address mode; operands smaller than 32 bit are up-converted since
 * the upper bits would otherwise contain garbage. */
1487 static ir_node *create_Div(ir_node *node)
1489 dbg_info *dbgi = get_irn_dbg_info(node);
1490 ir_node *block = get_nodes_block(node);
1491 ir_node *new_block = be_transform_node(block);
1498 ir_node *sign_extension;
1499 ia32_address_mode_t am;
1500 ia32_address_t *addr = &am.addr;
1502 /* the upper bits have random contents for smaller modes */
1503 switch (get_irn_opcode(node)) {
1505 op1 = get_Div_left(node);
1506 op2 = get_Div_right(node);
1507 mem = get_Div_mem(node);
1508 mode = get_Div_resmode(node);
1511 op1 = get_Mod_left(node);
1512 op2 = get_Mod_right(node);
1513 mem = get_Mod_mem(node);
1514 mode = get_Mod_resmode(node);
1517 op1 = get_DivMod_left(node);
1518 op2 = get_DivMod_right(node);
1519 mem = get_DivMod_mem(node);
1520 mode = get_DivMod_resmode(node);
1523 panic("invalid divmod node %+F", node);
1526 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1528 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1529 is the memory of the consumed address. We can have only the second op as address
1530 in Div nodes, so check only op2. */
1531 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1533 if (mode_is_signed(mode)) {
1534 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1535 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1536 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* Unsigned: the "sign extension" input (edx) is simply zero. */
1538 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1539 be_dep_on_frame(sign_extension);
1541 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1542 addr->index, new_mem, am.new_op2,
1543 am.new_op1, sign_extension);
/* Keep the original pinned state (division may trap). */
1546 set_irn_pinned(new_node, get_irn_pinned(node));
1548 set_am_attributes(new_node, &am);
1549 SET_IA32_ORIG_NODE(new_node, node);
1551 new_node = fix_mem_proj(new_node, &am);
1557 * Generates an ia32 Mod.
/* Thin wrapper: Mod shares the Div/Mod/DivMod lowering in create_Div. */
1559 static ir_node *gen_Mod(ir_node *node)
1561 return create_Div(node);
1565 * Generates an ia32 Div.
/* Thin wrapper: Div shares the Div/Mod/DivMod lowering in create_Div. */
1567 static ir_node *gen_Div(ir_node *node)
1569 return create_Div(node);
1573 * Generates an ia32 DivMod.
/* Thin wrapper: DivMod shares the Div/Mod/DivMod lowering in create_Div. */
1575 static ir_node *gen_DivMod(ir_node *node)
1577 return create_Div(node);
1583 * Creates an ia32 floating Div.
1585 * @return The created ia32 xDiv node
/* Transforms a firm Quot (float division): xDiv with SSE2, vfdiv on x87.
 * Note: not commutative, so no match_commutative here. */
1587 static ir_node *gen_Quot(ir_node *node)
1589 ir_node *op1 = get_Quot_left(node);
1590 ir_node *op2 = get_Quot_right(node);
1592 if (ia32_cg_config.use_sse2) {
1593 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1595 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1601 * Creates an ia32 Shl.
1603 * @return The created ia32 Shl node
/* Transforms a firm Shl into an ia32 Shl (shift amount may be an
 * immediate; mode-neutral since low bits are unaffected by upper bits). */
1605 static ir_node *gen_Shl(ir_node *node)
1607 ir_node *left = get_Shl_left(node);
1608 ir_node *right = get_Shl_right(node);
1610 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1611 match_mode_neutral | match_immediate);
1615 * Creates an ia32 Shr.
1617 * @return The created ia32 Shr node
/* Transforms a firm Shr into an ia32 Shr (logical shift right; not
 * mode-neutral because the shifted-in upper bits matter). */
1619 static ir_node *gen_Shr(ir_node *node)
1621 ir_node *left = get_Shr_left(node);
1622 ir_node *right = get_Shr_right(node);
1624 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1630 * Creates an ia32 Sar.
1632 * @return The created ia32 Shrs node
/* Transforms a firm Shrs (arithmetic shift right). Two special patterns
 * are recognized before falling back to a plain Sar:
 *  - Shrs(x, 31): the result is the pure sign bits -> create_sex_32_64
 *    (the elided guard at original line ~1642 presumably checks val==31).
 *  - Shrs(Shl(x, C), C) with C == 16 or 24: an 8/16-bit sign extension
 *    -> I2I Conv from the small signed mode. */
1634 static ir_node *gen_Shrs(ir_node *node)
1636 ir_node *left = get_Shrs_left(node);
1637 ir_node *right = get_Shrs_right(node);
1639 if (is_Const(right)) {
1640 tarval *tv = get_Const_tarval(right);
1641 long val = get_tarval_long(tv);
1643 /* this is a sign extension */
1644 dbg_info *dbgi = get_irn_dbg_info(node);
1645 ir_node *block = be_transform_node(get_nodes_block(node));
1646 ir_node *new_op = be_transform_node(left);
1648 return create_sex_32_64(dbgi, block, new_op, node);
1652 /* 8 or 16 bit sign extension? */
1653 if (is_Const(right) && is_Shl(left)) {
1654 ir_node *shl_left = get_Shl_left(left);
1655 ir_node *shl_right = get_Shl_right(left);
1656 if (is_Const(shl_right)) {
1657 tarval *tv1 = get_Const_tarval(right);
1658 tarval *tv2 = get_Const_tarval(shl_right);
/* Both shift amounts must be the same constant (16 -> 16-bit source,
 * 24 -> 8-bit source). */
1659 if (tv1 == tv2 && tarval_is_long(tv1)) {
1660 long val = get_tarval_long(tv1);
1661 if (val == 16 || val == 24) {
1662 dbg_info *dbgi = get_irn_dbg_info(node);
1663 ir_node *block = get_nodes_block(node);
1673 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1682 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1688 * Creates an ia32 Rol.
1690 * @param op1 The first operator
1691 * @param op2 The second operator
1692 * @return The created ia32 RotL node
/* Helper used by gen_Rotl: emits a left rotate. */
1694 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1696 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1702 * Creates an ia32 Ror.
1703 * NOTE: There is no RotR with immediate because this would always be a RotL
1704 * "imm-mode_size_bits" which can be pre-calculated.
1706 * @param op1 The first operator
1707 * @param op2 The second operator
1708 * @return The created ia32 RotR node
/* Helper used by gen_Rotl: emits a right rotate for the pattern
 * RotL(x, bits - e). match_immediate is passed but, per the note above,
 * the immediate case never arises in practice. */
1710 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1712 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1718 * Creates an ia32 RotR or RotL (depending on the found pattern).
1720 * @return The created ia32 RotL or RotR node
/* Firm only has a left-rotate node. RotL(x, Add(Minus(e), bits)) is
 * really RotR(x, e), so that pattern is matched and a RotR emitted to
 * save the Add/Minus; everything else becomes a RotL. */
1722 static ir_node *gen_Rotl(ir_node *node)
1724 ir_node *rotate = NULL;
1725 ir_node *op1 = get_Rotl_left(node);
1726 ir_node *op2 = get_Rotl_right(node);
1728 /* Firm has only RotL, so we are looking for a right (op2)
1729 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1730 that means we can create a RotR instead of an Add and a RotL */
1734 ir_node *left = get_Add_left(add);
1735 ir_node *right = get_Add_right(add);
1736 if (is_Const(right)) {
1737 tarval *tv = get_Const_tarval(right);
1738 ir_mode *mode = get_irn_mode(node);
1739 long bits = get_mode_size_bits(mode);
/* Pattern: Minus(e) + bits, where bits is exactly the mode width. */
1741 if (is_Minus(left) &&
1742 tarval_is_long(tv) &&
1743 get_tarval_long(tv) == bits &&
1746 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1747 rotate = gen_Ror(node, op1, get_Minus_op(left));
1752 if (rotate == NULL) {
1753 rotate = gen_Rol(node, op1, op2);
1762 * Transforms a Minus node.
1764 * @return The created ia32 Minus node
/* Float negation: SSE2 XORs the value with a sign-bit mask constant
 * loaded from memory (xXor with address-mode symconst); x87 uses vfchs
 * (fchs). Integer negation becomes an ia32 Neg. */
1766 static ir_node *gen_Minus(ir_node *node)
1768 ir_node *op = get_Minus_op(node);
1769 ir_node *block = be_transform_node(get_nodes_block(node));
1770 dbg_info *dbgi = get_irn_dbg_info(node);
1771 ir_mode *mode = get_irn_mode(node);
1776 if (mode_is_float(mode)) {
1777 ir_node *new_op = be_transform_node(op);
1778 if (ia32_cg_config.use_sse2) {
1779 /* TODO: non-optimal... if we have many xXors, then we should
1780 * rather create a load for the const and use that instead of
1781 * several AM nodes... */
1782 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1784 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1785 nomem, new_op, noreg_xmm);
/* Select the 0x80000000... sign-mask constant for the float width. */
1787 size = get_mode_size_bits(mode);
1788 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1790 set_ia32_am_sc(new_node, ent);
1791 set_ia32_op_type(new_node, ia32_AddrModeS);
1792 set_ia32_ls_mode(new_node, mode);
1794 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1797 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1800 SET_IA32_ORIG_NODE(new_node, node);
1806 * Transforms a Not node.
1808 * @return The created ia32 Not node
/* Transforms an integer firm Not into an ia32 Not. mode_b Nots must have
 * been lowered earlier; floats are not valid here. */
1810 static ir_node *gen_Not(ir_node *node)
1812 ir_node *op = get_Not_op(node);
1814 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1815 assert (! mode_is_float(get_irn_mode(node)));
1817 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1823 * Transforms an Abs node.
1825 * @return The created ia32 Abs node
/* Float abs: SSE2 ANDs with a clear-sign-bit mask constant (xAnd with
 * address-mode symconst); x87 uses vfabs. Integer abs uses the classic
 * branchless idiom: s = sign-extend(x); result = (x ^ s) - s. */
1827 static ir_node *gen_Abs(ir_node *node)
1829 ir_node *block = get_nodes_block(node);
1830 ir_node *new_block = be_transform_node(block);
1831 ir_node *op = get_Abs_op(node);
1832 dbg_info *dbgi = get_irn_dbg_info(node);
1833 ir_mode *mode = get_irn_mode(node);
1839 if (mode_is_float(mode)) {
1840 new_op = be_transform_node(op);
1842 if (ia32_cg_config.use_sse2) {
1843 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1844 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1845 nomem, new_op, noreg_fp);
/* Select the 0x7FFFFFFF... abs-mask constant for the float width. */
1847 size = get_mode_size_bits(mode);
1848 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1850 set_ia32_am_sc(new_node, ent);
1852 SET_IA32_ORIG_NODE(new_node, node);
1854 set_ia32_op_type(new_node, ia32_AddrModeS);
1855 set_ia32_ls_mode(new_node, mode);
1857 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1858 SET_IA32_ORIG_NODE(new_node, node);
1861 ir_node *xor, *sign_extension;
/* Smaller modes are first sign-extended to 32 bit via an I2I Conv. */
1863 if (get_mode_size_bits(mode) == 32) {
1864 new_op = be_transform_node(op);
1866 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1869 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1871 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, new_op, sign_extension);
1873 SET_IA32_ORIG_NODE(xor, node);
1875 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1876 nomem, xor, sign_extension);
1877 SET_IA32_ORIG_NODE(new_node, node);
1884 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Emits an ia32 Bt (bit test) testing bit n of x; the flag result is
 * consumed by the caller via a Jc/Jnc-style condition. */
1886 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1888 dbg_info *dbgi = get_irn_dbg_info(cmp);
1889 ir_node *block = get_nodes_block(cmp);
1890 ir_node *new_block = be_transform_node(block);
1891 ir_node *op1 = be_transform_node(x);
1892 ir_node *op2 = be_transform_node(n);
1894 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1898 * Transform a node returning a "flag" result.
1900 * @param node the node to transform
1901 * @param pnc_out the compare mode to use
/* Produces an eflags-producing node for a boolean input. A Proj of a Cmp
 * against 0 (or self) of the form (1 << n) & x is strength-reduced to a
 * Bt instruction (checking either operand order); Bt sets the carry flag,
 * hence the pnc is remapped to an unsigned Lt/Ge so a Jc/Jnc is emitted.
 * Other Cmps are transformed directly. A plain mode_b value is compared
 * against 0 via a Test of the value with itself. */
1903 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1910 /* we have a Cmp as input */
1911 if (is_Proj(node)) {
1912 ir_node *pred = get_Proj_pred(node);
1914 pn_Cmp pnc = get_Proj_proj(node);
1915 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1916 ir_node *l = get_Cmp_left(pred);
1917 ir_node *r = get_Cmp_right(pred);
/* Pattern 1: And(Shl(1, n), ra) compared against 0 or the Shl itself. */
1919 ir_node *la = get_And_left(l);
1920 ir_node *ra = get_And_right(l);
1922 ir_node *c = get_Shl_left(la);
1923 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1924 /* (1 << n) & ra) */
1925 ir_node *n = get_Shl_right(la);
1926 flags = gen_bt(pred, ra, n);
1927 /* we must generate a Jc/Jnc jump */
1928 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1931 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Pattern 2: symmetric case with the Shl on the right of the And. */
1936 ir_node *c = get_Shl_left(ra);
1937 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1938 /* la & (1 << n)) */
1939 ir_node *n = get_Shl_right(ra);
1940 flags = gen_bt(pred, la, n);
1941 /* we must generate a Jc/Jnc jump */
1942 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1945 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1951 flags = be_transform_node(pred);
1957 /* a mode_b value, we have to compare it against 0 */
1958 dbgi = get_irn_dbg_info(node);
1959 new_block = be_transform_node(get_nodes_block(node));
1960 new_op = be_transform_node(node);
1961 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1962 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1963 *pnc_out = pn_Cmp_Lg;
1968 * Transforms a Load.
1970 * @return the created ia32 Load node
/* Transforms a firm Load: builds an address mode from the pointer, then
 * emits xLoad (SSE2) or vfld (x87) for floats, a load-with-extend Conv
 * for integer modes < 32 bit, and a plain Load otherwise. Unpinned
 * loads are marked rematerializable for the register allocator. */
1972 static ir_node *gen_Load(ir_node *node)
1974 ir_node *old_block = get_nodes_block(node);
1975 ir_node *block = be_transform_node(old_block);
1976 ir_node *ptr = get_Load_ptr(node);
1977 ir_node *mem = get_Load_mem(node);
1978 ir_node *new_mem = be_transform_node(mem);
1981 dbg_info *dbgi = get_irn_dbg_info(node);
1982 ir_mode *mode = get_Load_mode(node);
1985 ia32_address_t addr;
1987 /* construct load address */
1988 memset(&addr, 0, sizeof(addr));
1989 ia32_create_address_mode(&addr, ptr, 0);
/* Missing base/index inputs are filled with noreg (elided lines). */
1996 base = be_transform_node(base);
1999 if (index == NULL) {
2002 index = be_transform_node(index);
2005 if (mode_is_float(mode)) {
2006 if (ia32_cg_config.use_sse2) {
2007 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2009 res_mode = mode_xmm;
2011 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2013 res_mode = mode_vfp;
2016 assert(mode != mode_b);
2018 /* create a conv node with address mode for smaller modes */
2019 if (get_mode_size_bits(mode) < 32) {
2020 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2021 new_mem, noreg_GP, mode);
2023 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2028 set_irn_pinned(new_node, get_irn_pinned(node));
2029 set_ia32_op_type(new_node, ia32_AddrModeS);
2030 set_ia32_ls_mode(new_node, mode);
2031 set_address(new_node, &addr);
2033 if (get_irn_pinned(node) == op_pin_state_floats) {
/* All load variants must expose their result on the same Proj number
 * for rematerialization to be uniform. */
2034 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2035 && pn_ia32_vfld_res == pn_ia32_Load_res
2036 && pn_ia32_Load_res == pn_ia32_res);
2037 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2040 SET_IA32_ORIG_NODE(new_node, node);
2042 be_dep_on_frame(new_node);
/* Decides whether a Load (given as its result Proj 'node') can be folded
 * into a destination-address-mode op of a Store in 'block' storing to
 * 'ptr'. Requires: the load result has exactly one user, load and store
 * are in the same block, they access the same pointer, the other operand
 * does not depend on the load, and nothing prevents AM folding. */
2046 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2047 ir_node *ptr, ir_node *other)
2054 /* we only use address mode if we're the only user of the load */
2055 if (get_irn_n_edges(node) > 1)
2058 load = get_Proj_pred(node);
2061 if (get_nodes_block(load) != block)
2064 /* store should have the same pointer as the load */
2065 if (get_Load_ptr(load) != ptr)
2068 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2069 if (other != NULL &&
2070 get_nodes_block(other) == block &&
2071 heights_reachable_in_block(heights, other, load)) {
2075 if (prevents_AM(block, load, mem))
2077 /* Store should be attached to the load via mem */
2078 assert(heights_reachable_in_block(heights, mem, load));
/* Builds a destination-address-mode binary op (e.g. AddMem, OrMem) for a
 * Store(ptr, binop(load(ptr), other)). One operand must be a foldable
 * load of ptr (checked via use_dest_am, trying both operands when the
 * op is commutative); the other becomes an immediate or register input.
 * Returns NULL if destination AM is not applicable. */
2083 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2084 ir_node *mem, ir_node *ptr, ir_mode *mode,
2085 construct_binop_dest_func *func,
2086 construct_binop_dest_func *func8bit,
2087 match_flags_t flags)
2089 ir_node *src_block = get_nodes_block(node);
2097 ia32_address_mode_t am;
2098 ia32_address_t *addr = &am.addr;
2099 memset(&am, 0, sizeof(am));
2101 assert(flags & match_immediate); /* there is no destam node without... */
2102 commutative = (flags & match_commutative) != 0;
2104 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2105 build_address(&am, op1, ia32_create_am_double_use);
2106 new_op = create_immediate_or_transform(op2, 0);
2107 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2108 build_address(&am, op2, ia32_create_am_double_use);
2109 new_op = create_immediate_or_transform(op1, 0);
/* Fill unset address parts with noreg defaults. */
2114 if (addr->base == NULL)
2115 addr->base = noreg_GP;
2116 if (addr->index == NULL)
2117 addr->index = noreg_GP;
2118 if (addr->mem == NULL)
2121 dbgi = get_irn_dbg_info(node);
2122 block = be_transform_node(src_block);
2123 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
/* 8-bit modes need the dedicated 8-bit variant (different register
 * constraints on ia32). */
2125 if (get_mode_size_bits(mode) == 8) {
2126 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2128 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2130 set_address(new_node, addr);
2131 set_ia32_op_type(new_node, ia32_AddrModeD);
2132 set_ia32_ls_mode(new_node, mode);
2133 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the folded load's mem Proj to the new combined node. */
2135 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2136 mem_proj = be_transform_node(am.mem_proj);
2137 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Builds a destination-address-mode unary op (e.g. NotMem, NegMem,
 * IncMem) for a Store(ptr, unop(load(ptr))). Returns NULL when the
 * operand is not a foldable load of ptr. */
2142 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2143 ir_node *ptr, ir_mode *mode,
2144 construct_unop_dest_func *func)
2146 ir_node *src_block = get_nodes_block(node);
2152 ia32_address_mode_t am;
2153 ia32_address_t *addr = &am.addr;
2155 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2158 memset(&am, 0, sizeof(am));
2159 build_address(&am, op, ia32_create_am_double_use);
2161 dbgi = get_irn_dbg_info(node);
2162 block = be_transform_node(src_block);
2163 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2164 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2165 set_address(new_node, addr);
2166 set_ia32_op_type(new_node, ia32_AddrModeD);
2167 set_ia32_ls_mode(new_node, mode);
2168 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the folded load's mem Proj to the new combined node. */
2170 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2171 mem_proj = be_transform_node(am.mem_proj);
2172 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to turn Store(ptr, Mux(cond, 1, 0)) (8-bit result only) into a
 * SetccMem writing the condition flag directly to memory. The pnc is
 * negated for the Mux(cond, 0, 1) orientation (elided lines presumably
 * track this via a flag). Returns NULL when the pattern does not match
 * or the condition needs float special-case handling. */
2177 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2179 ir_mode *mode = get_irn_mode(node);
2180 ir_node *mux_true = get_Mux_true(node);
2181 ir_node *mux_false = get_Mux_false(node);
2191 ia32_address_t addr;
2193 if (get_mode_size_bits(mode) != 8)
2196 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2198 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2204 cond = get_Mux_sel(node);
2205 flags = get_flags_node(cond, &pnc);
2206 /* we can't handle the float special cases with SetM */
2207 if (pnc & ia32_pn_Cmp_float)
2210 pnc = get_negated_pnc(pnc, pnc & ia32_pn_Cmp_float ? mode_F : mode_Is);
2212 build_address_ptr(&addr, ptr, mem);
2214 dbgi = get_irn_dbg_info(node);
2215 block = get_nodes_block(node);
2216 new_block = be_transform_node(block);
2217 new_mem = be_transform_node(mem);
2218 new_node = new_bd_ia32_SetccMem(dbgi, new_block, addr.base,
2219 addr.index, addr.mem, flags, pnc);
2220 set_address(new_node, &addr);
2221 set_ia32_op_type(new_node, ia32_AddrModeD);
2222 set_ia32_ls_mode(new_node, mode);
2223 SET_IA32_ORIG_NODE(new_node, node);
/* Tries to transform a Store into a destination-address-mode operation
 * (e.g. add [mem], x). The stored value must be a GP-mode op whose only
 * user is the store, live in the same block, and be one of the opcodes
 * dispatched below. Pointless down-Convs on the value are skipped first.
 * Returns the new node, or NULL if no destination AM form applies. */
2228 static ir_node *try_create_dest_am(ir_node *node)
2230 ir_node *val = get_Store_value(node);
2231 ir_node *mem = get_Store_mem(node);
2232 ir_node *ptr = get_Store_ptr(node);
2233 ir_mode *mode = get_irn_mode(val);
2234 unsigned bits = get_mode_size_bits(mode);
2239 /* handle only GP modes for now... */
2240 if (!ia32_mode_needs_gp_reg(mode))
2244 /* store must be the only user of the val node */
2245 if (get_irn_n_edges(val) > 1)
2247 /* skip pointless convs */
2249 ir_node *conv_op = get_Conv_op(val);
2250 ir_mode *pred_mode = get_irn_mode(conv_op);
2251 if (!ia32_mode_needs_gp_reg(pred_mode))
/* A Conv is pointless when the store mode is at most as wide as the
 * source: the store truncates anyway. */
2253 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2261 /* value must be in the same block */
2262 if (get_nodes_block(node) != get_nodes_block(val))
2265 switch (get_irn_opcode(val)) {
2267 op1 = get_Add_left(val);
2268 op2 = get_Add_right(val);
/* Add +/-1 becomes inc/dec on memory when the target prefers it. */
2269 if (ia32_cg_config.use_incdec) {
2270 if (is_Const_1(op2)) {
2271 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2273 } else if (is_Const_Minus_1(op2)) {
2274 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2278 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2279 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2280 match_commutative | match_immediate);
2283 op1 = get_Sub_left(val);
2284 op2 = get_Sub_right(val);
2285 if (is_Const(op2)) {
2286 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2288 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2289 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2293 op1 = get_And_left(val);
2294 op2 = get_And_right(val);
2295 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2296 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2297 match_commutative | match_immediate);
2300 op1 = get_Or_left(val);
2301 op2 = get_Or_right(val);
2302 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2303 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2304 match_commutative | match_immediate);
2307 op1 = get_Eor_left(val);
2308 op2 = get_Eor_right(val);
2309 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2310 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2311 match_commutative | match_immediate);
/* Shifts/rotates have no 8-bit variant: the same constructor is
 * passed for both the normal and the 8-bit slot. */
2314 op1 = get_Shl_left(val);
2315 op2 = get_Shl_right(val);
2316 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2317 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2321 op1 = get_Shr_left(val);
2322 op2 = get_Shr_right(val);
2323 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2324 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2328 op1 = get_Shrs_left(val);
2329 op2 = get_Shrs_right(val);
2330 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2331 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2335 op1 = get_Rotl_left(val);
2336 op2 = get_Rotl_right(val);
2337 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2338 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2341 /* TODO: match ROR patterns... */
2343 new_node = try_create_SetMem(val, ptr, mem);
2347 op1 = get_Minus_op(val);
2348 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2351 /* should be lowered already */
2352 assert(mode != mode_b);
2353 op1 = get_Not_op(val);
2354 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* The new node inherits the store's pinned state if stronger. */
2360 if (new_node != NULL) {
2361 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2362 get_irn_pinned(node) == op_pin_state_pinned) {
2363 set_irn_pinned(new_node, op_pin_state_pinned);
/* True if an integer mode can be the target of an x87 fist store:
 * signed and either 16 or 32 bits wide. */
2370 static bool possible_int_mode_for_fp(ir_mode *mode)
2374 if (!mode_is_signed(mode))
2376 size = get_mode_size_bits(mode);
2377 if (size != 16 && size != 32)
/* True if node is a Conv from a float mode to an integer mode that a
 * fist store can produce directly (see possible_int_mode_for_fp). */
2382 static int is_float_to_int_conv(const ir_node *node)
2384 ir_mode *mode = get_irn_mode(node);
2388 if (!possible_int_mode_for_fp(mode))
2393 conv_op = get_Conv_op(node);
2394 conv_mode = get_irn_mode(conv_op);
2396 if (!mode_is_float(conv_mode))
2403 * Transform a Store(floatConst) into a sequence of
2406 * @return the created ia32 Store node
/* Splits a float-constant store into one or more 32-bit integer stores
 * of the constant's raw bytes (little-endian assembly of 4 bytes per
 * word), returning a Sync of their mem Projs when several are needed. */
2408 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2410 ir_mode *mode = get_irn_mode(cns);
2411 unsigned size = get_mode_size_bytes(mode);
2412 tarval *tv = get_Const_tarval(cns);
2413 ir_node *block = get_nodes_block(node);
2414 ir_node *new_block = be_transform_node(block);
2415 ir_node *ptr = get_Store_ptr(node);
2416 ir_node *mem = get_Store_mem(node);
2417 dbg_info *dbgi = get_irn_dbg_info(node);
2421 ia32_address_t addr;
/* Only whole 32-bit words are handled (float/double/extended sizes). */
2423 assert(size % 4 == 0);
2426 build_address_ptr(&addr, ptr, mem);
/* Assemble the next 32-bit little-endian chunk of the constant. */
2430 get_tarval_sub_bits(tv, ofs) |
2431 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2432 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2433 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2434 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2436 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2437 addr.index, addr.mem, imm);
2439 set_irn_pinned(new_node, get_irn_pinned(node));
2440 set_ia32_op_type(new_node, ia32_AddrModeD);
2441 set_ia32_ls_mode(new_node, mode_Iu);
2442 set_address(new_node, &addr);
2443 SET_IA32_ORIG_NODE(new_node, node);
2446 ins[i++] = new_node;
/* Elided lines presumably advance addr.offset/ofs and decrement size. */
2451 } while (size != 0);
2454 return new_rd_Sync(dbgi, new_block, i, ins);
2461 * Generate a vfist or vfisttp instruction.
/* Emits a float->int store: vfisttp (SSE3 fisttp, truncating, always pops
 * the x87 tos — hence the Keep on the value so a copy survives for other
 * users) when available, otherwise a classic vfist guarded by a
 * truncating FPU control-word (trunc_mode). *fist receives the store
 * node for the caller's attribute setup. */
2463 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2464 ir_node *mem, ir_node *val, ir_node **fist)
2468 if (ia32_cg_config.use_fisttp) {
2469 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2470 if other users exists */
2471 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2472 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2473 be_new_Keep(block, 1, &value);
2475 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2478 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2481 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2487 * Transforms a general (no special case) Store.
2489 * @return the created ia32 Store node
/* The generic Store path: first try destination address mode; otherwise
 * build the store address and emit, by value kind: xStore (SSE2) or
 * vfst (x87) for floats (skipping redundant same-mode float Convs),
 * a vfist for a float->int Conv on x87, or an integer Store /
 * Store8Bit after skipping float->float up-Convs. */
2491 static ir_node *gen_general_Store(ir_node *node)
2493 ir_node *val = get_Store_value(node);
2494 ir_mode *mode = get_irn_mode(val);
2495 ir_node *block = get_nodes_block(node);
2496 ir_node *new_block = be_transform_node(block);
2497 ir_node *ptr = get_Store_ptr(node);
2498 ir_node *mem = get_Store_mem(node);
2499 dbg_info *dbgi = get_irn_dbg_info(node);
2500 ir_node *new_val, *new_node, *store;
2501 ia32_address_t addr;
2503 /* check for destination address mode */
2504 new_node = try_create_dest_am(node);
2505 if (new_node != NULL)
2508 /* construct store address */
2509 memset(&addr, 0, sizeof(addr));
2510 ia32_create_address_mode(&addr, ptr, 0);
2512 if (addr.base == NULL) {
2513 addr.base = noreg_GP;
2515 addr.base = be_transform_node(addr.base);
2518 if (addr.index == NULL) {
2519 addr.index = noreg_GP;
2521 addr.index = be_transform_node(addr.index);
2523 addr.mem = be_transform_node(mem);
2525 if (mode_is_float(mode)) {
2526 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2528 while (is_Conv(val) && mode == get_irn_mode(val)) {
2529 ir_node *op = get_Conv_op(val);
2530 if (!mode_is_float(get_irn_mode(op)))
2534 new_val = be_transform_node(val);
2535 if (ia32_cg_config.use_sse2) {
2536 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2537 addr.index, addr.mem, new_val);
2539 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2540 addr.index, addr.mem, new_val, mode);
/* Store of a float->int Conv on x87: use fist directly instead of
 * converting in registers first. */
2543 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2544 val = get_Conv_op(val);
2546 /* TODO: is this optimisation still necessary at all (middleend)? */
2547 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2548 while (is_Conv(val)) {
2549 ir_node *op = get_Conv_op(val);
2550 if (!mode_is_float(get_irn_mode(op)))
2552 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2556 new_val = be_transform_node(val);
2557 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2559 new_val = create_immediate_or_transform(val, 0);
2560 assert(mode != mode_b);
2562 if (get_mode_size_bits(mode) == 8) {
2563 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2564 addr.index, addr.mem, new_val);
2566 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2567 addr.index, addr.mem, new_val);
/* Common attribute setup on the actual store node. */
2572 set_irn_pinned(store, get_irn_pinned(node));
2573 set_ia32_op_type(store, ia32_AddrModeD);
2574 set_ia32_ls_mode(store, mode);
2576 set_address(store, &addr);
2577 SET_IA32_ORIG_NODE(store, node);
2583 * Transforms a Store.
2585 * @return the created ia32 Store node
/* Dispatch point for Store transformation: a Store of a floating-point
 * Const is decomposed into integer stores of the constant's bit pattern
 * (gen_float_const_Store); everything else takes the general path. */
2587 static ir_node *gen_Store(ir_node *node)
2589 ir_node *val = get_Store_value(node);
2590 ir_mode *mode = get_irn_mode(val);
2592 if (mode_is_float(mode) && is_Const(val)) {
2593 /* We can transform every floating const store
2594 into a sequence of integer stores.
2595 If the constant is already in a register,
2596 it would be better to use it, but we don't
2597 have this information here. */
2598 return gen_float_const_Store(node, val);
2600 return gen_general_Store(node);
2604 * Transforms a Switch.
2606 * @return the created ia32 SwitchJmp node
/* Scans all case Projs (skipping the default) to find the case-value range,
 * rejects tables larger than 128000 entries, biases the selector with a Lea
 * when the smallest case is non-zero, and emits a SwitchJmp. */
2608 static ir_node *create_Switch(ir_node *node)
2610 dbg_info *dbgi = get_irn_dbg_info(node);
2611 ir_node *block = be_transform_node(get_nodes_block(node));
2612 ir_node *sel = get_Cond_selector(node);
2613 ir_node *new_sel = be_transform_node(sel);
/* extreme initial values so the first real case updates both bounds */
2614 long switch_min = LONG_MAX;
2615 long switch_max = LONG_MIN;
2616 long default_pn = get_Cond_default_proj(node);
2618 const ir_edge_t *edge;
/* selector must already be a 32bit value at this point */
2620 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2622 /* determine the smallest switch case value */
2623 foreach_out_edge(node, edge) {
2624 ir_node *proj = get_edge_src_irn(edge);
2625 long pn = get_Proj_proj(proj);
2626 if (pn == default_pn)
2629 if (pn < switch_min)
2631 if (pn > switch_max)
/* hard limit on jump-table size; larger switches must be lowered earlier */
2635 if ((unsigned long) (switch_max - switch_min) > 128000) {
2636 panic("Size of switch %+F bigger than 128000", node);
2639 if (switch_min != 0) {
2640 /* if smallest switch case is not 0 we need an additional sub */
2641 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2642 add_ia32_am_offs_int(new_sel, -switch_min);
2643 set_ia32_op_type(new_sel, ia32_AddrModeS);
2645 SET_IA32_ORIG_NODE(new_sel, node);
2648 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2649 SET_IA32_ORIG_NODE(new_node, node);
2655 * Transform a Cond node.
/* A Cond with a non-boolean selector is a switch (handled by create_Switch);
 * a boolean selector yields a conditional jump (Jcc) fed by the flags of
 * the originating comparison. */
2657 static ir_node *gen_Cond(ir_node *node)
2659 ir_node *block = get_nodes_block(node);
2660 ir_node *new_block = be_transform_node(block);
2661 dbg_info *dbgi = get_irn_dbg_info(node);
2662 ir_node *sel = get_Cond_selector(node);
2663 ir_mode *sel_mode = get_irn_mode(sel);
2664 ir_node *flags = NULL;
2668 if (sel_mode != mode_b) {
2669 return create_Switch(node);
2672 /* we get flags from a Cmp */
2673 flags = get_flags_node(sel, &pnc);
2675 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2676 SET_IA32_ORIG_NODE(new_node, node);
2682 * Transform a be_Copy.
/* Duplicates the Copy and normalizes its mode: all gp-register values are
 * represented as 32bit unsigned (mode_Iu) in the ia32 backend. */
2684 static ir_node *gen_be_Copy(ir_node *node)
2686 ir_node *new_node = be_duplicate_node(node);
2687 ir_mode *mode = get_irn_mode(new_node);
2689 if (ia32_mode_needs_gp_reg(mode)) {
2690 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare producing flags.
 * Uses fucomi when available (writes eflags directly); otherwise falls back
 * to ftst (compare against 0 when the right operand is constant 0) or
 * fucom+fnstsw, whose fpu status word must be moved into eflags via sahf. */
2696 static ir_node *create_Fucom(ir_node *node)
2698 dbg_info *dbgi = get_irn_dbg_info(node);
2699 ir_node *block = get_nodes_block(node);
2700 ir_node *new_block = be_transform_node(block);
2701 ir_node *left = get_Cmp_left(node);
2702 ir_node *new_left = be_transform_node(left);
2703 ir_node *right = get_Cmp_right(node);
2707 if (ia32_cg_config.use_fucomi) {
2708 new_right = be_transform_node(right);
2709 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2711 set_ia32_commutative(new_node);
2712 SET_IA32_ORIG_NODE(new_node, node);
2714 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
/* compare-against-zero shortcut: ftst needs no second operand */
2715 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2717 new_right = be_transform_node(right);
2718 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2721 set_ia32_commutative(new_node);
2723 SET_IA32_ORIG_NODE(new_node, node);
/* transfer the fpu status word (AH) into the eflags register */
2725 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2726 SET_IA32_ORIG_NODE(new_node, node);
/* Creates an SSE ucomi(ss/sd) compare producing flags.
 * Operands may be matched into a source address mode (memory operand). */
2732 static ir_node *create_Ucomi(ir_node *node)
2734 dbg_info *dbgi = get_irn_dbg_info(node);
2735 ir_node *src_block = get_nodes_block(node);
2736 ir_node *new_block = be_transform_node(src_block);
2737 ir_node *left = get_Cmp_left(node);
2738 ir_node *right = get_Cmp_right(node);
2740 ia32_address_mode_t am;
2741 ia32_address_t *addr = &am.addr;
2743 match_arguments(&am, src_block, left, right, NULL,
2744 match_commutative | match_am);
2746 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2747 addr->mem, am.new_op1, am.new_op2,
2749 set_am_attributes(new_node, &am);
2751 SET_IA32_ORIG_NODE(new_node, node);
/* if a memory operand was folded in, reroute the memory Proj */
2753 new_node = fix_mem_proj(new_node, &am);
2759 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2760 * to fold an and into a test node
/* Test only sets flags meaningfully for ==/!= 0; any other relation on a
 * Cmp user forbids folding And+Cmp into a single Test. */
2762 static bool can_fold_test_and(ir_node *node)
2764 const ir_edge_t *edge;
2766 /** we can only have eq and lg projs */
2767 foreach_out_edge(node, edge) {
2768 ir_node *proj = get_edge_src_irn(edge);
2769 pn_Cmp pnc = get_Proj_proj(proj);
2770 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2778 * returns true if it is assured, that the upper bits of a node are "clean"
2779 * which means for a 16 or 8 bit value, that the upper bits in the register
2780 * are 0 for unsigned and a copy of the last significant bit for signed
/* Recursive, conservative analysis over already-transformed ia32 nodes;
 * used to widen sub-32bit compares/convs to the smaller 32bit opcodes. */
2783 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2785 assert(ia32_mode_needs_gp_reg(mode));
/* full-width values trivially have no "upper" bits to worry about */
2786 if (get_mode_size_bits(mode) >= 32)
2789 if (is_Proj(transformed_node))
2790 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2792 switch (get_ia32_irn_opcode(transformed_node)) {
2793 case iro_ia32_Conv_I2I:
2794 case iro_ia32_Conv_I2I8Bit: {
/* a conv cleans the upper bits iff it converts to a mode of equal or
 * smaller size with matching signedness */
2795 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2796 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2798 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2805 if (mode_is_signed(mode)) {
2806 return false; /* TODO handle signed modes */
/* unsigned: a Shr by a large enough constant zeroes the upper bits */
2808 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2809 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2810 const ia32_immediate_attr_t *attr
2811 = get_ia32_immediate_attr_const(right);
2812 if (attr->symconst == 0 &&
2813 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2817 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* Sar preserves the sign-extension property of its input */
2821 /* TODO too conservative if shift amount is constant */
2822 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2825 if (!mode_is_signed(mode)) {
/* unsigned And: one clean operand suffices (zeros propagate) */
2827 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2828 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2830 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
2835 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2836 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2838 case iro_ia32_Const:
2839 case iro_ia32_Immediate: {
2840 const ia32_immediate_attr_t *attr =
2841 get_ia32_immediate_attr_const(transformed_node);
2842 if (mode_is_signed(mode)) {
/* signed constant: upper bits must all equal the sign bit */
2843 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2844 return shifted == 0 || shifted == -1;
/* unsigned constant: upper bits must all be zero */
2846 unsigned long shifted = (unsigned long)attr->offset;
2847 shifted >>= get_mode_size_bits(mode);
2848 return shifted == 0;
2858 * Generate code for a Cmp.
/* Float compares are delegated to Ucomi (SSE2) or Fucom (x87). Integer
 * compares prefer a Test when the pattern is (x & y) ==/!= 0 and fall back
 * to Cmp otherwise; both paths widen to 32bit compare mode when the upper
 * bits of the operands are known clean (smaller opcode encoding). */
2860 static ir_node *gen_Cmp(ir_node *node)
2862 dbg_info *dbgi = get_irn_dbg_info(node);
2863 ir_node *block = get_nodes_block(node);
2864 ir_node *new_block = be_transform_node(block);
2865 ir_node *left = get_Cmp_left(node);
2866 ir_node *right = get_Cmp_right(node);
2867 ir_mode *cmp_mode = get_irn_mode(left);
2869 ia32_address_mode_t am;
2870 ia32_address_t *addr = &am.addr;
2873 if (mode_is_float(cmp_mode)) {
2874 if (ia32_cg_config.use_sse2) {
2875 return create_Ucomi(node);
2877 return create_Fucom(node);
2881 assert(ia32_mode_needs_gp_reg(cmp_mode));
2883 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2884 cmp_unsigned = !mode_is_signed(cmp_mode);
/* folding requires the And to have this Cmp as its only user and all
 * Cmp projs to be Eq/Lg (see can_fold_test_and) */
2885 if (is_Const_0(right) &&
2887 get_irn_n_edges(left) == 1 &&
2888 can_fold_test_and(node)) {
2889 /* Test(and_left, and_right) */
2890 ir_node *and_left = get_And_left(left);
2891 ir_node *and_right = get_And_right(left);
2893 /* matze: code here used mode instead of cmd_mode, I think it is always
2894 * the same as cmp_mode, but I leave this here to see if this is really
2897 assert(get_irn_mode(and_left) == cmp_mode);
2899 match_arguments(&am, block, and_left, and_right, NULL,
2901 match_am | match_8bit_am | match_16bit_am |
2902 match_am_and_immediates | match_immediate);
2904 /* use 32bit compare mode if possible since the opcode is smaller */
2905 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2906 upper_bits_clean(am.new_op2, cmp_mode)) {
2907 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2910 if (get_mode_size_bits(cmp_mode) == 8) {
2911 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2912 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2915 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2916 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2919 /* Cmp(left, right) */
2920 match_arguments(&am, block, left, right, NULL,
2921 match_commutative | match_am | match_8bit_am |
2922 match_16bit_am | match_am_and_immediates |
2924 /* use 32bit compare mode if possible since the opcode is smaller */
2925 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2926 upper_bits_clean(am.new_op2, cmp_mode)) {
2927 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2930 if (get_mode_size_bits(cmp_mode) == 8) {
2931 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2932 addr->index, addr->mem, am.new_op1,
2933 am.new_op2, am.ins_permuted,
2936 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2937 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2940 set_am_attributes(new_node, &am);
2941 set_ia32_ls_mode(new_node, cmp_mode);
2943 SET_IA32_ORIG_NODE(new_node, node);
2945 new_node = fix_mem_proj(new_node, &am);
/* Creates a CMovcc selecting between the Mux's false/true values based on
 * the given (already transformed) flags. One operand may be folded as a
 * memory operand; requires cmov support (asserted). */
2950 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2953 dbg_info *dbgi = get_irn_dbg_info(node);
2954 ir_node *block = get_nodes_block(node);
2955 ir_node *new_block = be_transform_node(block);
2956 ir_node *val_true = get_Mux_true(node);
2957 ir_node *val_false = get_Mux_false(node);
2959 ia32_address_mode_t am;
2960 ia32_address_t *addr;
2962 assert(ia32_cg_config.use_cmov);
2963 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2967 match_arguments(&am, block, val_false, val_true, flags,
2968 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2970 new_node = new_bd_ia32_CMovcc(dbgi, new_block, addr->base, addr->index,
2971 addr->mem, am.new_op1, am.new_op2, new_flags,
2972 am.ins_permuted, pnc);
2973 set_am_attributes(new_node, &am);
2975 SET_IA32_ORIG_NODE(new_node, node);
2977 new_node = fix_mem_proj(new_node, &am);
2983 * Creates a ia32 Setcc instruction.
/* Setcc only produces an 8bit result; when the original node's mode is
 * wider, an additional zero-extending Conv_I2I8Bit is appended. */
2985 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2986 ir_node *flags, pn_Cmp pnc, ir_node *orig_node)
2988 ir_mode *mode = get_irn_mode(orig_node);
2991 new_node = new_bd_ia32_Setcc(dbgi, new_block, flags, pnc);
2992 SET_IA32_ORIG_NODE(new_node, orig_node);
2994 /* we might need to conv the result up */
2995 if (get_mode_size_bits(mode) > 8) {
2996 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2997 nomem, new_node, mode_Bu);
2998 SET_IA32_ORIG_NODE(new_node, orig_node);
3005 * Create instruction for an unsigned Difference or Zero.
/* Implements Doz(a, b) = (a - b) if a > b else 0 without a branch:
 * sub sets the carry flag on borrow, sbb materializes 0 or -1 from it,
 * and the final and masks the difference to 0 when a borrow occurred. */
3007 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3009 ir_mode *mode = get_irn_mode(psi);
3010 ir_node *new_node, *sub, *sbb, *eflags, *block;
3014 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3015 match_mode_neutral | match_am | match_immediate | match_two_users);
3017 block = get_nodes_block(new_node);
3019 if (is_Proj(new_node)) {
3020 sub = get_Proj_pred(new_node);
3021 assert(is_ia32_Sub(sub));
/* the Sub must be multi-result so both the value and flags can be used */
3024 set_irn_mode(sub, mode_T);
3025 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3027 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3029 dbgi = get_irn_dbg_info(psi);
3030 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3032 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3033 set_ia32_commutative(new_node);
3038 * Create an const array of two float consts.
3040 * @param c0 the first constant
3041 * @param c1 the second constant
3042 * @param new_mode IN/OUT for the mode of the constants, if NULL
3043 * smallest possible mode will be used
/* Emits a local, static, constant 2-element float array into the global
 * type and returns its entity. When no mode is forced, the constants are
 * narrowed to mode_F or mode_D if both convert losslessly. */
3045 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3047 ir_mode *mode = *new_mode;
3049 ir_initializer_t *initializer;
3050 tarval *tv0 = get_Const_tarval(c0);
3051 tarval *tv1 = get_Const_tarval(c1);
3054 /* detect the best mode for the constants */
3055 mode = get_tarval_mode(tv0);
3057 if (mode != mode_F) {
3058 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3059 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3061 tv0 = tarval_convert_to(tv0, mode);
3062 tv1 = tarval_convert_to(tv1, mode);
3063 } else if (mode != mode_D) {
3064 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3065 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3067 tv0 = tarval_convert_to(tv0, mode);
3068 tv1 = tarval_convert_to(tv1, mode);
3075 tp = ia32_create_float_type(mode, 4);
3076 tp = ia32_create_float_array(tp);
/* fresh, unique local label (.LC<n>) for the constant pool entry */
3078 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3080 set_entity_ld_ident(ent, get_entity_ident(ent));
3081 set_entity_visibility(ent, visibility_local);
3082 set_entity_variability(ent, variability_constant);
3083 set_entity_allocation(ent, allocation_static);
3085 initializer = create_initializer_compound(2);
3087 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3088 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3090 set_entity_initializer(ent, initializer);
3097 * Transforms a Mux node into some code sequence.
3099 * @return The transformed node.
/* Strategy overview:
 *  - float Mux over an SSE compare of its own operands -> xMin/xMax
 *  - float Mux of two constants -> Setcc producing 0/1, scaled into an
 *    index, then a load from a 2-element constant array
 *  - integer Mux matching "a > b ? a - b : 0" -> branchless Doz
 *  - integer 0/1-constant Mux -> Setcc (possibly with negated condition)
 *  - otherwise -> CMovcc */
3101 static ir_node *gen_Mux(ir_node *node)
3103 dbg_info *dbgi = get_irn_dbg_info(node);
3104 ir_node *block = get_nodes_block(node);
3105 ir_node *new_block = be_transform_node(block);
3106 ir_node *mux_true = get_Mux_true(node);
3107 ir_node *mux_false = get_Mux_false(node);
3108 ir_node *cond = get_Mux_sel(node);
3109 ir_mode *mode = get_irn_mode(node);
3114 assert(get_irn_mode(cond) == mode_b);
3116 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3117 if (mode_is_float(mode)) {
3118 ir_node *cmp = get_Proj_pred(cond);
3119 ir_node *cmp_left = get_Cmp_left(cmp);
3120 ir_node *cmp_right = get_Cmp_right(cmp);
3121 pn_Cmp pnc = get_Proj_proj(cond);
3123 if (ia32_cg_config.use_sse2) {
3124 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3125 if (cmp_left == mux_true && cmp_right == mux_false) {
3126 /* Mux(a <= b, a, b) => MIN */
3127 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3128 match_commutative | match_am | match_two_users);
3129 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3130 /* Mux(a <= b, b, a) => MAX */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3132 match_commutative | match_am | match_two_users);
3134 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3135 if (cmp_left == mux_true && cmp_right == mux_false) {
3136 /* Mux(a >= b, a, b) => MAX */
3137 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3138 match_commutative | match_am | match_two_users);
3139 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3140 /* Mux(a >= b, b, a) => MIN */
3141 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3142 match_commutative | match_am | match_two_users);
3146 if (is_Const(mux_true) && is_Const(mux_false)) {
3147 ia32_address_mode_t am;
/* Setcc gives 0/1; scale it to the element size of the constant
 * array so it can be used as a load index below */
3152 flags = get_flags_node(cond, &pnc);
3153 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3155 if (ia32_cg_config.use_sse2) {
3156 /* cannot load from different mode on SSE */
3159 /* x87 can load any mode */
3163 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3165 switch (get_mode_size_bytes(new_mode)) {
3175 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3176 set_ia32_am_scale(new_node, 2);
3181 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3182 set_ia32_am_scale(new_node, 1);
3185 /* arg, shift 16 NOT supported */
3187 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3190 panic("Unsupported constant size");
/* build the source address mode: constant array base + scaled index */
3193 am.ls_mode = new_mode;
3194 am.addr.base = noreg_GP;
3195 am.addr.index = new_node;
3196 am.addr.mem = nomem;
3198 am.addr.scale = scale;
3199 am.addr.use_frame = 0;
3200 am.addr.frame_entity = NULL;
3201 am.addr.symconst_sign = 0;
3202 am.mem_proj = am.addr.mem;
3203 am.op_type = ia32_AddrModeS;
3206 am.pinned = op_pin_state_floats;
3208 am.ins_permuted = 0;
3210 if (ia32_cg_config.use_sse2)
3211 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3213 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3214 set_am_attributes(load, &am);
3216 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3218 panic("cannot transform floating point Mux");
3221 assert(ia32_mode_needs_gp_reg(mode));
3223 if (is_Proj(cond)) {
3224 ir_node *cmp = get_Proj_pred(cond);
3226 ir_node *cmp_left = get_Cmp_left(cmp);
3227 ir_node *cmp_right = get_Cmp_right(cmp);
3228 pn_Cmp pnc = get_Proj_proj(cond);
3230 /* check for unsigned Doz first */
3231 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3232 is_Const_0(mux_false) && is_Sub(mux_true) &&
3233 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3234 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3235 return create_Doz(node, cmp_left, cmp_right);
3236 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3237 is_Const_0(mux_true) && is_Sub(mux_false) &&
3238 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3239 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3240 return create_Doz(node, cmp_left, cmp_right);
3245 flags = get_flags_node(cond, &pnc);
3247 if (is_Const(mux_true) && is_Const(mux_false)) {
3248 /* both are const, good */
3249 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3250 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3251 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
/* swap by negating the condition (float vs int negation differ) */
3252 pnc = get_negated_pnc(pnc, pnc & ia32_pn_Cmp_float ? mode_F : mode_Is);
3253 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node);
3255 /* Not that simple. */
3260 new_node = create_CMov(node, cond, flags, pnc);
3268 * Create a conversion from x87 state register to general purpose.
/* Implemented as a round trip through the stack frame: fist(p) stores the
 * x87 value as integer, then a plain integer Load reads it back into a gp
 * register. */
3270 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3272 ir_node *block = be_transform_node(get_nodes_block(node));
3273 ir_node *op = get_Conv_op(node);
3274 ir_node *new_op = be_transform_node(op);
3275 ir_graph *irg = current_ir_graph;
3276 dbg_info *dbgi = get_irn_dbg_info(node);
3277 ir_mode *mode = get_irn_mode(node);
3278 ir_node *fist, *load, *mem;
3280 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3281 set_irn_pinned(fist, op_pin_state_floats);
3282 set_ia32_use_frame(fist);
3283 set_ia32_op_type(fist, ia32_AddrModeD);
3285 assert(get_mode_size_bits(mode) <= 32);
3286 /* exception we can only store signed 32 bit integers, so for unsigned
3287 we store a 64bit (signed) integer and load the lower bits */
3288 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3289 set_ia32_ls_mode(fist, mode_Ls);
3291 set_ia32_ls_mode(fist, mode_Is);
3293 SET_IA32_ORIG_NODE(fist, node);
/* read back only the low 32 bits of the spilled value */
3296 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3298 set_irn_pinned(load, op_pin_state_floats);
3299 set_ia32_use_frame(load);
3300 set_ia32_op_type(load, ia32_AddrModeS);
3301 set_ia32_ls_mode(load, mode_Is);
/* tell the frame-entity allocator how large the slot must be */
3302 if (get_ia32_ls_mode(fist) == mode_Ls) {
3303 ia32_attr_t *attr = get_ia32_attr(load);
3304 attr->data.need_64bit_stackent = 1;
3306 ia32_attr_t *attr = get_ia32_attr(load);
3307 attr->data.need_32bit_stackent = 1;
3309 SET_IA32_ORIG_NODE(load, node);
3311 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3315 * Creates a x87 strict Conv by placing a Store and a Load
/* x87 registers always hold 80bit extended precision; forcing the value
 * through memory in tgt_mode performs the required rounding/truncation
 * for a strict conversion. */
3317 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3319 ir_node *block = get_nodes_block(node);
3320 ir_graph *irg = get_Block_irg(block);
3321 dbg_info *dbgi = get_irn_dbg_info(node);
3322 ir_node *frame = get_irg_frame(irg);
3323 ir_node *store, *load;
3326 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3327 set_ia32_use_frame(store);
3328 set_ia32_op_type(store, ia32_AddrModeD);
3329 SET_IA32_ORIG_NODE(store, node);
3331 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3332 set_ia32_use_frame(load);
3333 set_ia32_op_type(load, ia32_AddrModeS);
3334 SET_IA32_ORIG_NODE(load, node);
3336 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Helper: constructs an integer-to-integer Conv, selecting the 8bit
 * variant when the target mode is 8 bits wide (different register
 * constraints apply for 8bit operands). */
3340 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3341 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3343 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3345 func = get_mode_size_bits(mode) == 8 ?
3346 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3347 return func(dbgi, block, base, index, mem, val, mode);
3351 * Create a conversion from general purpose to x87 register
/* Loads an integer into the x87 stack via fild. Tries to feed fild
 * directly from memory (source AM); otherwise spills the gp value to the
 * frame and filds it from there. Unsigned 32bit values are widened to a
 * zero-extended 64bit slot first, since fild only reads signed integers. */
3353 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3355 ir_node *src_block = get_nodes_block(node);
3356 ir_node *block = be_transform_node(src_block);
3357 ir_graph *irg = get_Block_irg(block);
3358 dbg_info *dbgi = get_irn_dbg_info(node);
3359 ir_node *op = get_Conv_op(node);
3360 ir_node *new_op = NULL;
3362 ir_mode *store_mode;
3367 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3368 if (possible_int_mode_for_fp(src_mode)) {
3369 ia32_address_mode_t am;
3371 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3372 if (am.op_type == ia32_AddrModeS) {
3373 ia32_address_t *addr = &am.addr;
3375 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3376 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3378 set_am_attributes(fild, &am);
3379 SET_IA32_ORIG_NODE(fild, node);
3381 fix_mem_proj(fild, &am);
3386 if (new_op == NULL) {
3387 new_op = be_transform_node(op);
3390 mode = get_irn_mode(op);
3392 /* first convert to 32 bit signed if necessary */
3393 if (get_mode_size_bits(src_mode) < 32) {
3394 if (!upper_bits_clean(new_op, src_mode)) {
3395 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3396 SET_IA32_ORIG_NODE(new_op, node);
3401 assert(get_mode_size_bits(mode) == 32);
/* spill the gp value to the frame so fild can pick it up */
3404 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3406 set_ia32_use_frame(store);
3407 set_ia32_op_type(store, ia32_AddrModeD);
3408 set_ia32_ls_mode(store, mode_Iu);
3410 /* exception for 32bit unsigned, do a 64bit spill+load */
3411 if (!mode_is_signed(mode)) {
/* store an extra zero word above the value: the 64bit slot then
 * holds the zero-extended (non-negative) 64bit representation */
3414 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3416 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3417 noreg_GP, nomem, zero_const);
3419 set_ia32_use_frame(zero_store);
3420 set_ia32_op_type(zero_store, ia32_AddrModeD);
3421 add_ia32_am_offs_int(zero_store, 4);
3422 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must observe both stores */
3427 store = new_rd_Sync(dbgi, block, 2, in);
3428 store_mode = mode_Ls;
3430 store_mode = mode_Is;
3434 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3436 set_ia32_use_frame(fild);
3437 set_ia32_op_type(fild, ia32_AddrModeS);
3438 set_ia32_ls_mode(fild, store_mode);
3440 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3446 * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I in the smaller of the two modes, possibly folding the
 * operand as a memory operand. If the operand's upper bits are already
 * clean the conversion is a no-op and skipped entirely. */
3448 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3449 dbg_info *dbgi, ir_node *block, ir_node *op,
3452 ir_node *new_block = be_transform_node(block);
3454 ir_mode *smaller_mode;
3455 ia32_address_mode_t am;
3456 ia32_address_t *addr = &am.addr;
3459 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3460 smaller_mode = src_mode;
3462 smaller_mode = tgt_mode;
3465 #ifdef DEBUG_libfirm
/* a conv directly after a constant should have been folded earlier */
3467 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3472 match_arguments(&am, block, NULL, op, NULL,
3473 match_am | match_8bit_am | match_16bit_am);
3475 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3476 /* unnecessary conv. in theory it shouldn't have been AM */
3477 assert(is_ia32_NoReg_GP(addr->base));
3478 assert(is_ia32_NoReg_GP(addr->index));
3479 assert(is_NoMem(addr->mem));
3480 assert(am.addr.offset == 0);
3481 assert(am.addr.symconst_ent == NULL);
3485 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3486 addr->mem, am.new_op2, smaller_mode);
3487 set_am_attributes(new_node, &am);
3488 /* match_arguments assume that out-mode = in-mode, this isn't true here
3490 set_ia32_ls_mode(new_node, smaller_mode);
3491 SET_IA32_ORIG_NODE(new_node, node);
3492 new_node = fix_mem_proj(new_node, &am);
3497 * Transforms a Conv node.
3499 * @return The created ia32 Conv node
/* Central Conv dispatcher. Cases handled:
 *  - same mode: dropped (SSE) or kept only for x87 strict semantics
 *  - float->float: Conv_FP2FP (SSE) or strict store/load round trip (x87)
 *  - float->int: Conv_FP2I (SSE) or fist-based gen_x87_fp_to_gp
 *  - int->float: Conv_I2FP (SSE) or fild-based gen_x87_gp_to_fp, plus a
 *    strict conv when the integer has more mantissa bits than the target
 *  - int->int: create_I2I_Conv, or dropped when sizes match */
3501 static ir_node *gen_Conv(ir_node *node)
3503 ir_node *block = get_nodes_block(node);
3504 ir_node *new_block = be_transform_node(block);
3505 ir_node *op = get_Conv_op(node);
3506 ir_node *new_op = NULL;
3507 dbg_info *dbgi = get_irn_dbg_info(node);
3508 ir_mode *src_mode = get_irn_mode(op);
3509 ir_mode *tgt_mode = get_irn_mode(node);
3510 int src_bits = get_mode_size_bits(src_mode);
3511 int tgt_bits = get_mode_size_bits(tgt_mode);
3512 ir_node *res = NULL;
3514 assert(!mode_is_int(src_mode) || src_bits <= 32);
3515 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3517 /* modeB -> X should already be lowered by the lower_mode_b pass */
3518 if (src_mode == mode_b) {
3519 panic("ConvB not lowered %+F", node);
3522 if (src_mode == tgt_mode) {
3523 if (get_Conv_strict(node)) {
3524 if (ia32_cg_config.use_sse2) {
3525 /* when we are in SSE mode, we can kill all strict no-op conversion */
3526 return be_transform_node(op);
3529 /* this should be optimized already, but who knows... */
3530 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3531 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3532 return be_transform_node(op);
3536 if (mode_is_float(src_mode)) {
3537 new_op = be_transform_node(op);
3538 /* we convert from float ... */
3539 if (mode_is_float(tgt_mode)) {
3541 if (ia32_cg_config.use_sse2) {
3542 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3543 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3545 set_ia32_ls_mode(res, tgt_mode);
3547 if (get_Conv_strict(node)) {
3548 /* if fp_no_float_fold is not set then we assume that we
3549 * don't have any float operations in a non
3550 * mode_float_arithmetic mode and can skip strict upconvs */
3551 if (src_bits < tgt_bits
3552 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3553 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3556 res = gen_x87_strict_conv(tgt_mode, new_op);
3557 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
/* x87 non-strict float->float conv is a no-op (80bit regs) */
3561 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3566 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3567 if (ia32_cg_config.use_sse2) {
3568 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3570 set_ia32_ls_mode(res, src_mode);
3572 return gen_x87_fp_to_gp(node);
3576 /* we convert from int ... */
3577 if (mode_is_float(tgt_mode)) {
3579 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3580 if (ia32_cg_config.use_sse2) {
3581 new_op = be_transform_node(op);
3582 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3584 set_ia32_ls_mode(res, tgt_mode);
/* int mantissa excludes the sign bit for signed sources */
3586 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3587 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3588 res = gen_x87_gp_to_fp(node, src_mode);
3590 /* we need a strict-Conv, if the int mode has more bits than the
3592 if (float_mantissa < int_mantissa) {
3593 res = gen_x87_strict_conv(tgt_mode, res);
3594 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3598 } else if (tgt_mode == mode_b) {
3599 /* mode_b lowering already took care that we only have 0/1 values */
3600 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3601 src_mode, tgt_mode));
3602 return be_transform_node(op);
3605 if (src_bits == tgt_bits) {
3606 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3607 src_mode, tgt_mode));
3608 return be_transform_node(op);
3611 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Returns an ia32 Immediate for the node if it fits the given constraint
 * type, otherwise transforms the node normally. */
3619 static ir_node *create_immediate_or_transform(ir_node *node,
3620 char immediate_constraint_type)
3622 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3623 if (new_node == NULL) {
3624 new_node = be_transform_node(node);
3630 * Transforms a FrameAddr into an ia32 Add.
/* Realized as a Lea relative to the frame pointer with the frame entity
 * attached; the concrete offset is resolved after frame layout. */
3632 static ir_node *gen_be_FrameAddr(ir_node *node)
3634 ir_node *block = be_transform_node(get_nodes_block(node));
3635 ir_node *op = be_get_FrameAddr_frame(node);
3636 ir_node *new_op = be_transform_node(op);
3637 dbg_info *dbgi = get_irn_dbg_info(node);
3640 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3641 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3642 set_ia32_use_frame(new_node);
3644 SET_IA32_ORIG_NODE(new_node, node);
3650 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* The ia32 float-return ABI expects the value on the x87 stack. With SSE2
 * the result lives in an xmm register, so it is stored to the frame and
 * re-loaded via vfld; the Barrier before the Return is rebuilt with the
 * rerouted value and memory inputs. Non-float or non-SSE returns are just
 * duplicated. */
3652 static ir_node *gen_be_Return(ir_node *node)
3654 ir_graph *irg = current_ir_graph;
3655 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3656 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3657 ir_entity *ent = get_irg_entity(irg);
3658 ir_type *tp = get_entity_type(ent);
3663 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3664 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3666 int pn_ret_val, pn_ret_mem, arity, i;
3668 assert(ret_val != NULL);
3669 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3670 return be_duplicate_node(node);
3673 res_type = get_method_res_type(tp, 0);
3675 if (! is_Primitive_type(res_type)) {
3676 return be_duplicate_node(node);
3679 mode = get_type_mode(res_type);
3680 if (! mode_is_float(mode)) {
3681 return be_duplicate_node(node);
3684 assert(get_method_n_ress(tp) == 1);
3686 pn_ret_val = get_Proj_proj(ret_val);
3687 pn_ret_mem = get_Proj_proj(ret_mem);
3689 /* get the Barrier */
3690 barrier = get_Proj_pred(ret_val);
3692 /* get result input of the Barrier */
3693 ret_val = get_irn_n(barrier, pn_ret_val);
3694 new_ret_val = be_transform_node(ret_val);
3696 /* get memory input of the Barrier */
3697 ret_mem = get_irn_n(barrier, pn_ret_mem);
3698 new_ret_mem = be_transform_node(ret_mem);
3700 frame = get_irg_frame(irg);
3702 dbgi = get_irn_dbg_info(barrier);
3703 block = be_transform_node(get_nodes_block(barrier));
3705 /* store xmm0 onto stack */
3706 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3707 new_ret_mem, new_ret_val);
3708 set_ia32_ls_mode(sse_store, mode);
3709 set_ia32_op_type(sse_store, ia32_AddrModeD);
3710 set_ia32_use_frame(sse_store);
3712 /* load into x87 register */
3713 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3714 set_ia32_op_type(fld, ia32_AddrModeS);
3715 set_ia32_use_frame(fld);
3717 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3718 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3720 /* create a new barrier */
3721 arity = get_irn_arity(barrier);
3722 in = ALLOCAN(ir_node*, arity);
3723 for (i = 0; i < arity; ++i) {
/* substitute the rerouted value/memory, copy the rest transformed */
3726 if (i == pn_ret_val) {
3728 } else if (i == pn_ret_mem) {
3731 ir_node *in = get_irn_n(barrier, i);
3732 new_in = be_transform_node(in);
3737 new_barrier = new_ir_node(dbgi, irg, block,
3738 get_irn_op(barrier), get_irn_mode(barrier),
3740 copy_node_attr(barrier, new_barrier);
3741 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Projs get rerouted */
3742 be_set_transformed_node(barrier, new_barrier);
3744 /* transform normally */
3745 return be_duplicate_node(node);
3749 * Transform a be_AddSP into an ia32_SubSP.
3751 static ir_node *gen_be_AddSP(ir_node *node)
3753 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3754 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3756 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3757 match_am | match_immediate);
3761 * Transform a be_SubSP into an ia32_AddSP
3763 static ir_node *gen_be_SubSP(ir_node *node)
3765 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3766 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3768 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3769 match_am | match_immediate);
3773 * Change some phi modes
/**
 * Transform a Phi: the node itself is duplicated (keeping the old,
 * untransformed arguments for now, because Phis may close loops) and the
 * output register requirement is chosen from the value's mode.
 */
static ir_node *gen_Phi(ir_node *node)
	const arch_register_req_t *req;
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg   = current_ir_graph;
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_mode  *mode  = get_irn_mode(node);

	if (ia32_mode_needs_gp_reg(mode)) {
		/* we shouldn't have any 64bit stuff around anymore */
		assert(get_mode_size_bits(mode) <= 32);
		/* all integer operations are on 32bit registers now */
		req = ia32_reg_classes[CLASS_ia32_gp].class_req;
	} else if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			/* SSE2 floats live in xmm registers */
			req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
			/* otherwise use the virtual x87 register class */
			req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
		/* non-data modes (e.g. memory) carry no register requirement */
		req = arch_no_register_req;

	/* phi nodes allow loops, so we use the old arguments for now
	 * and fix this later */
	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
	                  get_irn_in(node) + 1);
	copy_node_attr(node, phi);
	be_duplicate_deps(node, phi);

	arch_set_out_register_req(phi, 0, req);

	/* enqueue the predecessors so the still-untransformed inputs get fixed */
	be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32 Jmp in the transformed block.
 */
static ir_node *gen_Jmp(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);

	new_node = new_bd_ia32_Jmp(dbgi, new_block);
	SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an indirect jump (IJmp): the jump target may be folded into an
 * address-mode operand or an immediate via match_arguments().
 */
static ir_node *gen_IJmp(ir_node *node)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *op        = get_IJmp_target(node);

	ia32_address_mode_t  am;
	ia32_address_t      *addr = &am.addr;

	/* indirect jump targets must be pointers */
	assert(get_irn_mode(op) == mode_P);

	match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);

	new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
	                            addr->mem, am.new_op2);
	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, node);

	/* reroute a possible memory Proj if address mode was used */
	new_node = fix_mem_proj(new_node, &am);
3857 * Transform a Bound node.
/**
 * Transform a Bound node. Only the common case lower==0 (typical for Java
 * array bounds checks) is supported: it becomes an unsigned compare
 * (index - upper) followed by a Jcc on "below".
 */
static ir_node *gen_Bound(ir_node *node)
	ir_node  *lower = get_Bound_lower(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);

	if (is_Const_0(lower)) {
		/* typical case for Java */
		ir_node *sub, *res, *flags, *block;

		/* index - upper sets the flags we branch on */
		res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
			new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);

		block = get_nodes_block(res);
		if (! is_Proj(res)) {
			/* force mode_T so we can take the flags Proj off the Sub */
			set_irn_mode(sub, mode_T);
			res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
		sub = get_Proj_pred(res);

		flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
		/* unsigned "lower than" also catches negative indices */
		new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
		SET_IA32_ORIG_NODE(new_node, node);
		/* general Bound (lower != 0) is not implemented */
		panic("generic Bound not supported in ia32 Backend");
3890 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3892 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3893 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3895 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3896 match_immediate | match_mode_neutral);
/** Transform a lowered l_ShrDep into a real ia32 Shr (logical right shift). */
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_ShrDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
	/* NOTE(review): match flags continue on the next line — presumably NOT
	 * match_mode_neutral, since the upper bits matter for a right shift */
	return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/** Transform a lowered l_SarDep into a real ia32 Sar (arithmetic right shift). */
static ir_node *gen_ia32_l_SarDep(ir_node *node)
	ir_node *left  = get_irn_n(node, n_ia32_l_SarDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
	return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered l_Add (the low word of a 64bit add) into an ia32 Add.
 * The result is forced to mode_T so the carry flag can be consumed by the
 * matching l_Adc of the high word.
 */
static ir_node *gen_ia32_l_Add(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Add_left);
	ir_node *right   = get_irn_n(node, n_ia32_l_Add_right);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
			match_commutative | match_am | match_immediate |
			match_mode_neutral);

	if (is_Proj(lowered)) {
		/* strip the result Proj to reach the Add itself */
		lowered = get_Proj_pred(lowered);
	assert(is_ia32_Add(lowered));
	/* multiple results: value and flags */
	set_irn_mode(lowered, mode_T);
3933 static ir_node *gen_ia32_l_Adc(ir_node *node)
3935 return gen_binop_flags(node, new_bd_ia32_Adc,
3936 match_commutative | match_am | match_immediate |
3937 match_mode_neutral);
3941 * Transforms a l_MulS into a "real" MulS node.
3943 * @return the created ia32 Mul node
3945 static ir_node *gen_ia32_l_Mul(ir_node *node)
3947 ir_node *left = get_binop_left(node);
3948 ir_node *right = get_binop_right(node);
3950 return gen_binop(node, left, right, new_bd_ia32_Mul,
3951 match_commutative | match_am | match_mode_neutral);
3955 * Transforms a l_IMulS into a "real" IMul1OPS node.
3957 * @return the created ia32 IMul1OP node
3959 static ir_node *gen_ia32_l_IMul(ir_node *node)
3961 ir_node *left = get_binop_left(node);
3962 ir_node *right = get_binop_right(node);
3964 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3965 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered l_Sub (the low word of a 64bit subtract) into an ia32
 * Sub. The result is forced to mode_T so the borrow flag can be consumed by
 * the matching l_Sbb of the high word.
 */
static ir_node *gen_ia32_l_Sub(ir_node *node)
	ir_node *left    = get_irn_n(node, n_ia32_l_Sub_minuend);
	ir_node *right   = get_irn_n(node, n_ia32_l_Sub_subtrahend);
	ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
			match_am | match_immediate | match_mode_neutral);

	if (is_Proj(lowered)) {
		/* strip the result Proj to reach the Sub itself */
		lowered = get_Proj_pred(lowered);
	assert(is_ia32_Sub(lowered));
	/* multiple results: value and flags */
	set_irn_mode(lowered, mode_T);
3985 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3987 return gen_binop_flags(node, new_bd_ia32_Sbb,
3988 match_am | match_immediate | match_mode_neutral);
3992 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3993 * op1 - target to be shifted
3994 * op2 - contains bits to be shifted into target
3996 * Only op3 can be an immediate.
/**
 * Common helper for l_ShlD/l_ShrD: builds the corresponding ia32 ShlD/ShrD
 * double-precision shift from the transformed high/low words and the shift
 * amount (which may become an immediate).
 */
static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
                                         ir_node *low, ir_node *count)
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *new_high  = be_transform_node(high);
	ir_node  *new_low   = be_transform_node(low);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count)              &&
	       get_irn_n_edges(count) == 1 &&
	       mode_is_int(get_irn_mode(count))) {
		assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
		/* skip single-user integer Convs on the shift amount */
		count = get_Conv_op(count);
	new_count = create_immediate_or_transform(count, 0);

	if (is_ia32_l_ShlD(node)) {
		new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
		new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,

	SET_IA32_ORIG_NODE(new_node, node);
4031 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4033 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4034 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4035 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4036 return gen_lowered_64bit_shifts(node, high, low, count);
4039 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4041 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4042 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4043 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4044 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered l_LLtoFloat (64bit int -> float conversion):
 * the two 32bit halves are stored to the frame and reloaded with an x87
 * fild (64bit integer load). For unsigned inputs with the sign bit set,
 * a bias constant is added afterwards to correct the signed fild result.
 * Only the x87 path is implemented (panics for SSE2).
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
	ir_node  *src_block    = get_nodes_block(node);
	ir_node  *block        = be_transform_node(src_block);
	ir_graph *irg          = current_ir_graph;
	dbg_info *dbgi         = get_irn_dbg_info(node);
	ir_node  *frame        = get_irg_frame(irg);
	ir_node  *val_low      = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node  *val_high     = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node  *new_val_low  = be_transform_node(val_low);
	ir_node  *new_val_high = be_transform_node(val_high);
	ir_node  *sync, *fild, *res;
	ir_node  *store_low, *store_high;

	if (ia32_cg_config.use_sse2) {
		panic("ia32_l_LLtoFloat not implemented for SSE2");

	/* do an x87 fild: spill both halves to a frame slot first */
	store_low  = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
	SET_IA32_ORIG_NODE(store_low,  node);
	SET_IA32_ORIG_NODE(store_high, node);

	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	/* high word lives 4 bytes above the low word (little endian) */
	add_ia32_am_offs_int(store_high, 4);

	/* both stores must complete before the fild reads the slot */
	sync  = new_rd_Sync(dbgi, block, 2, in);

	/* do a fild */
	fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);

	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);

	SET_IA32_ORIG_NODE(fild, node);

	res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);

	if (! mode_is_signed(get_irn_mode(val_high))) {
		/* fild treated the value as signed: if the (unsigned) input had the
		 * sign bit set, add the 2^64 bias loaded from a known constant.
		 * The bias table is indexed by (val_high >> 31). */
		ia32_address_mode_t  am;

		ir_node *count = ia32_create_Immediate(NULL, 0, 31);

		am.addr.base          = noreg_GP;
		am.addr.index         = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
		am.addr.mem           = nomem;
		am.addr.symconst_ent  = ia32_gen_fp_known_const(ia32_ULLBIAS);
		am.addr.use_frame     = 0;
		am.addr.frame_entity  = NULL;
		am.addr.symconst_sign = 0;
		am.ls_mode            = mode_F;
		am.mem_proj           = nomem;
		am.op_type            = ia32_AddrModeS;
		am.new_op2            = ia32_new_NoReg_vfp(env_cg);
		am.pinned             = op_pin_state_floats;
		am.ins_permuted       = 0;

		fadd  = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
			am.new_op1, am.new_op2, get_fpcw());
		set_am_attributes(fadd, &am);

		set_irn_mode(fadd, mode_T);
		res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered l_FloattoLL (float -> 64bit int conversion):
 * an x87 fist stores the 64bit integer result into a frame slot; the two
 * 32bit halves are read back later by gen_Proj_l_FloattoLL().
 */
static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
	ir_node  *src_block = get_nodes_block(node);
	ir_node  *block     = be_transform_node(src_block);
	ir_graph *irg       = get_Block_irg(block);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *frame     = get_irg_frame(irg);
	ir_node  *val       = get_irn_n(node, n_ia32_l_FloattoLL_val);
	ir_node  *new_val   = be_transform_node(val);
	ir_node  *fist, *mem;

	mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
	SET_IA32_ORIG_NODE(fist, node);
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);
	/* store the full 64bit integer */
	set_ia32_ls_mode(fist, mode_Ls);
 * the BAD transformer.
/** Fallback transformer: aborts for IR nodes without an ia32 transformer. */
static ir_node *bad_transform(ir_node *node)
	panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: loads one 32bit half of the 64bit
 * integer that the fist stored into the frame slot (offset 4 selects the
 * high word on little-endian ia32).
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_graph *irg      = get_Block_irg(block);
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	ir_node  *frame    = get_irg_frame(irg);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      pn       = get_Proj_proj(node);

	load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
	SET_IA32_ORIG_NODE(load, node);
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;

	if (pn == pn_ia32_l_FloattoLL_res_high) {
		/* high word lives 4 bytes into the slot */
		add_ia32_am_offs_int(load, 4);
		assert(pn == pn_ia32_l_FloattoLL_res_low);

	proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4195 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of an AddSP. Since be_AddSP became an ia32 SubSP
 * (see gen_be_AddSP), the Proj numbers are renumbered to the SubSP's outputs.
 */
static ir_node *gen_Proj_be_AddSP(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_AddSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                           pn_ia32_SubSP_stack);
		/* the new stack pointer is pinned to %esp */
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_AddSP_res) {
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                   pn_ia32_SubSP_addr);
	} else if (proj == pn_be_AddSP_M) {
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);

	panic("No idea how to transform proj->AddSP");
4221 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a SubSP. Since be_SubSP became an ia32 AddSP
 * (see gen_be_SubSP), the Proj numbers are renumbered to the AddSP's outputs.
 */
static ir_node *gen_Proj_be_SubSP(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	if (proj == pn_be_SubSP_sp) {
		ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
		                           pn_ia32_AddSP_stack);
		/* the new stack pointer is pinned to %esp */
		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_SubSP_M) {
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);

	panic("No idea how to transform proj->SubSP");
4244 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs of a Load. The predecessor may have been
 * turned into one of several ia32 load variants (Load, Conv, xLoad, vfld) —
 * or folded away entirely by source address mode — so the Proj number is
 * mapped per variant.
 */
static ir_node *gen_Proj_Load(ir_node *node)
	ir_node  *block = be_transform_node(get_nodes_block(node));
	ir_node  *pred  = get_Proj_pred(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	long      proj  = get_Proj_proj(node);

	/* loads might be part of source address mode matches, so we don't
	 * transform the ProjMs yet (with the exception of loads whose result is
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
		ir_node *old_block = get_nodes_block(node);

		/* this is needed, because sometimes we have loops that are only
		   reachable through the ProjM */
		be_enqueue_preds(node);
		/* do it in 2 steps, to silence firm verifier */
		res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
		set_Proj_proj(res, pn_ia32_mem);

	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
		/* integer load: map result/memory/control-flow Projs */
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
	} else if (is_ia32_Conv_I2I(new_pred) ||
	           is_ia32_Conv_I2I8Bit(new_pred)) {
		/* the load was folded into a Conv with address mode */
		set_irn_mode(new_pred, mode_T);
		if (proj == pn_Load_res) {
			return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
		} else if (proj == pn_Load_M) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
	} else if (is_ia32_xLoad(new_pred)) {
		/* SSE load */
		return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	} else if (is_ia32_vfld(new_pred)) {
		/* x87 load */
		return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
	case pn_Load_X_regular:
		return new_rd_Jmp(dbgi, block);
	case pn_Load_X_except:
		/* This Load might raise an exception. Mark it. */
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);

	/* can happen for ProJMs when source address mode happened for the
	/* however it should not be the result proj, as that would mean the
	   load had multiple users and should not have been used for
	if (proj != pn_Load_M) {
		panic("internal error: transformed node not a Load");
	return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);

	panic("No idea how to transform proj");
4343 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs of a Div/Mod/DivMod: all three firm
 * opcodes were lowered to a single ia32 Div (or IDiv), which produces both
 * quotient and remainder; the Proj numbers are mapped accordingly.
 */
static ir_node *gen_Proj_DivMod(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));

	/* dispatch on the original opcode (Div/Mod/DivMod), then on the Proj */
	switch (get_irn_opcode(pred)) {
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_Div_X_regular:
		return new_rd_Jmp(dbgi, block);
	case pn_Div_X_except:
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_Mod_X_except:
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
		return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
	case pn_DivMod_res_div:
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_DivMod_res_mod:
		return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_DivMod_X_regular:
		return new_rd_Jmp(dbgi, block);
	case pn_DivMod_X_except:
		set_ia32_exc_label(new_pred, 1);
		return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);

	panic("No idea how to transform proj->DivMod");
4409 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs of a CopyB; the predecessor became
 * either a CopyB_i (fixed size) or a generic CopyB.
 */
static ir_node *gen_Proj_CopyB(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

	case pn_CopyB_M_regular:
		if (is_ia32_CopyB_i(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
		} else if (is_ia32_CopyB(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);

	panic("No idea how to transform proj->CopyB");
4435 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs of a Quot (float division); the
 * predecessor became either an SSE xDiv or an x87 vfdiv.
 */
static ir_node *gen_Proj_Quot(ir_node *node)
	ir_node  *block    = be_transform_node(get_nodes_block(node));
	ir_node  *pred     = get_Proj_pred(node);
	ir_node  *new_pred = be_transform_node(pred);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	long      proj     = get_Proj_proj(node);

		/* memory Proj */
		if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
		/* result Proj: mode depends on the register class used */
		if (is_ia32_xDiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
		} else if (is_ia32_vfdiv(new_pred)) {
			return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
	case pn_Quot_X_regular:
	case pn_Quot_X_except:

	panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32 Call: the call target may be matched as
 * address mode or immediate, register parameters are routed to eax/ecx/edx,
 * and SSE2 calls are remembered for float-result post-processing.
 */
static ir_node *gen_be_Call(ir_node *node)
	dbg_info *const dbgi      = get_irn_dbg_info(node);
	ir_node  *const src_block = get_nodes_block(node);
	ir_node  *const block     = be_transform_node(src_block);
	ir_node  *const src_mem   = get_irn_n(node, be_pos_Call_mem);
	ir_node  *const src_sp    = get_irn_n(node, be_pos_Call_sp);
	ir_node  *const sp        = be_transform_node(src_sp);
	ir_node  *const src_ptr   = get_irn_n(node, be_pos_Call_ptr);
	ia32_address_mode_t am;
	ia32_address_t *const addr = &am.addr;
	ir_node *       eax       = noreg_GP;
	ir_node *       ecx       = noreg_GP;
	ir_node *       edx       = noreg_GP;
	unsigned  const pop       = be_Call_get_pop(node);
	ir_type  *const call_tp   = be_Call_get_type(node);
	int             old_no_pic_adjust;

	/* Run the x87 simulator if the call returns a float value */
	if (get_method_n_ress(call_tp) > 0) {
		ir_type *const res_type = get_method_res_type(call_tp, 0);
		ir_mode *const res_mode = get_type_mode(res_type);

		if (res_mode != NULL && mode_is_float(res_mode)) {
			env_cg->do_x87_sim = 1;

	/* We do not want be_Call direct calls */
	assert(be_Call_get_entity(node) == NULL);

	/* special case for PIC trampoline calls */
	old_no_pic_adjust = no_pic_adjust;
	no_pic_adjust     = env_cg->birg->main_env->options->pic;

	match_arguments(&am, src_block, NULL, src_ptr, src_mem,
	                match_am | match_immediate);

	no_pic_adjust = old_no_pic_adjust;

	/* the last input is the fp control word; the preceding ones are the
	 * register parameters */
	i    = get_irn_arity(node) - 1;
	fpcw = be_transform_node(get_irn_n(node, i--));
	for (; i >= be_pos_Call_first_arg; --i) {
		arch_register_req_t const *const req = arch_get_register_req(node, i);
		ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));

		assert(req->type == arch_register_req_type_limited);
		assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);

		/* route each register parameter to its required GP register */
		switch (*req->limited) {
		case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
		case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
		case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
		default: panic("Invalid GP register for register parameter");

	mem  = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
	call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
	                        am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
	set_am_attributes(call, &am);
	call = fix_mem_proj(call, &am);

	if (get_irn_pinned(node) == op_pin_state_pinned)
		set_irn_pinned(call, op_pin_state_pinned);

	SET_IA32_ORIG_NODE(call, node);

	if (ia32_cg_config.use_sse2) {
		/* remember this call for post-processing */
		ARR_APP1(ir_node *, call_list, call);
		ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4551 * Transform Builtin trap
4553 static ir_node *gen_trap(ir_node *node) {
4554 dbg_info *dbgi = get_irn_dbg_info(node);
4555 ir_node *block = be_transform_node(get_nodes_block(node));
4556 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4558 return new_bd_ia32_UD2(dbgi, block, mem);
4562 * Transform Builtin debugbreak
4564 static ir_node *gen_debugbreak(ir_node *node) {
4565 dbg_info *dbgi = get_irn_dbg_info(node);
4566 ir_node *block = be_transform_node(get_nodes_block(node));
4567 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4569 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4573 * Transform Builtin return_address
/**
 * Transform the Builtin return_address: for depth > 0 the frame chain is
 * walked with a ClimbFrame pseudo-node, then the return address slot of the
 * reached frame is loaded.
 */
static ir_node *gen_return_address(ir_node *node) {
	ir_node   *param = get_Builtin_param(node, 0);
	ir_node   *frame = get_Builtin_param(node, 1);
	dbg_info  *dbgi  = get_irn_dbg_info(node);
	tarval    *tv    = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);

	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr   = be_transform_node(frame);

	/* walk 'value' frames up the frame chain */
	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the return address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ia32_get_return_address_entity());

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all load variants share the same result Proj number, so the
		 * rematerializable flag is safe regardless of the load kind */
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4615 * Transform Builtin frame_address
/**
 * Transform the Builtin frame_address: analogous to gen_return_address, but
 * loads the frame address slot of the reached frame.
 */
static ir_node *gen_frame_address(ir_node *node) {
	ir_node   *param = get_Builtin_param(node, 0);
	ir_node   *frame = get_Builtin_param(node, 1);
	dbg_info  *dbgi  = get_irn_dbg_info(node);
	tarval    *tv    = get_Const_tarval(param);
	unsigned long value = get_tarval_long(tv);

	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr   = be_transform_node(frame);

	/* walk 'value' frames up the frame chain */
	ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
	ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
	ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);

	/* load the frame address from this frame */
	load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);

	set_irn_pinned(load, get_irn_pinned(node));
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);

	ent = ia32_get_frame_address_entity();

	set_ia32_am_offs_int(load, 0);
	set_ia32_use_frame(load);
	set_ia32_frame_ent(load, ent);

	/* will fail anyway, but gcc does this: */
	set_ia32_am_offs_int(load, 0);

	if (get_irn_pinned(node) == op_pin_state_floats) {
		/* all load variants share the same result Proj number, so the
		 * rematerializable flag is safe regardless of the load kind */
		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
				&& pn_ia32_vfld_res == pn_ia32_Load_res
				&& pn_ia32_Load_res == pn_ia32_res);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);

	SET_IA32_ORIG_NODE(load, node);
	return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4664 * Transform Builtin frame_address
/**
 * Transform the Builtin prefetch: selects between the SSE prefetch family
 * (NTA/0/1/2, chosen by the locality hint) and the 3DNow! prefetch/prefetchw.
 * Without either CPU feature the builtin degenerates to its memory input.
 */
static ir_node *gen_prefetch(ir_node *node) {
	ir_node        *ptr, *block, *mem, *base, *index;
	ir_node        *param,  *new_node;
	ia32_address_t  addr;

	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
		/* no prefetch at all, route memory */
		return be_transform_node(get_Builtin_mem(node));

	/* param 1: read/write hint (1 = prepare-for-write) */
	param = get_Builtin_param(node, 1);
	tv    = get_Const_tarval(param);
	rw    = get_tarval_long(tv);

	/* construct load address */
	memset(&addr, 0, sizeof(addr));
	ptr = get_Builtin_param(node, 0);
	ia32_create_address_mode(&addr, ptr, 0);

	base = be_transform_node(base);

	if (index == NULL) {
	index = be_transform_node(index);

	dbgi  = get_irn_dbg_info(node);
	block = be_transform_node(get_nodes_block(node));
	mem   = be_transform_node(get_Builtin_mem(node));

	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
		/* we have 3DNow!, this was already checked above */
		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
	} else if (ia32_cg_config.use_sse_prefetch) {
		/* note: rw == 1 is IGNORED in that case */
		param    = get_Builtin_param(node, 2);
		tv       = get_Const_tarval(param);
		locality = get_tarval_long(tv);

		/* SSE style prefetch: pick the hint by the locality argument */
		new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
		new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
		assert(ia32_cg_config.use_3dnow_prefetch);
		/* 3DNow! style prefetch */
		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);

	set_irn_pinned(new_node, get_irn_pinned(node));
	set_ia32_op_type(new_node, ia32_AddrModeS);
	set_ia32_ls_mode(new_node, mode_Bu);
	set_address(new_node, &addr);

	SET_IA32_ORIG_NODE(new_node, node);

	be_dep_on_frame(new_node);
	return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4748 * Transform bsf like node
/**
 * Common helper for bsf/bsr-like builtins: build a unary ia32 node from the
 * first Builtin parameter, allowing the operand to be matched as address
 * mode.
 */
static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
	ir_node  *param     = get_Builtin_param(node, 0);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);

	ia32_address_mode_t  am;
	ia32_address_t      *addr = &am.addr;

	match_arguments(&am, block, NULL, param, NULL, match_am);

	cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
	set_am_attributes(cnt, &am);
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	return fix_mem_proj(cnt, &am);
4773 * Transform builtin ffs.
/**
 * Transform builtin ffs as (bsf(x) | -(x == 0)) + 1:
 * bsf yields the index of the lowest set bit; for x == 0 the Setcc/Neg pair
 * produces an all-ones mask so the final +1 gives the required result 0.
 */
static ir_node *gen_ffs(ir_node *node)
	ir_node  *bsf   = gen_unop_AM(node, new_bd_ia32_Bsf);
	ir_node  *real  = skip_Proj(bsf);
	dbg_info *dbgi  = get_irn_dbg_info(real);
	ir_node  *block = get_nodes_block(real);
	ir_node  *flag, *set, *conv, *neg, *or;

	if (get_irn_mode(real) != mode_T) {
		/* force mode_T so we can also take the flags Proj of the Bsf */
		set_irn_mode(real, mode_T);
		bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);

	flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);

	/* set = (x == 0) — bsf sets ZF for a zero input */
	set = new_bd_ia32_Setcc(dbgi, block, flag, pn_Cmp_Eq);
	SET_IA32_ORIG_NODE(set, node);

	/* zero-extend the 8bit Setcc result */
	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
	SET_IA32_ORIG_NODE(conv, node);

	/* neg = -(x == 0): 0 or 0xFFFFFFFF */
	neg = new_bd_ia32_Neg(dbgi, block, conv);

	/* or = bsf(x) | -(x == 0) */
	or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
	set_ia32_commutative(or);

	/* add 1: 1-based bit index, or 0 for a zero input */
	return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4811 * Transform builtin clz.
4813 static ir_node *gen_clz(ir_node *node)
4815 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4816 ir_node *real = skip_Proj(bsr);
4817 dbg_info *dbgi = get_irn_dbg_info(real);
4818 ir_node *block = get_nodes_block(real);
4819 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4821 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
 * Transform builtin ctz.
static ir_node *gen_ctz(ir_node *node)
	/* ctz maps directly onto the ia32 bsf (bit scan forward) instruction */
	return gen_unop_AM(node, new_bd_ia32_Bsf);
4833 * Transform builtin parity.
/**
 * Transform builtin parity: compare the operand against 0 (which sets the
 * parity flag), materialize the flag with Setcc, and zero-extend the 8bit
 * result.
 */
static ir_node *gen_parity(ir_node *node)
	ir_node  *param     = get_Builtin_param(node, 0);
	dbg_info *dbgi      = get_irn_dbg_info(node);
	ir_node  *block     = get_nodes_block(node);
	ir_node  *new_block = be_transform_node(block);
	ir_node  *imm, *cmp, *new_node;

	ia32_address_mode_t  am;
	ia32_address_t      *addr = &am.addr;

	/* cmp param, 0 — updates the parity flag */
	match_arguments(&am, block, NULL, param, NULL, match_am);
	imm = ia32_create_Immediate(NULL, 0, 0);
	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
	set_am_attributes(cmp, &am);
	set_ia32_ls_mode(cmp, mode_Iu);

	SET_IA32_ORIG_NODE(cmp, node);

	cmp = fix_mem_proj(cmp, &am);

	/* setp — materialize the parity flag */
	new_node = new_bd_ia32_Setcc(dbgi, new_block, cmp, ia32_pn_Cmp_parity);
	SET_IA32_ORIG_NODE(new_node, node);

	/* movzx — zero-extend the 8bit Setcc result to 32bit */
	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
	                                    nomem, new_node, mode_Bu);
	SET_IA32_ORIG_NODE(new_node, node);
4873 * Transform builtin popcount
4875 static ir_node *gen_popcount(ir_node *node) {
4876 ir_node *param = get_Builtin_param(node, 0);
4877 dbg_info *dbgi = get_irn_dbg_info(node);
4879 ir_node *block = get_nodes_block(node);
4880 ir_node *new_block = be_transform_node(block);
4883 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4885 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4886 if (ia32_cg_config.use_popcnt) {
4887 ia32_address_mode_t am;
4888 ia32_address_t *addr = &am.addr;
4891 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4893 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4894 set_am_attributes(cnt, &am);
4895 set_ia32_ls_mode(cnt, get_irn_mode(param));
4897 SET_IA32_ORIG_NODE(cnt, node);
4898 return fix_mem_proj(cnt, &am);
4901 new_param = be_transform_node(param);
4903 /* do the standard popcount algo */
4905 /* m1 = x & 0x55555555 */
4906 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4907 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4910 simm = ia32_create_Immediate(NULL, 0, 1);
4911 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4913 /* m2 = s1 & 0x55555555 */
4914 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4917 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4919 /* m4 = m3 & 0x33333333 */
4920 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4921 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4924 simm = ia32_create_Immediate(NULL, 0, 2);
4925 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4927 /* m5 = s2 & 0x33333333 */
4928 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4931 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4933 /* m7 = m6 & 0x0F0F0F0F */
4934 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4935 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4938 simm = ia32_create_Immediate(NULL, 0, 4);
4939 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4941 /* m8 = s3 & 0x0F0F0F0F */
4942 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4945 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4947 /* m10 = m9 & 0x00FF00FF */
4948 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4949 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4952 simm = ia32_create_Immediate(NULL, 0, 8);
4953 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4955 /* m11 = s4 & 0x00FF00FF */
4956 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4958 /* m12 = m10 + m11 */
4959 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4961 /* m13 = m12 & 0x0000FFFF */
4962 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4963 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4965 /* s5 = m12 >> 16 */
4966 simm = ia32_create_Immediate(NULL, 0, 16);
4967 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4969 /* res = m13 + s5 */
4970 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4974 * Transform builtin byte swap.
4976 static ir_node *gen_bswap(ir_node *node) {
4977 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4978 dbg_info *dbgi = get_irn_dbg_info(node);
4980 ir_node *block = get_nodes_block(node);
4981 ir_node *new_block = be_transform_node(block);
4982 ir_mode *mode = get_irn_mode(param);
4983 unsigned size = get_mode_size_bits(mode);
4984 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4988 if (ia32_cg_config.use_i486) {
4989 /* swap available */
4990 return new_bd_ia32_Bswap(dbgi, new_block, param);
4992 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4993 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4995 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4996 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
4998 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5000 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5001 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5003 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5004 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5007 /* swap16 always available */
5008 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5011 panic("Invalid bswap size (%d)", size);
/**
 * Transform builtin outport (write a value to an x86 I/O port).
 */
static ir_node *gen_outport(ir_node *node) {
/* the port number may be encoded as an immediate operand */
ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
ir_node *oldv = get_Builtin_param(node, 1);
ir_mode *mode = get_irn_mode(oldv);
ir_node *value = be_transform_node(oldv);
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *mem = be_transform_node(get_Builtin_mem(node));
dbg_info *dbgi = get_irn_dbg_info(node);
ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
/* the access width of the out instruction is the mode of the value */
set_ia32_ls_mode(res, mode);
/**
 * Transform builtin inport (read a value from an x86 I/O port).
 *
 * The access width is taken from the result type of the builtin's
 * method type.
 */
static ir_node *gen_inport(ir_node *node) {
ir_type *tp = get_Builtin_type(node);
ir_type *rstp = get_method_res_type(tp, 0);
ir_mode *mode = get_type_mode(rstp);
/* the port number may be encoded as an immediate operand */
ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *mem = be_transform_node(get_Builtin_mem(node));
dbg_info *dbgi = get_irn_dbg_info(node);
ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
set_ia32_ls_mode(res, mode);
/* check for missing Result Proj */
/**
 * Transform a builtin inner trampoline.
 *
 * Writes a small code stub to memory at <ptr>:
 *     mov ecx, <env>      ; opcode 0xB9, followed by the 32-bit env
 *     jmp rel <callee>    ; opcode 0xE9, followed by a 32-bit relative
 *                         ; displacement (callee - (ptr + 10))
 * and returns a Tuple of (memory, trampoline address).
 * NOTE(review): the stores share one ia32_address_t; the address offset
 * is presumably advanced between the stores in code not shown here.
 */
static ir_node *gen_inner_trampoline(ir_node *node) {
ir_node *ptr = get_Builtin_param(node, 0);
ir_node *callee = get_Builtin_param(node, 1);
ir_node *env = be_transform_node(get_Builtin_param(node, 2));
ir_node *mem = get_Builtin_mem(node);
ir_node *block = get_nodes_block(node);
ir_node *new_block = be_transform_node(block);
ir_node *trampoline;
dbg_info *dbgi = get_irn_dbg_info(node);
ia32_address_t addr;
/* construct store address */
memset(&addr, 0, sizeof(addr));
ia32_create_address_mode(&addr, ptr, 0);
if (addr.base == NULL) {
addr.base = noreg_GP;
addr.base = be_transform_node(addr.base);
if (addr.index == NULL) {
addr.index = noreg_GP;
addr.index = be_transform_node(addr.index);
addr.mem = be_transform_node(mem);
/* mov ecx, <env>: first the 0xB9 opcode byte ... */
val = ia32_create_Immediate(NULL, 0, 0xB9);
store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
addr.index, addr.mem, val);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Bu);
set_address(store, &addr);
/* ... then the 32-bit environment pointer */
store = new_bd_ia32_Store(dbgi, new_block, addr.base,
addr.index, addr.mem, env);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Iu);
set_address(store, &addr);
/* jmp rel <callee>: the 0xE9 opcode byte */
val = ia32_create_Immediate(NULL, 0, 0xE9);
store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
addr.index, addr.mem, val);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Bu);
set_address(store, &addr);
trampoline = be_transform_node(ptr);
/* the callee is typically an immediate */
if (is_SymConst(callee)) {
/* rel = callee - 10: the displacement is relative to the end of the
 * 10-byte stub; the remaining "- trampoline" is subtracted below */
rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
/* store the 32-bit displacement */
store = new_bd_ia32_Store(dbgi, new_block, addr.base,
addr.index, addr.mem, rel);
set_irn_pinned(store, get_irn_pinned(node));
set_ia32_op_type(store, ia32_AddrModeD);
set_ia32_ls_mode(store, mode_Iu);
set_address(store, &addr);
/* result tuple: in[0] = memory, in[1] = trampoline address */
return new_r_Tuple(new_block, 2, in);
/**
 * Transform Builtin node.
 *
 * Dispatches on the builtin kind to the specialised gen_* transformer;
 * panics for kinds the ia32 backend does not implement.
 */
static ir_node *gen_Builtin(ir_node *node) {
ir_builtin_kind kind = get_Builtin_kind(node);
return gen_trap(node);
case ir_bk_debugbreak:
return gen_debugbreak(node);
case ir_bk_return_address:
return gen_return_address(node);
case ir_bk_frame_address:
return gen_frame_address(node);
case ir_bk_prefetch:
return gen_prefetch(node);
return gen_ffs(node);
return gen_clz(node);
return gen_ctz(node);
return gen_parity(node);
case ir_bk_popcount:
return gen_popcount(node);
return gen_bswap(node);
return gen_outport(node);
return gen_inport(node);
case ir_bk_inner_trampoline:
return gen_inner_trampoline(node);
panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform Proj(Builtin) node.
 *
 * Maps the generic Builtin result/memory Projs onto whatever the
 * builtin's transformer produced (a value node, the Inport's Projs or
 * the inner-trampoline's result Tuple).
 */
static ir_node *gen_Proj_Builtin(ir_node *proj) {
ir_node *node = get_Proj_pred(proj);
ir_node *new_node = be_transform_node(node);
ir_builtin_kind kind = get_Builtin_kind(node);
/* value-producing builtins: the transformed node is the result itself */
case ir_bk_return_address:
case ir_bk_frame_address:
case ir_bk_popcount:
assert(get_Proj_proj(proj) == pn_Builtin_1_result);
/* memory-only builtins: the transformed node is the memory result */
case ir_bk_debugbreak:
case ir_bk_prefetch:
assert(get_Proj_proj(proj) == pn_Builtin_M);
/* inport has both a data and a memory result */
if (get_Proj_proj(proj) == pn_Builtin_1_result) {
return new_r_Proj(get_nodes_block(new_node),
new_node, get_irn_mode(proj), pn_ia32_Inport_res);
assert(get_Proj_proj(proj) == pn_Builtin_M);
return new_r_Proj(get_nodes_block(new_node),
new_node, mode_M, pn_ia32_Inport_M);
case ir_bk_inner_trampoline:
/* gen_inner_trampoline built a Tuple: 0 = memory, 1 = address */
if (get_Proj_proj(proj) == pn_Builtin_1_result) {
return get_Tuple_pred(new_node, 1);
assert(get_Proj_proj(proj) == pn_Builtin_M);
return get_Tuple_pred(new_node, 0);
panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
 * Transform be_IncSP: duplicate the node, but mark it as modifying the
 * flags, since the stack pointer adjustment is emitted as an add/sub.
 */
static ir_node *gen_be_IncSP(ir_node *node)
ir_node *res = be_duplicate_node(node);
arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call.
 *
 * Renumbers be_Call proj numbers to ia32_Call proj numbers; for result
 * values the matching output of the new call is found by comparing the
 * (limited) register requirements. Also pins the well-known outputs
 * (stack pointer, fpcw) to their fixed registers.
 */
static ir_node *gen_Proj_be_Call(ir_node *node)
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *call = get_Proj_pred(node);
ir_node *new_call = be_transform_node(call);
dbg_info *dbgi = get_irn_dbg_info(node);
long proj = get_Proj_proj(node);
ir_mode *mode = get_irn_mode(node);
if (proj == pn_be_Call_M_regular) {
return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
/* transform call modes */
if (mode_is_data(mode)) {
const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
/* Map from be_Call to ia32_Call proj number */
if (proj == pn_be_Call_sp) {
proj = pn_ia32_Call_stack;
} else if (proj == pn_be_Call_M_regular) {
proj = pn_ia32_Call_M;
arch_register_req_t const *const req = arch_get_register_req_out(node);
int const n_outs = arch_irn_get_n_outs(new_call);
assert(proj >= pn_be_Call_first_res);
assert(req->type & arch_register_req_type_limited);
/* search the output of the new call with the same (limited)
 * register requirement — that is the matching result */
for (i = 0; i < n_outs; ++i) {
arch_register_req_t const *const new_req
= arch_get_out_register_req(new_call, i);
if (!(new_req->type & arch_register_req_type_limited) ||
new_req->cls != req->cls ||
*new_req->limited != *req->limited)
res = new_rd_Proj(dbgi, block, new_call, mode, proj);
/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
case pn_ia32_Call_stack:
arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
case pn_ia32_Call_fpcw:
arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
/**
 * Transform the Projs from a Cmp.
 *
 * Must never be reached: mode_b lowering removes these before the
 * backend transformation runs.
 */
static ir_node *gen_Proj_Cmp(ir_node *node)
/* this probably means not all mode_b nodes were lowered... */
panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/**
 * Transform the Projs from a Bound.
 *
 * The Bound itself is transformed into a conditional jump; its X
 * Projs map onto the Jcc's true/false outputs, the memory Proj onto
 * the Bound's memory input and the result onto the checked index.
 */
static ir_node *gen_Proj_Bound(ir_node *node)
ir_node *new_node, *block;
ir_node *pred = get_Proj_pred(node);
switch (get_Proj_proj(node)) {
return be_transform_node(get_Bound_mem(pred));
case pn_Bound_X_regular:
new_node = be_transform_node(pred);
block = get_nodes_block(new_node);
return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
case pn_Bound_X_except:
new_node = be_transform_node(pred);
block = get_nodes_block(new_node);
return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
/* the in-bounds result is simply the (transformed) index */
return be_transform_node(get_Bound_index(pred));
panic("unsupported Proj from Bound");
/**
 * Transform a Proj from an ASM node.
 *
 * The memory output is always the last output of the transformed ASM
 * node; value outputs keep their position (handled per mode class).
 */
static ir_node *gen_Proj_ASM(ir_node *node)
ir_mode *mode = get_irn_mode(node);
ir_node *pred = get_Proj_pred(node);
ir_node *new_pred = be_transform_node(pred);
ir_node *block = get_nodes_block(new_pred);
long pos = get_Proj_proj(node);
if (mode == mode_M) {
/* memory result is the last output of the new ASM node */
pos = arch_irn_get_n_outs(new_pred)-1;
} else if (mode_is_int(mode) || mode_is_reference(mode)) {
} else if (mode_is_float(mode)) {
panic("unexpected proj mode at ASM");
return new_r_Proj(block, new_pred, mode, pos);
/**
 * Transform and potentially renumber Proj nodes.
 *
 * Dispatches on the opcode of the Proj's predecessor to the specific
 * Proj transformers; Start Projs and Projs of untransformed
 * predecessors are handled inline.
 */
static ir_node *gen_Proj(ir_node *node)
ir_node *pred = get_Proj_pred(node);
switch (get_irn_opcode(pred)) {
proj = get_Proj_proj(node);
if (proj == pn_Store_M) {
/* a Store has only a memory result; it becomes the store itself */
return be_transform_node(pred);
panic("No idea how to transform proj->Store");
return gen_Proj_Load(node);
return gen_Proj_ASM(node);
return gen_Proj_Builtin(node);
return gen_Proj_DivMod(node);
return gen_Proj_CopyB(node);
return gen_Proj_Quot(node);
return gen_Proj_be_SubSP(node);
return gen_Proj_be_AddSP(node);
return gen_Proj_be_Call(node);
return gen_Proj_Cmp(node);
return gen_Proj_Bound(node);
proj = get_Proj_proj(node);
case pn_Start_X_initial_exec: {
ir_node *block = get_nodes_block(pred);
ir_node *new_block = be_transform_node(block);
dbg_info *dbgi = get_irn_dbg_info(node);
/* we exchange the ProjX with a jump */
ir_node *jump = new_rd_Jmp(dbgi, new_block);
case pn_Start_P_tls:
return gen_Proj_tls(node);
if (is_ia32_l_FloattoLL(pred)) {
return gen_Proj_l_FloattoLL(node);
} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* Projs of nodes that were not transformed: re-create the Proj on
 * the transformed predecessor, forcing a gp mode where needed */
ir_mode *mode = get_irn_mode(node);
if (ia32_mode_needs_gp_reg(mode)) {
ir_node *new_pred = be_transform_node(pred);
ir_node *block = be_transform_node(get_nodes_block(node));
ir_node *new_proj = new_r_Proj(block, new_pred,
mode_Iu, get_Proj_proj(node));
/* keep the debug node number for diagnostics */
new_proj->node_nr = node->node_nr;
return be_duplicate_node(node);
/**
 * Enters all transform functions into the generic pointer of the
 * respective opcodes (ir_op->ops.generic), so the generic backend
 * transformation driver can dispatch to them.
 */
static void register_transformers(void)
/* first clear the generic function pointer for all ops */
clear_irp_opcodes_generic_func();
/* GEN registers a transformer for an op, BAD marks ops that must not
 * appear anymore at this stage */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
/* transform ops from intrinsic lowering */
GEN(ia32_l_LLtoFloat);
GEN(ia32_l_FloattoLL);
/* we should never see these nodes */
/* handle builtins */
/* handle generic backend nodes */
/**
 * Pre-transform all unknown and noreg nodes.
 *
 * These singleton nodes are created before the transformation proper;
 * also caches the NoMem and gp NoReg nodes used throughout this file.
 */
static void ia32_pretransform_node(void)
ia32_code_gen_t *cg = env_cg;
cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* cache globals used by the gen_* functions above */
nomem = get_irg_no_mem(current_ir_graph);
noreg_GP = ia32_new_NoReg_gp(cg);
/**
 * Walker, checks if all ia32 nodes producing more than one result have their
 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
 *
 * Outputs whose register class is the flags class are not kept; a
 * bitmask of the Proj numbers seen on out-edges is used to find the
 * missing ones.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
unsigned found_projs = 0;
const ir_edge_t *edge;
ir_mode *mode = get_irn_mode(node);
if (!is_ia32_irn(node))
n_outs = arch_irn_get_n_outs(node);
if (is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the out count must fit in it */
assert(n_outs < (int) sizeof(unsigned) * 8);
foreach_out_edge(node, edge) {
ir_node *proj = get_edge_src_irn(edge);
/* The node could be kept */
if (get_irn_mode(proj) == mode_M)
pn = get_Proj_proj(proj);
assert(pn < n_outs);
found_projs |= 1 << pn;
/* are keeps missing? */
for (i = 0; i < n_outs; ++i) {
const arch_register_req_t *req;
const arch_register_class_t *cls;
if (found_projs & (1 << i)) {
req = arch_get_out_register_req(node, i);
/* flag outputs need not be kept alive */
if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
block = get_nodes_block(node);
/* create the missing Proj and keep it alive */
in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
if (last_keep != NULL) {
be_Keep_add_node(last_keep, cls, in[0]);
last_keep = be_new_Keep(block, 1, in);
if (sched_is_scheduled(node)) {
sched_add_after(node, last_keep);
/**
 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
 * by walking the whole graph with add_missing_keep_walker.
 */
void ia32_add_missing_keeps(ia32_code_gen_t *cg)
ir_graph *irg = be_get_birg_irg(cg->birg);
irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
 * Post-process all calls if we are in SSE mode.
 * The ABI requires that the results are in st0, copy them
 * to a xmm register.
 *
 * For each float result of each recorded call: either patch an xStore
 * user directly into a vfst (store from st0), or spill st0 to the
 * frame and reload it into an SSE register via xLoad, rerouting the
 * users and the memory chain.
 */
static void postprocess_fp_call_results(void) {
for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
ir_node *call = call_list[i];
ir_type *mtp = call_types[i];
for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
ir_type *res_tp = get_method_res_type(mtp, j);
ir_node *res, *new_res;
const ir_edge_t *edge, *next;
if (! is_atomic_type(res_tp)) {
/* no floating point return */
mode = get_type_mode(res_tp);
if (! mode_is_float(mode)) {
/* no floating point return */
/* the j-th result lives on the x87 stack (vf0 + j) */
res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
/* now patch the users */
foreach_out_edge_safe(res, edge, next) {
ir_node *succ = get_edge_src_irn(edge);
if (be_is_Keep(succ))
if (is_ia32_xStore(succ)) {
/* an xStore can be patched into an vfst */
dbg_info *db = get_irn_dbg_info(succ);
ir_node *block = get_nodes_block(succ);
ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
ir_mode *mode = get_ia32_ls_mode(succ);
ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
if (is_ia32_use_frame(succ))
set_ia32_use_frame(st);
set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
set_irn_pinned(st, get_irn_pinned(succ));
set_ia32_op_type(st, ia32_AddrModeD);
if (new_res == NULL) {
/* lazily create the st0 -> xmm copy (only once per result) */
dbg_info *db = get_irn_dbg_info(call);
ir_node *block = get_nodes_block(call);
ir_node *frame = get_irg_frame(current_ir_graph);
ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
ir_node *vfst, *xld, *new_mem;
/* store st(0) on stack */
vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
set_ia32_op_type(vfst, ia32_AddrModeD);
set_ia32_use_frame(vfst);
/* load into SSE register */
xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
set_ia32_op_type(xld, ia32_AddrModeS);
set_ia32_use_frame(xld);
new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
/* splice the load into the memory chain */
if (old_mem != NULL) {
edges_reroute(old_mem, new_mem, current_ir_graph);
set_irn_n(succ, get_edge_src_pos(edge), new_res);
/**
 * Do the transformation: entry point of the ia32 transform phase.
 *
 * Sets up the helpers (transformer table, heights, non-address-mode
 * analysis), disables CSE, runs the generic transform driver and, in
 * SSE2 mode, post-processes the x87 call results.
 */
void ia32_transform_graph(ia32_code_gen_t *cg)
register_transformers();
initial_fpcw = NULL;
be_timer_push(T_HEIGHTS);
heights = heights_new(cg->irg);
be_timer_pop(T_HEIGHTS);
ia32_calculate_non_address_mode_nodes(cg->birg);
/* the transform phase is not safe for CSE (yet) because several nodes get
 * attributes set after their creation */
cse_last = get_opt_cse();
/* calls are recorded here for postprocess_fp_call_results */
call_list = NEW_ARR_F(ir_node *, 0);
call_types = NEW_ARR_F(ir_type *, 0);
be_transform_graph(cg->birg, ia32_pretransform_node);
if (ia32_cg_config.use_sse2)
postprocess_fp_call_results();
DEL_ARR_F(call_types);
DEL_ARR_F(call_list);
/* restore the previous CSE setting */
set_opt_cse(cse_last);
ia32_free_non_address_mode_nodes();
heights_free(heights);
5768 void ia32_init_transform(void)
5770 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");