2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 * @brief       This file implements the IR transformation from firm into
 *              ia32-Firm.
 * @author      Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* define this to construct SSE constants instead of load them */
#undef CONSTRUCT_SSE_CONST

/* Bit patterns (given as strings, parsed into tarvals below) for the
 * sign-bit and absolute-value masks of single (SFP) and double (DFP)
 * precision floats. */
#define SFP_SIGN "0x80000000"
#define DFP_SIGN "0x8000000000000000"
#define SFP_ABS "0x7FFFFFFF"
#define DFP_ABS "0x7FFFFFFFFFFFFFFF"
#define DFP_INTMAX "9223372036854775807"
/* 2^64 — the bias value used for the ia32_ULLBIAS constant entity */
#define ULL_BIAS "18446744073709551616"

/* assembler-local label names for the constant-pool entities created in
 * ia32_gen_fp_known_const() */
#define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
#define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
#define ENT_SFP_ABS ".LC_ia32_sfp_abs"
#define ENT_DFP_ABS ".LC_ia32_dfp_abs"
#define ENT_ULL_BIAS ".LC_ia32_ull_bias"

/* register-class modes of the x87 (vfp) and SSE (xmm) register classes */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* cached transformed node for the initial x87 fpcw (floating point control
 * word) register value; set lazily in get_fpcw() */
static ir_node *initial_fpcw = NULL;

/* Constructor-function signatures of the generated new_bd_ia32_* factories;
 * the generic gen_binop/gen_unop helpers below are parameterized with these.
 * NOTE(review): several parameter lists below appear truncated in this
 * extract (missing trailing parameters/closing parens) — verify against the
 * generated gen_ia32 headers. */
typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,

typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
		ir_node *op1, ir_node *op2);

typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op);

typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem);

typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,

typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);

/* forward declarations of helpers defined later in this file */
static ir_node *create_immediate_or_transform(ir_node *node,
		char immediate_constraint_type);

static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
		dbg_info *dbgi, ir_node *block,
		ir_node *op, ir_node *orig_node);

/* its enough to have those once */
static ir_node *nomem, *noreg_GP;

/** a list to postprocess all calls */
static ir_node **call_list;
static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be materialized with a simple SSE
 * instruction sequence instead of a load from the constant pool.
 *
 * NOTE(review): several lines are missing from this extract (the closing
 * paren/branch of the tarval_is_null() condition, the CONSTRUCT_SSE_CONST
 * alternative condition, the zero-test of `val` and the returns/braces) —
 * restore from upstream before compiling.
 */
static bool is_simple_sse_Const(ir_node *node)
	tarval *tv = get_Const_tarval(node);
	ir_mode *mode = get_tarval_mode(tv);

	if (tarval_is_null(tv)
#ifdef CONSTRUCT_SSE_CONST
#ifdef CONSTRUCT_SSE_CONST
	if (mode == mode_D) {
		/* assemble the low 32 bits of the double from its byte values */
		unsigned val = get_tarval_sub_bits(tv, 0) |
		               (get_tarval_sub_bits(tv, 1) << 8) |
		               (get_tarval_sub_bits(tv, 2) << 16) |
		               (get_tarval_sub_bits(tv, 3) << 24);
		/* lower 32bit are zero, really a 32bit constant */
#endif /* CONSTRUCT_SSE_CONST */
	/* TODO: match all the other float constants */
/**
 * Transforms a Const node: integer constants become ia32_Const, float
 * constants become either SSE register constructions/loads (use_sse2) or
 * x87 loads (fldz/fld1 or a vfld from a constant-pool entity).
 *
 * NOTE(review): this extract is missing lines — variable declarations
 * (load, res, floatent, cnst, val, ls_mode), some else-branches, returns
 * and closing braces. Restore from upstream before compiling.
 */
static ir_node *gen_Const(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	assert(is_Const(node));

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2) {
			tarval *tv = get_Const_tarval(node);
			if (tarval_is_null(tv)) {
				/* +0.0 can be produced without a load (xorps-style zero) */
				load = new_bd_ia32_xZero(dbgi, block);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
			} else if (tarval_is_one(tv)) {
				/* 1.0: start from all-ones and shift the mantissa/exponent
				 * bits into place (shift amounts differ for float/double) */
				int cnst = mode == mode_F ? 26 : 55;
				ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
				ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
				ir_node *pslld, *psrld;

				load = new_bd_ia32_xAllOnes(dbgi, block);
				set_ia32_ls_mode(load, mode);
				pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
				set_ia32_ls_mode(pslld, mode);
				psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
				set_ia32_ls_mode(psrld, mode);
#endif /* CONSTRUCT_SSE_CONST */
			} else if (mode == mode_F) {
				/* we can place any 32bit constant by using a movd gp, sse */
				unsigned val = get_tarval_sub_bits(tv, 0) |
				               (get_tarval_sub_bits(tv, 1) << 8) |
				               (get_tarval_sub_bits(tv, 2) << 16) |
				               (get_tarval_sub_bits(tv, 3) << 24);
				ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
				load = new_bd_ia32_xMovd(dbgi, block, cnst);
				set_ia32_ls_mode(load, mode);
#ifdef CONSTRUCT_SSE_CONST
				if (mode == mode_D) {
					unsigned val = get_tarval_sub_bits(tv, 0) |
					               (get_tarval_sub_bits(tv, 1) << 8) |
					               (get_tarval_sub_bits(tv, 2) << 16) |
					               (get_tarval_sub_bits(tv, 3) << 24);
					ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
					ir_node *cnst, *psllq;

					/* fine, lower 32bit are zero, produce 32bit value */
					val = get_tarval_sub_bits(tv, 4) |
					      (get_tarval_sub_bits(tv, 5) << 8) |
					      (get_tarval_sub_bits(tv, 6) << 16) |
					      (get_tarval_sub_bits(tv, 7) << 24);
					cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
					load = new_bd_ia32_xMovd(dbgi, block, cnst);
					set_ia32_ls_mode(load, mode);
					psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
					set_ia32_ls_mode(psllq, mode);
#endif /* CONSTRUCT_SSE_CONST */
			/* general case: load from a constant-pool entity; the load is
			 * rematerializable since the entity is immutable */
			floatent = create_float_const_entity(node);

			load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
			set_ia32_op_type(load, ia32_AddrModeS);
			set_ia32_am_sc(load, floatent);
			arch_irn_add_flags(load, arch_irn_flags_rematerializable);
			res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
		/* x87 path */
		if (is_Const_null(node)) {
			load = new_bd_ia32_vfldz(dbgi, block);
			set_ia32_ls_mode(load, mode);
		} else if (is_Const_one(node)) {
			load = new_bd_ia32_vfld1(dbgi, block);
			set_ia32_ls_mode(load, mode);
		floatent = create_float_const_entity(node);
		/* create_float_const_ent is smart and sometimes creates
		 * a smaller entity, so use the entity's mode for the load */
		ls_mode = get_type_mode(get_entity_type(floatent));

		load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
		set_ia32_op_type(load, ia32_AddrModeS);
		set_ia32_am_sc(load, floatent);
		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
		res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
#ifdef CONSTRUCT_SSE_CONST
#endif /* CONSTRUCT_SSE_CONST */
		SET_IA32_ORIG_NODE(load, node);

		be_dep_on_frame(load);
	} else { /* non-float mode */
		tarval *tv = get_Const_tarval(node);
		tv = tarval_convert_to(tv, mode_Iu);

		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
			panic("couldn't convert constant tarval (%+F)", node);
		val = get_tarval_long(tv);

		cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
		SET_IA32_ORIG_NODE(cnst, node);

		be_dep_on_frame(cnst);
/**
 * Transforms a SymConst: float-mode SymConsts become loads (SSE xLoad or
 * x87 vfld), address SymConsts become an ia32_Const carrying the entity.
 * Only symconst_addr_ent is supported.
 *
 * NOTE(review): missing lines in this extract (the else of the use_sse2
 * branch, local declarations of cnst/entity, returns/braces) — restore
 * from upstream before compiling.
 */
static ir_node *gen_SymConst(ir_node *node)
	ir_node *old_block = get_nodes_block(node);
	ir_node *block = be_transform_node(old_block);
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
		set_ia32_am_sc(cnst, get_SymConst_entity(node));
		set_ia32_use_frame(cnst);
	if (get_SymConst_kind(node) != symconst_addr_ent) {
		panic("backend only support symconst_addr_ent (at %+F)", node);
	entity = get_SymConst_entity(node);
	cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);

	SET_IA32_ORIG_NODE(cnst, node);

	be_dep_on_frame(cnst);
/**
 * Create a primitive type for the given mode with the given alignment and
 * cache it (one static cache slot per mode, indexed by alignment).
 *
 * @param mode  the mode for the type (might be an integer mode for SSE2 types)
 * @param align alignment in bytes (must be < 16, used as cache index)
 *
 * NOTE(review): missing lines in this extract (declaration of tp, closing
 * braces of the if-bodies, the final else header) — restore from upstream.
 */
static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align)
	if (mode == mode_Iu) {
		static ir_type *int_Iu[16] = {NULL, };

		if (int_Iu[align] == NULL) {
			int_Iu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Iu[align];
	} else if (mode == mode_Lu) {
		static ir_type *int_Lu[16] = {NULL, };

		if (int_Lu[align] == NULL) {
			int_Lu[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return int_Lu[align];
	} else if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] == NULL) {
			float_F[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_F[align];
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] == NULL) {
			float_D[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_D[align];
		/* fallback: extended precision (mode_E) cache */
		static ir_type *float_E[16] = {NULL, };

		if (float_E[align] == NULL) {
			float_E[align] = tp = new_type_primitive(mode);
			/* set the specified alignment */
			set_type_alignment_bytes(tp, align);
		return float_E[align];
/**
 * Create a float[2] array type for the given atomic type and cache it,
 * keyed by element mode and alignment. The array inherits the element
 * alignment and has fixed layout with size 2 * element size.
 *
 * @param tp the atomic element type
 *
 * NOTE(review): missing lines in this extract (declaration of arr, the
 * final else header, the return and closing brace) — restore from upstream.
 */
static ir_type *ia32_create_float_array(ir_type *tp)
	ir_mode *mode = get_type_mode(tp);
	unsigned align = get_type_alignment_bytes(tp);

	if (mode == mode_F) {
		static ir_type *float_F[16] = {NULL, };

		if (float_F[align] != NULL)
			return float_F[align];
		arr = float_F[align] = new_type_array(1, tp);
	} else if (mode == mode_D) {
		static ir_type *float_D[16] = {NULL, };

		if (float_D[align] != NULL)
			return float_D[align];
		arr = float_D[align] = new_type_array(1, tp);
		/* fallback cache for extended precision elements */
		static ir_type *float_E[16] = {NULL, };

		if (float_E[align] != NULL)
			return float_E[align];
		arr = float_E[align] = new_type_array(1, tp);
	set_type_alignment_bytes(arr, align);
	set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
	set_type_state(arr, layout_fixed);
/**
 * Generates (and caches) a global constant-pool entity for a known FP
 * constant (used for FP Neg + Abs masks and the unsigned-long-long bias).
 * The ULLBIAS entry is emitted as a two-element array {0, 2^64}.
 */
ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
	static const struct {
		const char *ent_name;
		const char *cnst_str;
		/* NOTE(review): two fields appear missing here (the `mode` selector
		 * and `align` used below) — restore from upstream. */
	} names [ia32_known_const_max] = {
		{ ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
		{ ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
		{ ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
		{ ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
		{ ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
	static ir_entity *ent_cache[ia32_known_const_max];

	const char *ent_name, *cnst_str;

	ent_name = names[kct].ent_name;
	if (! ent_cache[kct]) {
		cnst_str = names[kct].cnst_str;

		/* pick the tarval mode matching the table's mode selector */
		switch (names[kct].mode) {
		case 0: mode = mode_Iu; break;
		case 1: mode = mode_Lu; break;
		default: mode = mode_F; break;
		tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
		tp = ia32_create_float_type(mode, names[kct].align);

		if (kct == ia32_ULLBIAS)
			tp = ia32_create_float_array(tp);
		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);

		set_entity_ld_ident(ent, get_entity_ident(ent));
		set_entity_visibility(ent, visibility_local);
		set_entity_variability(ent, variability_constant);
		set_entity_allocation(ent, allocation_static);

		if (kct == ia32_ULLBIAS) {
			/* {0, bias}: only the upper element carries the 2^64 value */
			ir_initializer_t *initializer = create_initializer_compound(2);

			set_initializer_compound_value(initializer, 0,
				create_initializer_tarval(get_tarval_null(mode)));
			set_initializer_compound_value(initializer, 1,
				create_initializer_tarval(tv));

			set_entity_initializer(ent, initializer);
			set_entity_initializer(ent, create_initializer_tarval(tv));

		/* cache the entry */
		ent_cache[kct] = ent;

	return ent_cache[kct];
/**
 * Returns true if the node is a Proj(Load) (or a simple float constant)
 * that could be folded into source address mode of another node. Returns
 * true only if @p other / @p other2 do not depend on the Load's memory
 * (pass the other operand(s) of a binop; NULL for unops).
 *
 * NOTE(review): return statements and some braces are missing from this
 * extract — restore from upstream before compiling.
 */
static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
		ir_node *other, ir_node *other2, match_flags_t flags)
	/* float constants are always available */
	if (is_Const(node)) {
		ir_mode *mode = get_irn_mode(node);
		if (mode_is_float(mode)) {
			if (ia32_cg_config.use_sse2) {
				if (is_simple_sse_Const(node))
			if (is_simple_x87_Const(node))
			/* multiple users: materializing once is cheaper than AM */
			if (get_irn_n_edges(node) > 1)
	load = get_Proj_pred(node);
	pn = get_Proj_proj(node);
	if (!is_Load(load) || pn != pn_Load_res)
	if (get_nodes_block(load) != block)
	/* we only use address mode if we're the only user of the load */
	if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
	/* in some edge cases with address mode we might reach the load normally
	 * and through some AM sequence, if it is already materialized then we
	 * can't create an AM node from it */
	if (be_is_transformed(node))

	/* don't do AM if other node inputs depend on the load (via mem-proj) */
	if (other != NULL && prevents_AM(block, load, other))

	if (other2 != NULL && prevents_AM(block, load, other2))
typedef struct ia32_address_mode_t ia32_address_mode_t;
/* Result of operand matching: address-mode info plus matched operands.
 * NOTE(review): several fields are missing from this extract (addr,
 * ls_mode, pinned, mem_proj, new_op1/new_op2 — all referenced by the
 * helpers below) — restore from upstream. */
struct ia32_address_mode_t {
	ia32_op_type_t op_type;      /* ia32_Normal or ia32_AddrModeS */
	unsigned commutative : 1;    /* operands may be swapped */
	unsigned ins_permuted : 1;   /* operands were swapped during matching */
599 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
601 /* construct load address */
602 memset(addr, 0, sizeof(addr[0]));
603 ia32_create_address_mode(addr, ptr, 0);
605 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
606 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
607 addr->mem = be_transform_node(mem);
/* Fills am->addr (and ls_mode/pinned/mem_proj) for a node usable in source
 * address mode: a float Const is addressed via a constant-pool entity,
 * otherwise the node is a Proj(Load) whose address is decomposed.
 * NOTE(review): some lines missing (Load/ptr/mem declarations, symconst
 * sign/offset setup, early return, closing brace) — restore from upstream. */
static void build_address(ia32_address_mode_t *am, ir_node *node,
		ia32_create_am_flags_t flags)
	ia32_address_t *addr = &am->addr;

	if (is_Const(node)) {
		/* float constant: address it via its pool entity */
		ir_entity *entity = create_float_const_entity(node);
		addr->base = noreg_GP;
		addr->index = noreg_GP;
		addr->symconst_ent = entity;
		am->ls_mode = get_type_mode(get_entity_type(entity));
		am->pinned = op_pin_state_floats;
	load = get_Proj_pred(node);
	ptr = get_Load_ptr(load);
	mem = get_Load_mem(load);
	new_mem = be_transform_node(mem);
	am->pinned = get_irn_pinned(load);
	am->ls_mode = get_Load_mode(load);
	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);

	/* construct load address */
	ia32_create_address_mode(addr, ptr, flags);

	addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
	addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
/* Copies the address-mode attributes (scale, symconst, offset, frame info)
 * from addr onto the given ia32 node.
 * NOTE(review): the condition guarding the use_frame/frame_entity lines
 * appears missing from this extract — restore from upstream. */
static void set_address(ir_node *node, const ia32_address_t *addr)
	set_ia32_am_scale(node, addr->scale);
	set_ia32_am_sc(node, addr->symconst_ent);
	set_ia32_am_offs_int(node, addr->offset);
	if (addr->symconst_sign)
		set_ia32_am_sc_sign(node);
	set_ia32_use_frame(node);
	set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Apply attributes of a given address mode to a node: address, op type,
 * load/store mode, pinned state and commutativity.
 * NOTE(review): the guard before set_ia32_commutative and closing braces
 * appear missing from this extract — restore from upstream.
 */
static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
	set_address(node, &am->addr);

	set_ia32_op_type(node, am->op_type);
	set_ia32_ls_mode(node, am->ls_mode);
	if (am->pinned == op_pin_state_pinned) {
		/* beware: some nodes are already pinned and did not allow to change the state */
		if (get_irn_pinned(node) != op_pin_state_pinned)
			set_irn_pinned(node, op_pin_state_pinned);
	set_ia32_commutative(node);
/**
 * Check if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer bits. Returns true only if the node
 * has no more than 1 user (so skipping it cannot duplicate work).
 *
 * @param node the node
 * @return non-zero if node is a Down-Conv
 *
 * NOTE(review): the is_Conv() guard, local declarations and the return
 * keyword before the final expression appear missing — restore upstream.
 */
static int is_downconv(const ir_node *node)
	/* we only want to skip the conv when we're the only user
	 * (not optimal but for now...)
	 */
	if (get_irn_n_edges(node) > 1)

	src_mode = get_irn_mode(get_Conv_op(node));
	dest_mode = get_irn_mode(node);
		ia32_mode_needs_gp_reg(src_mode) &&
		ia32_mode_needs_gp_reg(dest_mode) &&
		get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
708 /* Skip all Down-Conv's on a given node and return the resulting node. */
709 ir_node *ia32_skip_downconv(ir_node *node)
711 while (is_downconv(node))
712 node = get_Conv_op(node);
/* Widens a small-mode value to 32 bit via an I2I Conv (signed target mode
 * if the source is signed).
 * NOTE(review): the tgt_mode selection (mode_Is/mode_Iu branches) and local
 * declarations are missing from this extract — restore from upstream. */
static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_signed(mode)) {
	block = get_nodes_block(node);
	dbgi = get_irn_dbg_info(node);

	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches operands of a node into ia32 addressing/operand modes. This covers
 * usage of source address mode, immediates, operations with non 32-bit modes.
 * The resulting data is filled into the @p am struct. block is the block
 * of the node whose arguments are matched. op1, op2 are the first and second
 * input that are matched (op1 may be NULL). other_op is another unrelated
 * input that is not matched! but which is needed sometimes to check if AM
 * for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 *
 * NOTE(review): this extract is missing lines throughout (mode_is_float
 * special-casing, several branch bodies, noreg handling, closing braces) —
 * restore from upstream before compiling.
 */
static void match_arguments(ia32_address_mode_t *am, ir_node *block,
		ir_node *op1, ir_node *op2, ir_node *other_op,
	ia32_address_t *addr = &am->addr;
	ir_mode *mode = get_irn_mode(op2);
	int mode_bits = get_mode_size_bits(mode);
	ir_node *new_op1, *new_op2;
	unsigned commutative;
	int use_am_and_immediates;

	memset(am, 0, sizeof(am[0]));

	/* decode the supported matching options from the flag word */
	commutative = (flags & match_commutative) != 0;
	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
	use_am = (flags & match_am) != 0;
	use_immediate = (flags & match_immediate) != 0;
	assert(!use_am_and_immediates || use_immediate);

	assert(!commutative || op1 != NULL);
	assert(use_am || !(flags & match_8bit_am));
	assert(use_am || !(flags & match_16bit_am));

	/* narrow modes need explicit 8/16-bit AM support */
	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
	    (mode_bits == 16 && !(flags & match_16bit_am))) {

	/* we can simply skip downconvs for mode neutral nodes: the upper bits
	 * can be random for these operations */
	if (flags & match_mode_neutral) {
		op2 = ia32_skip_downconv(op2);
			op1 = ia32_skip_downconv(op1);

	/* match immediates. firm nodes are normalized: constants are always on the
	 * right side */
	if (!(flags & match_try_am) && use_immediate) {
		new_op2 = try_create_Immediate(op2, 0);

	if (new_op2 == NULL &&
	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
		build_address(am, op2, 0);
		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
		if (mode_is_float(mode)) {
			new_op2 = ia32_new_NoReg_vfp(env_cg);
		am->op_type = ia32_AddrModeS;
	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
	           ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
		/* swap operands so op1 becomes the memory operand */
		build_address(am, op1, 0);

		if (mode_is_float(mode)) {
			noreg = ia32_new_NoReg_vfp(env_cg);

		if (new_op2 != NULL) {
			new_op1 = be_transform_node(op2);
			am->ins_permuted = 1;
		am->op_type = ia32_AddrModeS;
		/* no AM possible: plain register operation */
		am->op_type = ia32_Normal;

		if (flags & match_try_am) {

		mode = get_irn_mode(op2);
		if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
			/* widen both operands to 32 bit */
			new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
			new_op2 = create_upconv(op2, NULL);
			am->ls_mode = mode_Iu;
			new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
			new_op2 = be_transform_node(op2);
			am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;

	/* fill unused address parts with the no-register placeholder */
	if (addr->base == NULL)
		addr->base = noreg_GP;
	if (addr->index == NULL)
		addr->index = noreg_GP;
	if (addr->mem == NULL)

	am->new_op1 = new_op1;
	am->new_op2 = new_op2;
	am->commutative = commutative;
/**
 * "Fixes" a node that uses address mode by turning it into mode_T
 * and returning a pn_ia32_res Proj, so the old MemProj can re-attach.
 *
 * @param node the node
 * @param am its address mode
 *
 * @return a Proj(pn_ia32_res) if a memory address mode is used,
 *         the node otherwise (NOTE(review): early/late returns and braces
 *         are missing from this extract — restore from upstream)
 */
static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
	if (am->mem_proj == NULL)

	/* we have to create a mode_T so the old MemProj can attach to us */
	mode = get_irn_mode(node);
	load = get_Proj_pred(am->mem_proj);

	be_set_transformed_node(load, node);

	if (mode != mode_T) {
		set_irn_mode(node, mode_T);
		return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation, set AM and immediate if required.
 *
 * @param node  The original node for which the binop is created
 * @param op1   The first operand
 * @param op2   The second operand
 * @param func  The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): dbgi declaration, flags parameter line formatting and the
 * final return are missing/truncated in this extract.
 */
static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_func *func, match_flags_t flags)
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Generic names for the inputs of an ia32 binary op; the asserts below
 * verify they line up with the generated l_Adc/l_Sbb input positions.
 * NOTE(review): the `enum {` header and closing `};` of this enum appear
 * missing from this extract — restore from upstream.
 */
	n_ia32_l_binop_left, /**< ia32 left input */
	n_ia32_l_binop_right, /**< ia32 right input */
	n_ia32_l_binop_eflags /**< ia32 eflags input */
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node  The node to transform
 * @param func  The node constructor function
 * @param flags The match flags
 * @return The constructed ia32 node
 *
 * NOTE(review): the flags parameter, dbgi declaration and final return are
 * missing/truncated in this extract.
 */
static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
	ir_node *src_block = get_nodes_block(node);
	ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
	ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
	ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
	ir_node *block, *new_node, *new_eflags;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	match_arguments(&am, src_block, op1, op2, eflags, flags);

	dbgi = get_irn_dbg_info(node);
	block = be_transform_node(src_block);
	new_eflags = be_transform_node(eflags);
	new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2, new_eflags);
	set_am_attributes(new_node, &am);
	/* we can't use source address mode anymore when using immediates */
	if (!(flags & match_am_and_immediates) &&
	    (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
		set_ia32_am_support(new_node, ia32_am_none);
	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/* Returns the (lazily transformed) initial x87 fpcw register value; the
 * result is cached in the file-scope initial_fpcw.
 * NOTE(review): early return, fpcw declaration and final return are
 * missing from this extract — restore from upstream. */
static ir_node *get_fpcw(void)
	if (initial_fpcw != NULL)

	fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
	                             &ia32_fp_cw_regs[REG_FPCW]);
	initial_fpcw = be_transform_node(fpcw);
/**
 * Construct a standard x87 binary float operation, set AM and immediate if
 * required. Address mode is only allowed for operand sizes <= 64 bit
 * (cannot use AM with long double on x87).
 *
 * @param op1 The first operand
 * @param op2 The second operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): dbgi declaration, the Div special-case condition, the
 * `flags |= match_am` line and the final return are missing/truncated.
 */
static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
		construct_binop_float_func *func)
	ir_mode *mode = get_irn_mode(node);
	ir_node *block, *new_block, *new_node;
	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;
	ia32_x87_attr_t *attr;
	/* All operations are considered commutative, because there are reverse
	 * variants */
	match_flags_t flags = match_commutative;

	/* happens for div nodes... */
		mode = get_divop_resmod(node);

	/* cannot use address mode with long double on x87 */
	if (get_mode_size_bits(mode) <= 64)

	block = get_nodes_block(node);
	match_arguments(&am, block, op1, op2, NULL, flags);

	dbgi = get_irn_dbg_info(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
	                am.new_op1, am.new_op2, get_fpcw());
	set_am_attributes(new_node, &am);

	attr = get_ia32_x87_attr(new_node);
	attr->attr.data.ins_permuted = am.ins_permuted;

	SET_IA32_ORIG_NODE(new_node, node);

	new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation, sets AM and immediate if
 * required. The shift amount may be any mode wider than 5 bits since the
 * hardware only looks at the low 5 bits anyway.
 *
 * @param op1 The first operand (value to shift)
 * @param op2 The second operand (shift amount)
 * @param func The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): dbgi declaration, the Conv-skip assignment inside the
 * while loop and the final return are missing from this extract.
 */
static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
		construct_shift_func *func,
		match_flags_t flags)
	ir_node *block, *new_block, *new_op1, *new_op2, *new_node;

	assert(! mode_is_float(get_irn_mode(node)));
	assert(flags & match_immediate);
	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);

	if (flags & match_mode_neutral) {
		op1 = ia32_skip_downconv(op1);
		new_op1 = be_transform_node(op1);
	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
		new_op1 = create_upconv(op1, node);
		new_op1 = be_transform_node(op1);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
		ir_node *const op = get_Conv_op(op2);
		if (mode_is_float(get_irn_mode(op)))
	assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);

	new_op2 = create_immediate_or_transform(op2, 0);

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op1, new_op2);
	SET_IA32_ORIG_NODE(new_node, node);

	/* lowered shift instruction may have a dependency operand, handle it here */
	if (get_irn_arity(node) == 3) {
		/* we have a dependency */
		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
		add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation, set AM and immediate if required.
 *
 * @param op The operand
 * @param func The node constructor function
 * @return The constructed ia32 node.
 *
 * NOTE(review): dbgi declaration, closing braces and the final return are
 * missing from this extract.
 */
static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
		match_flags_t flags)
	ir_node *block, *new_block, *new_op, *new_node;

	assert(flags == 0 || flags == match_mode_neutral);
	if (flags & match_mode_neutral) {
		op = ia32_skip_downconv(op);

	new_op = be_transform_node(op);
	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);
	new_node = func(dbgi, new_block, new_op);

	SET_IA32_ORIG_NODE(new_node, node);
/* Builds an ia32 Lea node from a decomposed address; missing base/index
 * are replaced (NOTE(review): the noreg_GP fallback assignments and the
 * final return are missing from this extract — restore from upstream). */
static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
		ia32_address_t *addr)
	ir_node *base, *index, *res;

	base = be_transform_node(base);

	index = addr->index;
	if (index == NULL) {
		index = be_transform_node(index);

	res = new_bd_ia32_Lea(dbgi, block, base, index);
	set_address(res, addr);
1152 * Returns non-zero if a given address mode has a symbolic or
1153 * numerical offset != 0.
1155 static int am_has_immediates(const ia32_address_t *addr)
1157 return addr->offset != 0 || addr->symconst_ent != NULL
1158 || addr->frame_entity || addr->use_frame;
/**
 * Creates an ia32 Add. Float adds become xAdd (SSE2) or vfadd (x87);
 * integer adds are matched, in order, as: pure immediate tree -> Const,
 * add-with-immediate -> Lea, source-address-mode -> Add, otherwise -> Lea.
 *
 * @return the created ia32 Add node
 *
 * NOTE(review): several lines are missing from this extract (dbgi
 * declaration, returns after the Const/Lea cases, closing braces) —
 * restore from upstream before compiling.
 */
static ir_node *gen_Add(ir_node *node)
	ir_mode *mode = get_irn_mode(node);
	ir_node *op1 = get_Add_left(node);
	ir_node *op2 = get_Add_right(node);
	ir_node *block, *new_block, *new_node, *add_immediate_op;
	ia32_address_t addr;
	ia32_address_mode_t am;

	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
			                 match_commutative | match_am);
		return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);

	ia32_mark_non_am(node);

	op2 = ia32_skip_downconv(op2);
	op1 = ia32_skip_downconv(op1);

	/*
	 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
	 * 1. Add with immediate -> Lea
	 * 2. Add with possible source address mode -> Add
	 * 3. Otherwise -> Lea
	 */
	memset(&addr, 0, sizeof(addr));
	ia32_create_address_mode(&addr, node, ia32_create_am_force);
	add_immediate_op = NULL;

	dbgi = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	new_block = be_transform_node(block);

	/* case 0: whole tree folded into an immediate -> emit a Const */
	if (addr.base == NULL && addr.index == NULL) {
		new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
		                             addr.symconst_sign, 0, addr.offset);
		be_dep_on_frame(new_node);
		SET_IA32_ORIG_NODE(new_node, node);

	/* add with immediate? */
	if (addr.index == NULL) {
		add_immediate_op = addr.base;
	} else if (addr.base == NULL && addr.scale == 0) {
		add_immediate_op = addr.index;

	if (add_immediate_op != NULL) {
		if (!am_has_immediates(&addr)) {
#ifdef DEBUG_libfirm
			ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
			return be_transform_node(add_immediate_op);

		new_node = create_lea_from_address(dbgi, new_block, &addr);
		SET_IA32_ORIG_NODE(new_node, node);

	/* test if we can use source address mode */
	match_arguments(&am, block, op1, op2, NULL, match_commutative
			| match_mode_neutral | match_am | match_immediate | match_try_am);

	/* construct an Add with source address mode */
	if (am.op_type == ia32_AddrModeS) {
		ia32_address_t *am_addr = &am.addr;
		new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
		                           am_addr->index, am_addr->mem, am.new_op1,
		set_am_attributes(new_node, &am);
		SET_IA32_ORIG_NODE(new_node, node);

		new_node = fix_mem_proj(new_node, &am);

	/* otherwise construct a lea */
	new_node = create_lea_from_address(dbgi, new_block, &addr);
	SET_IA32_ORIG_NODE(new_node, node);
/* Transform a firm Mul: SSE2 xMul or x87 vfmul for floats, otherwise an
 * integer IMul allowing commutativity, source AM and immediates.
 * NOTE(review): excerpt is truncated; braces missing from the listing. */
1258 * Creates an ia32 Mul.
1260 * @return the created ia32 Mul node
1262 static ir_node *gen_Mul(ir_node *node)
1264 ir_node *op1 = get_Mul_left(node);
1265 ir_node *op2 = get_Mul_right(node);
1266 ir_mode *mode = get_irn_mode(node);
1268 if (mode_is_float(mode)) {
1269 if (ia32_cg_config.use_sse2)
1270 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1271 match_commutative | match_am);
1273 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1275 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1276 match_commutative | match_am | match_mode_neutral |
1277 match_immediate | match_am_and_immediates);
/* Transform a firm Mulh (high half of a widening multiply).  Only 32-bit
 * modes are supported; signed uses IMul1OP, unsigned uses Mul, and the
 * result is the res_high Proj of the multiply.
 * NOTE(review): excerpt is truncated; braces missing from the listing. */
1281 * Creates an ia32 Mulh.
1282 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1283 * this result while Mul returns the lower 32 bit.
1285 * @return the created ia32 Mulh node
1287 static ir_node *gen_Mulh(ir_node *node)
1289 ir_node *block = get_nodes_block(node);
1290 ir_node *new_block = be_transform_node(block);
1291 dbg_info *dbgi = get_irn_dbg_info(node);
1292 ir_node *op1 = get_Mulh_left(node);
1293 ir_node *op2 = get_Mulh_right(node);
1294 ir_mode *mode = get_irn_mode(node);
1296 ir_node *proj_res_high;
1298 if (get_mode_size_bits(mode) != 32) {
1299 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
1302 if (mode_is_signed(mode)) {
1303 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1304 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1306 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1307 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1309 return proj_res_high;
/* Transform a firm And.  And with 0xFF/0xFFFF is recognised as a zero
 * extension and turned into an I2I conversion instead of an And.
 * NOTE(review): excerpt is truncated — lines between 1330 and 1337 (the
 * src_mode setup for the 0xFF case) are missing from this listing. */
1313 * Creates an ia32 And.
1315 * @return The created ia32 And node
1317 static ir_node *gen_And(ir_node *node)
1319 ir_node *op1 = get_And_left(node);
1320 ir_node *op2 = get_And_right(node);
1321 assert(! mode_is_float(get_irn_mode(node)));
1323 /* is it a zero extension? */
1324 if (is_Const(op2)) {
1325 tarval *tv = get_Const_tarval(op2);
1326 long v = get_tarval_long(tv);
1328 if (v == 0xFF || v == 0xFFFF) {
1329 dbg_info *dbgi = get_irn_dbg_info(node);
1330 ir_node *block = get_nodes_block(node);
1337 assert(v == 0xFFFF);
1340 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1345 return gen_binop(node, op1, op2, new_bd_ia32_And,
1346 match_commutative | match_mode_neutral | match_am | match_immediate);
/* Transform a firm Or into an ia32 Or (integer only; commutative, AM and
 * immediate operands allowed). */
1352 * Creates an ia32 Or.
1354 * @return The created ia32 Or node
1356 static ir_node *gen_Or(ir_node *node)
1358 ir_node *op1 = get_Or_left(node);
1359 ir_node *op2 = get_Or_right(node);
1361 assert (! mode_is_float(get_irn_mode(node)));
1362 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1363 | match_mode_neutral | match_am | match_immediate);
/* Transform a firm Eor (exclusive or) into an ia32 Xor (integer only). */
1369 * Creates an ia32 Eor.
1371 * @return The created ia32 Eor node
1373 static ir_node *gen_Eor(ir_node *node)
1375 ir_node *op1 = get_Eor_left(node);
1376 ir_node *op2 = get_Eor_right(node);
1378 assert(! mode_is_float(get_irn_mode(node)));
1379 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1380 | match_mode_neutral | match_am | match_immediate);
/* Transform a firm Sub: xSub (SSE2) or vfsub (x87) for floats, otherwise
 * an integer Sub.  A Sub with a Const right operand is flagged as an
 * optimisation warning (should have been normalised to Add earlier).
 * NOTE(review): excerpt is truncated; braces missing from the listing. */
1385 * Creates an ia32 Sub.
1387 * @return The created ia32 Sub node
1389 static ir_node *gen_Sub(ir_node *node)
1391 ir_node *op1 = get_Sub_left(node);
1392 ir_node *op2 = get_Sub_right(node);
1393 ir_mode *mode = get_irn_mode(node);
1395 if (mode_is_float(mode)) {
1396 if (ia32_cg_config.use_sse2)
1397 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1399 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
1402 if (is_Const(op2)) {
1403 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1407 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1408 | match_am | match_immediate);
/* Combine the memory input of a node being transformed (src_mem) with the
 * memory consumed by its address-mode operand (am_mem), avoiding memory
 * loops: when src_val and src_mem project from the same node, or when
 * src_mem is a Sync containing such a projection, the offending predecessor
 * is dropped and a new Sync is built.
 * NOTE(review): excerpt is truncated — several lines (e.g. the early-return
 * in the memory-loop branch and the two-element-Sync tail) are missing. */
1411 static ir_node *transform_AM_mem(ir_node *const block,
1412 ir_node *const src_val,
1413 ir_node *const src_mem,
1414 ir_node *const am_mem)
1416 if (is_NoMem(am_mem)) {
1417 return be_transform_node(src_mem);
1418 } else if (is_Proj(src_val) &&
1420 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1421 /* avoid memory loop */
1423 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1424 ir_node *const ptr_pred = get_Proj_pred(src_val);
1425 int const arity = get_Sync_n_preds(src_mem);
1430 NEW_ARR_A(ir_node*, ins, arity + 1);
1432 /* NOTE: This sometimes produces dead-code because the old sync in
1433 * src_mem might not be used anymore, we should detect this case
1434 * and kill the sync... */
1435 for (i = arity - 1; i >= 0; --i) {
1436 ir_node *const pred = get_Sync_pred(src_mem, i);
1438 /* avoid memory loop */
1439 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1442 ins[n++] = be_transform_node(pred);
1447 return new_r_Sync(block, n, ins);
1451 ins[0] = be_transform_node(src_mem);
1453 return new_r_Sync(block, 2, ins);
/* Produce the upper 32 bits of a 32->64 bit sign extension of val: either a
 * Cltd (cdq; preferred when use_short_sex_eax is set, fed by a ProduceVal to
 * help the register allocator) or an arithmetic shift right by 31. */
1458 * Create a 32bit to 64bit signed extension.
1460 * @param dbgi debug info
1461 * @param block the block where node nodes should be placed
1462 * @param val the value to extend
1463 * @param orig the original node
1465 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1466 ir_node *val, const ir_node *orig)
1471 if (ia32_cg_config.use_short_sex_eax) {
1472 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1473 be_dep_on_frame(pval);
1474 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1476 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1477 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1479 SET_IA32_ORIG_NODE(res, orig);
/* Shared transformer for firm Div / Mod / DivMod nodes.  Extracts operands,
 * memory and result mode per opcode, matches source address mode (op2 only),
 * then builds IDiv with a sign extension (signed) or Div with a zero Const
 * as the high word (unsigned).
 * NOTE(review): excerpt is truncated — the iro_* case labels of the switch
 * and the final return are missing from this listing. */
1484 * Generates an ia32 DivMod with additional infrastructure for the
1485 * register allocator if needed.
1487 static ir_node *create_Div(ir_node *node)
1489 dbg_info *dbgi = get_irn_dbg_info(node);
1490 ir_node *block = get_nodes_block(node);
1491 ir_node *new_block = be_transform_node(block);
1498 ir_node *sign_extension;
1499 ia32_address_mode_t am;
1500 ia32_address_t *addr = &am.addr;
1502 /* the upper bits have random contents for smaller modes */
1503 switch (get_irn_opcode(node)) {
1505 op1 = get_Div_left(node);
1506 op2 = get_Div_right(node);
1507 mem = get_Div_mem(node);
1508 mode = get_Div_resmode(node);
1511 op1 = get_Mod_left(node);
1512 op2 = get_Mod_right(node);
1513 mem = get_Mod_mem(node);
1514 mode = get_Mod_resmode(node);
1517 op1 = get_DivMod_left(node);
1518 op2 = get_DivMod_right(node);
1519 mem = get_DivMod_mem(node);
1520 mode = get_DivMod_resmode(node);
1523 panic("invalid divmod node %+F", node);
1526 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1528 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1529 is the memory of the consumed address. We can have only the second op as address
1530 in Div nodes, so check only op2. */
1531 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
1533 if (mode_is_signed(mode)) {
1534 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1535 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1536 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
/* unsigned: high word of the dividend is zero */
1538 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1539 be_dep_on_frame(sign_extension);
1541 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1542 addr->index, new_mem, am.new_op2,
1543 am.new_op1, sign_extension);
1546 set_irn_pinned(new_node, get_irn_pinned(node));
1548 set_am_attributes(new_node, &am);
1549 SET_IA32_ORIG_NODE(new_node, node);
1551 new_node = fix_mem_proj(new_node, &am);
/* Transform a firm Mod node; delegates to the shared create_Div helper. */
1557 * Generates an ia32 Mod.
1559 static ir_node *gen_Mod(ir_node *node)
1561 return create_Div(node);
/* Transform a firm Div node; delegates to the shared create_Div helper. */
1565 * Generates an ia32 Div.
1567 static ir_node *gen_Div(ir_node *node)
1569 return create_Div(node);
/* Transform a firm DivMod node; delegates to the shared create_Div helper. */
1573 * Generates an ia32 DivMod.
1575 static ir_node *gen_DivMod(ir_node *node)
1577 return create_Div(node);
/* Transform a firm Quot (floating-point division): xDiv with SSE2,
 * vfdiv on the x87 stack otherwise. */
1583 * Creates an ia32 floating Div.
1585 * @return The created ia32 xDiv node
1587 static ir_node *gen_Quot(ir_node *node)
1589 ir_node *op1 = get_Quot_left(node);
1590 ir_node *op2 = get_Quot_right(node);
1592 if (ia32_cg_config.use_sse2) {
1593 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1595 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
/* Transform a firm Shl (left shift) into an ia32 Shl; mode-neutral and
 * immediate shift counts are allowed. */
1601 * Creates an ia32 Shl.
1603 * @return The created ia32 Shl node
1605 static ir_node *gen_Shl(ir_node *node)
1607 ir_node *left = get_Shl_left(node);
1608 ir_node *right = get_Shl_right(node);
1610 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1611 match_mode_neutral | match_immediate);
/* Transform a firm Shr (logical right shift) into an ia32 Shr.  Not
 * mode-neutral: upper bits matter for a right shift. */
1615 * Creates an ia32 Shr.
1617 * @return The created ia32 Shr node
1619 static ir_node *gen_Shr(ir_node *node)
1621 ir_node *left = get_Shr_left(node);
1622 ir_node *right = get_Shr_right(node);
1624 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
/* Transform a firm Shrs (arithmetic right shift) into an ia32 Sar.  Two
 * special patterns are recognised first: Shrs(x, 31) as a 32->64 sign
 * extension, and Shrs(Shl(x, C), C) with C in {16, 24} as an 8/16-bit
 * sign-extending conversion.
 * NOTE(review): excerpt is truncated — the val==31 guard of the first
 * pattern and the src_mode setup of the second are missing here. */
1630 * Creates an ia32 Sar.
1632 * @return The created ia32 Shrs node
1634 static ir_node *gen_Shrs(ir_node *node)
1636 ir_node *left = get_Shrs_left(node);
1637 ir_node *right = get_Shrs_right(node);
1639 if (is_Const(right)) {
1640 tarval *tv = get_Const_tarval(right);
1641 long val = get_tarval_long(tv);
1643 /* this is a sign extension */
1644 dbg_info *dbgi = get_irn_dbg_info(node);
1645 ir_node *block = be_transform_node(get_nodes_block(node));
1646 ir_node *new_op = be_transform_node(left);
1648 return create_sex_32_64(dbgi, block, new_op, node);
1652 /* 8 or 16 bit sign extension? */
1653 if (is_Const(right) && is_Shl(left)) {
1654 ir_node *shl_left = get_Shl_left(left);
1655 ir_node *shl_right = get_Shl_right(left);
1656 if (is_Const(shl_right)) {
1657 tarval *tv1 = get_Const_tarval(right);
1658 tarval *tv2 = get_Const_tarval(shl_right);
1659 if (tv1 == tv2 && tarval_is_long(tv1)) {
1660 long val = get_tarval_long(tv1);
1661 if (val == 16 || val == 24) {
1662 dbg_info *dbgi = get_irn_dbg_info(node);
1663 ir_node *block = get_nodes_block(node);
1673 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1682 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
/* Emit an ia32 Rol (rotate left) for node with operands op1/op2;
 * immediate rotate counts are allowed. */
1688 * Creates an ia32 Rol.
1690 * @param op1 The first operator
1691 * @param op2 The second operator
1692 * @return The created ia32 RotL node
1694 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1696 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
/* Emit an ia32 Ror (rotate right).  Used only for the RotL-of-negated-count
 * pattern matched in gen_Rotl below; see the original note on immediates. */
1702 * Creates an ia32 Ror.
1703 * NOTE: There is no RotR with immediate because this would always be a RotL
1704 * "imm-mode_size_bits" which can be pre-calculated.
1706 * @param op1 The first operator
1707 * @param op2 The second operator
1708 * @return The created ia32 RotR node
1710 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1712 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
/* Transform a firm Rotl.  If the rotate count has the shape
 * Add(Minus(e), mode_size_bits) the node is emitted as a RotR by e;
 * otherwise a plain RotL is emitted.
 * NOTE(review): excerpt is truncated — the is_Add(op2) guard / `add`
 * binding before line 1734 and the final return are missing here. */
1718 * Creates an ia32 RotR or RotL (depending on the found pattern).
1720 * @return The created ia32 RotL or RotR node
1722 static ir_node *gen_Rotl(ir_node *node)
1724 ir_node *rotate = NULL;
1725 ir_node *op1 = get_Rotl_left(node);
1726 ir_node *op2 = get_Rotl_right(node);
1728 /* Firm has only RotL, so we are looking for a right (op2)
1729 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1730 that means we can create a RotR instead of an Add and a RotL */
1734 ir_node *left = get_Add_left(add);
1735 ir_node *right = get_Add_right(add);
1736 if (is_Const(right)) {
1737 tarval *tv = get_Const_tarval(right);
1738 ir_mode *mode = get_irn_mode(node);
1739 long bits = get_mode_size_bits(mode);
1741 if (is_Minus(left) &&
1742 tarval_is_long(tv) &&
1743 get_tarval_long(tv) == bits &&
1746 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1747 rotate = gen_Ror(node, op1, get_Minus_op(left));
1752 if (rotate == NULL) {
1753 rotate = gen_Rol(node, op1, op2);
/* Transform a firm Minus.  Floats: SSE2 flips the sign bit by xXor-ing with
 * a known sign-mask constant loaded via address mode; x87 uses vfchs.
 * Integers: a plain Neg. */
1762 * Transforms a Minus node.
1764 * @return The created ia32 Minus node
1766 static ir_node *gen_Minus(ir_node *node)
1768 ir_node *op = get_Minus_op(node);
1769 ir_node *block = be_transform_node(get_nodes_block(node));
1770 dbg_info *dbgi = get_irn_dbg_info(node);
1771 ir_mode *mode = get_irn_mode(node);
1776 if (mode_is_float(mode)) {
1777 ir_node *new_op = be_transform_node(op);
1778 if (ia32_cg_config.use_sse2) {
1779 /* TODO: non-optimal... if we have many xXors, then we should
1780 * rather create a load for the const and use that instead of
1781 * several AM nodes... */
1782 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1784 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1785 nomem, new_op, noreg_xmm);
/* select the 32- or 64-bit sign-mask constant entity */
1787 size = get_mode_size_bits(mode);
1788 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1790 set_ia32_am_sc(new_node, ent);
1791 set_ia32_op_type(new_node, ia32_AddrModeS);
1792 set_ia32_ls_mode(new_node, mode);
1794 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
1797 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1800 SET_IA32_ORIG_NODE(new_node, node);
/* Transform a firm Not into an ia32 Not.  mode_b Nots must have been
 * lowered earlier; floats are not valid here. */
1806 * Transforms a Not node.
1808 * @return The created ia32 Not node
1810 static ir_node *gen_Not(ir_node *node)
1812 ir_node *op = get_Not_op(node);
1814 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1815 assert (! mode_is_float(get_irn_mode(node)));
1817 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
/* Transform a firm Abs.  Floats: SSE2 clears the sign bit by xAnd with a
 * known abs-mask constant; x87 uses vfabs.  Integers: the classic
 * branch-free sequence (x XOR sign) - sign, where sign = x >> 31. */
1823 * Transforms an Abs node.
1825 * @return The created ia32 Abs node
1827 static ir_node *gen_Abs(ir_node *node)
1829 ir_node *block = get_nodes_block(node);
1830 ir_node *new_block = be_transform_node(block);
1831 ir_node *op = get_Abs_op(node);
1832 dbg_info *dbgi = get_irn_dbg_info(node);
1833 ir_mode *mode = get_irn_mode(node);
1839 if (mode_is_float(mode)) {
1840 new_op = be_transform_node(op);
1842 if (ia32_cg_config.use_sse2) {
1843 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1844 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1845 nomem, new_op, noreg_fp);
1847 size = get_mode_size_bits(mode);
1848 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1850 set_ia32_am_sc(new_node, ent);
1852 SET_IA32_ORIG_NODE(new_node, node);
1854 set_ia32_op_type(new_node, ia32_AddrModeS);
1855 set_ia32_ls_mode(new_node, mode);
1857 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1858 SET_IA32_ORIG_NODE(new_node, node);
/* integer path: widen sub-32-bit operands first, then xor/sub with the
 * sign extension */
1861 ir_node *xor, *sign_extension;
1863 if (get_mode_size_bits(mode) == 32) {
1864 new_op = be_transform_node(op);
1866 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1869 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1871 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1872 nomem, new_op, sign_extension);
1873 SET_IA32_ORIG_NODE(xor, node);
1875 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1876 nomem, xor, sign_extension);
1877 SET_IA32_ORIG_NODE(new_node, node);
/* Build an ia32 Bt (bit test) node testing bit n of x, placed in the
 * (transformed) block of the originating Cmp. */
1884 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1886 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1888 dbg_info *dbgi = get_irn_dbg_info(cmp);
1889 ir_node *block = get_nodes_block(cmp);
1890 ir_node *new_block = be_transform_node(block);
1891 ir_node *op1 = be_transform_node(x);
1892 ir_node *op2 = be_transform_node(n);
1894 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
/* Produce a flags-producing node for a mode_b value and report the compare
 * relation to branch on via *pnc_out.  A Proj of a Cmp is transformed
 * directly (with a Bt shortcut for x & (1 << n) tests, which branch on
 * carry, hence the Lt/Ge rewrite); any other value is Tested against itself.
 * NOTE(review): excerpt is truncated — the is_Cmp/is_And/is_Shl guards and
 * several returns are missing from this listing. */
1898 * Transform a node returning a "flag" result.
1900 * @param node the node to transform
1901 * @param pnc_out the compare mode to use
1903 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1910 /* we have a Cmp as input */
1911 if (is_Proj(node)) {
1912 ir_node *pred = get_Proj_pred(node);
1914 pn_Cmp pnc = get_Proj_proj(node);
1915 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1916 ir_node *l = get_Cmp_left(pred);
1917 ir_node *r = get_Cmp_right(pred);
1919 ir_node *la = get_And_left(l);
1920 ir_node *ra = get_And_right(l);
1922 ir_node *c = get_Shl_left(la);
1923 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1924 /* (1 << n) & ra) */
1925 ir_node *n = get_Shl_right(la);
1926 flags = gen_bt(pred, ra, n);
1927 /* we must generate a Jc/Jnc jump */
1928 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1931 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* symmetric case: the shift is on the right-hand And operand */
1936 ir_node *c = get_Shl_left(ra);
1937 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1938 /* la & (1 << n)) */
1939 ir_node *n = get_Shl_right(ra);
1940 flags = gen_bt(pred, la, n);
1941 /* we must generate a Jc/Jnc jump */
1942 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1945 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1951 flags = be_transform_node(pred);
1957 /* a mode_b value, we have to compare it against 0 */
1958 dbgi = get_irn_dbg_info(node);
1959 new_block = be_transform_node(get_nodes_block(node));
1960 new_op = be_transform_node(node);
1961 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1962 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1963 *pnc_out = pn_Cmp_Lg;
/* Transform a firm Load: builds an ia32 address mode for the pointer, then
 * emits xLoad (SSE2) / vfld (x87) for floats, a sign/zero-extending
 * Conv_I2I for sub-32-bit integers, or a plain Load.  Floating (unpinned)
 * loads are marked rematerializable.
 * NOTE(review): excerpt is truncated — the base/index NULL checks and the
 * final return are incomplete in this listing. */
1968 * Transforms a Load.
1970 * @return the created ia32 Load node
1972 static ir_node *gen_Load(ir_node *node)
1974 ir_node *old_block = get_nodes_block(node);
1975 ir_node *block = be_transform_node(old_block);
1976 ir_node *ptr = get_Load_ptr(node);
1977 ir_node *mem = get_Load_mem(node);
1978 ir_node *new_mem = be_transform_node(mem);
1981 dbg_info *dbgi = get_irn_dbg_info(node);
1982 ir_mode *mode = get_Load_mode(node);
1985 ia32_address_t addr;
1987 /* construct load address */
1988 memset(&addr, 0, sizeof(addr));
1989 ia32_create_address_mode(&addr, ptr, 0);
1996 base = be_transform_node(base);
1999 if (index == NULL) {
2002 index = be_transform_node(index);
2005 if (mode_is_float(mode)) {
2006 if (ia32_cg_config.use_sse2) {
2007 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2009 res_mode = mode_xmm;
2011 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2013 res_mode = mode_vfp;
2016 assert(mode != mode_b);
2018 /* create a conv node with address mode for smaller modes */
2019 if (get_mode_size_bits(mode) < 32) {
2020 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2021 new_mem, noreg_GP, mode);
2023 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2028 set_irn_pinned(new_node, get_irn_pinned(node));
2029 set_ia32_op_type(new_node, ia32_AddrModeS);
2030 set_ia32_ls_mode(new_node, mode);
2031 set_address(new_node, &addr);
2033 if (get_irn_pinned(node) == op_pin_state_floats) {
2034 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2035 && pn_ia32_vfld_res == pn_ia32_Load_res
2036 && pn_ia32_Load_res == pn_ia32_res);
2037 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2040 SET_IA32_ORIG_NODE(new_node, node);
2042 be_dep_on_frame(new_node);
/* Decide whether a Load result (node, a Proj) may be folded into a
 * destination-address-mode operation writing through ptr: the load must be
 * its only user, live in the same block, read the same pointer as the
 * store, not be depended upon by the other operand, and not be blocked by
 * intervening memory operations (prevents_AM).
 * NOTE(review): excerpt is truncated — the early is_Proj/is_Load guards and
 * the return statements are missing from this listing. */
2046 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2047 ir_node *ptr, ir_node *other)
2054 /* we only use address mode if we're the only user of the load */
2055 if (get_irn_n_edges(node) > 1)
2058 load = get_Proj_pred(node);
2061 if (get_nodes_block(load) != block)
2064 /* store should have the same pointer as the load */
2065 if (get_Load_ptr(load) != ptr)
2068 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2069 if (other != NULL &&
2070 get_nodes_block(other) == block &&
2071 heights_reachable_in_block(heights, other, load)) {
2075 if (prevents_AM(block, load, mem))
2077 /* Store should be attached to the load via mem */
2078 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binary operation (e.g. AddMem) for a
 * store of `op OP other` back to the same address: one operand is folded
 * into the memory operand, the other becomes an immediate or register.
 * Returns NULL (via the missing else branch) when neither operand qualifies.
 * The old Load's mem-Proj is redirected to the new node.
 * NOTE(review): excerpt is truncated — the failing-else return, the
 * addr->mem fallback value and the final return are missing here. */
2083 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2084 ir_node *mem, ir_node *ptr, ir_mode *mode,
2085 construct_binop_dest_func *func,
2086 construct_binop_dest_func *func8bit,
2087 match_flags_t flags)
2089 ir_node *src_block = get_nodes_block(node);
2097 ia32_address_mode_t am;
2098 ia32_address_t *addr = &am.addr;
2099 memset(&am, 0, sizeof(am));
2101 assert(flags & match_immediate); /* there is no destam node without... */
2102 commutative = (flags & match_commutative) != 0;
2104 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2105 build_address(&am, op1, ia32_create_am_double_use);
2106 new_op = create_immediate_or_transform(op2, 0);
2107 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2108 build_address(&am, op2, ia32_create_am_double_use);
2109 new_op = create_immediate_or_transform(op1, 0);
2114 if (addr->base == NULL)
2115 addr->base = noreg_GP;
2116 if (addr->index == NULL)
2117 addr->index = noreg_GP;
2118 if (addr->mem == NULL)
2121 dbgi = get_irn_dbg_info(node);
2122 block = be_transform_node(src_block);
2123 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2125 if (get_mode_size_bits(mode) == 8) {
2126 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2128 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2130 set_address(new_node, addr);
2131 set_ia32_op_type(new_node, ia32_AddrModeD);
2132 set_ia32_ls_mode(new_node, mode);
2133 SET_IA32_ORIG_NODE(new_node, node);
2135 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2136 mem_proj = be_transform_node(am.mem_proj);
2137 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unary operation (e.g. NotMem/NegMem) for
 * a store of `OP op` back to the same address; returns NULL when op does
 * not qualify per use_dest_am.  Mirrors dest_am_binop's mem-Proj rewiring.
 * NOTE(review): excerpt is truncated — the NULL return after the guard and
 * the final return are missing from this listing. */
2142 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2143 ir_node *ptr, ir_mode *mode,
2144 construct_unop_dest_func *func)
2146 ir_node *src_block = get_nodes_block(node);
2152 ia32_address_mode_t am;
2153 ia32_address_t *addr = &am.addr;
2155 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2158 memset(&am, 0, sizeof(am));
2159 build_address(&am, op, ia32_create_am_double_use);
2161 dbgi = get_irn_dbg_info(node);
2162 block = be_transform_node(src_block);
2163 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2164 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2165 set_address(new_node, addr);
2166 set_ia32_op_type(new_node, ia32_AddrModeD);
2167 set_ia32_ls_mode(new_node, mode);
2168 SET_IA32_ORIG_NODE(new_node, node);
2170 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2171 mem_proj = be_transform_node(am.mem_proj);
2172 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn a store of an 8-bit Mux(sel, 1, 0) (or 0/1, negated) into a
 * SetMem instruction writing the condition result directly to memory.
 * Returns NULL for non-8-bit values or non-0/1 Mux arms.
 * NOTE(review): excerpt is truncated — the negated-flag assignments, the
 * NULL returns and the final return are missing from this listing. */
2177 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2179 ir_mode *mode = get_irn_mode(node);
2180 ir_node *mux_true = get_Mux_true(node);
2181 ir_node *mux_false = get_Mux_false(node);
2191 ia32_address_t addr;
2193 if (get_mode_size_bits(mode) != 8)
2196 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2198 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2204 build_address_ptr(&addr, ptr, mem);
2206 dbgi = get_irn_dbg_info(node);
2207 block = get_nodes_block(node);
2208 new_block = be_transform_node(block);
2209 cond = get_Mux_sel(node);
2210 flags = get_flags_node(cond, &pnc);
2211 new_mem = be_transform_node(mem);
2212 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2213 addr.index, addr.mem, flags, pnc, negated);
2214 set_address(new_node, &addr);
2215 set_ia32_op_type(new_node, ia32_AddrModeD);
2216 set_ia32_ls_mode(new_node, mode);
2217 SET_IA32_ORIG_NODE(new_node, node);
/* Try to fold a Store(ptr, OP(Load(ptr), x)) pattern into a single
 * read-modify-write instruction (destination address mode).  Dispatches on
 * the stored value's opcode to the matching *Mem constructor; Add by +/-1
 * becomes IncMem/DecMem when use_incdec is set, Mux becomes SetMem.
 * Returns NULL when no pattern applies.  Pinned-ness of the original Store
 * is propagated to the new node.
 * NOTE(review): excerpt is truncated — the iro_* case labels, break
 * statements, NULL returns and the final return are missing here. */
2222 static ir_node *try_create_dest_am(ir_node *node)
2224 ir_node *val = get_Store_value(node);
2225 ir_node *mem = get_Store_mem(node);
2226 ir_node *ptr = get_Store_ptr(node);
2227 ir_mode *mode = get_irn_mode(val);
2228 unsigned bits = get_mode_size_bits(mode);
2233 /* handle only GP modes for now... */
2234 if (!ia32_mode_needs_gp_reg(mode))
2238 /* store must be the only user of the val node */
2239 if (get_irn_n_edges(val) > 1)
2241 /* skip pointless convs */
2243 ir_node *conv_op = get_Conv_op(val);
2244 ir_mode *pred_mode = get_irn_mode(conv_op);
2245 if (!ia32_mode_needs_gp_reg(pred_mode))
2247 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2255 /* value must be in the same block */
2256 if (get_nodes_block(node) != get_nodes_block(val))
2259 switch (get_irn_opcode(val)) {
2261 op1 = get_Add_left(val);
2262 op2 = get_Add_right(val);
2263 if (ia32_cg_config.use_incdec) {
2264 if (is_Const_1(op2)) {
2265 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2267 } else if (is_Const_Minus_1(op2)) {
2268 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2272 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2273 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2274 match_commutative | match_immediate);
2277 op1 = get_Sub_left(val);
2278 op2 = get_Sub_right(val);
2279 if (is_Const(op2)) {
2280 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2282 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2283 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2287 op1 = get_And_left(val);
2288 op2 = get_And_right(val);
2289 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2290 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2291 match_commutative | match_immediate);
2294 op1 = get_Or_left(val);
2295 op2 = get_Or_right(val);
2296 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2297 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2298 match_commutative | match_immediate);
2301 op1 = get_Eor_left(val);
2302 op2 = get_Eor_right(val);
2303 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2304 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2305 match_commutative | match_immediate);
2308 op1 = get_Shl_left(val);
2309 op2 = get_Shl_right(val);
2310 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2311 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2315 op1 = get_Shr_left(val);
2316 op2 = get_Shr_right(val);
2317 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2318 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2322 op1 = get_Shrs_left(val);
2323 op2 = get_Shrs_right(val);
2324 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2325 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2329 op1 = get_Rotl_left(val);
2330 op2 = get_Rotl_right(val);
2331 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2332 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2335 /* TODO: match ROR patterns... */
2337 new_node = try_create_SetMem(val, ptr, mem);
2340 op1 = get_Minus_op(val);
2341 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2344 /* should be lowered already */
2345 assert(mode != mode_b);
2346 op1 = get_Not_op(val);
2347 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
2353 if (new_node != NULL) {
2354 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2355 get_irn_pinned(node) == op_pin_state_pinned) {
2356 set_irn_pinned(new_node, op_pin_state_pinned);
/* True iff mode is a signed 16- or 32-bit integer mode, i.e. one that an
 * x87 fist(tp) store can produce directly.
 * NOTE(review): excerpt is truncated — the return statements are missing. */
2363 static bool possible_int_mode_for_fp(ir_mode *mode)
2367 if (!mode_is_signed(mode))
2369 size = get_mode_size_bits(mode);
2370 if (size != 16 && size != 32)
/* True iff node is a Conv from a float mode to an integer mode that
 * possible_int_mode_for_fp accepts (so the conversion can be fused into an
 * x87 fist store).
 * NOTE(review): excerpt is truncated — the is_Conv check and the return
 * statements are missing from this listing. */
2375 static int is_float_to_int_conv(const ir_node *node)
2377 ir_mode *mode = get_irn_mode(node);
2381 if (!possible_int_mode_for_fp(mode))
2386 conv_op = get_Conv_op(node);
2387 conv_mode = get_irn_mode(conv_op);
2389 if (!mode_is_float(conv_mode))
/* Lower a Store of a float Const into one or two 32-bit integer Stores of
 * the constant's raw bytes (little-endian assembly of 4 tarval bytes per
 * word); multiple stores are joined with a Sync.
 * NOTE(review): excerpt is truncated — the ofs/i loop variables and the
 * loop header around lines 2420-2422 are missing from this listing. */
2396 * Transform a Store(floatConst) into a sequence of
2399 * @return the created ia32 Store node
2401 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2403 ir_mode *mode = get_irn_mode(cns);
2404 unsigned size = get_mode_size_bytes(mode);
2405 tarval *tv = get_Const_tarval(cns);
2406 ir_node *block = get_nodes_block(node);
2407 ir_node *new_block = be_transform_node(block);
2408 ir_node *ptr = get_Store_ptr(node);
2409 ir_node *mem = get_Store_mem(node);
2410 dbg_info *dbgi = get_irn_dbg_info(node);
2414 ia32_address_t addr;
2416 assert(size % 4 == 0);
2419 build_address_ptr(&addr, ptr, mem);
/* assemble one 32-bit little-endian word from the tarval's bytes */
2423 get_tarval_sub_bits(tv, ofs) |
2424 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2425 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2426 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2427 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2429 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2430 addr.index, addr.mem, imm);
2432 set_irn_pinned(new_node, get_irn_pinned(node));
2433 set_ia32_op_type(new_node, ia32_AddrModeD);
2434 set_ia32_ls_mode(new_node, mode_Iu);
2435 set_address(new_node, &addr);
2436 SET_IA32_ORIG_NODE(new_node, node);
2439 ins[i++] = new_node;
2444 } while (size != 0);
2447 return new_rd_Sync(dbgi, new_block, i, ins);
/* Emit a float-to-int store: vfisttp (truncating, always pops the x87 tos —
 * the result Proj is kept alive via be_new_Keep so other users still see
 * the value) when available, else a plain vfist with an explicit truncating
 * FPU control-word node.
 * NOTE(review): excerpt is truncated — the *fist out-parameter assignments
 * and the final return are missing from this listing. */
2454 * Generate a vfist or vfisttp instruction.
2456 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2457 ir_node *mem, ir_node *val, ir_node **fist)
2461 if (ia32_cg_config.use_fisttp) {
2462 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2463 if other users exists */
2464 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
2465 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2466 be_new_Keep(block, 1, &value);
2468 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2471 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2474 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
/* Transform a Store that hit no special case: first try destination address
 * mode; otherwise build the store address and emit xStore/vfst for floats,
 * a fused vfist for float->int conversions (non-SSE2), or Store/Store8Bit
 * for integers.  Redundant Convs in front of the store are skipped.
 * NOTE(review): excerpt is truncated — several `val = op` loop bodies,
 * `store = ...` assignments and the final return are missing here. */
2480 * Transforms a general (no special case) Store.
2482 * @return the created ia32 Store node
2484 static ir_node *gen_general_Store(ir_node *node)
2486 ir_node *val = get_Store_value(node);
2487 ir_mode *mode = get_irn_mode(val);
2488 ir_node *block = get_nodes_block(node);
2489 ir_node *new_block = be_transform_node(block);
2490 ir_node *ptr = get_Store_ptr(node);
2491 ir_node *mem = get_Store_mem(node);
2492 dbg_info *dbgi = get_irn_dbg_info(node);
2493 ir_node *new_val, *new_node, *store;
2494 ia32_address_t addr;
2496 /* check for destination address mode */
2497 new_node = try_create_dest_am(node);
2498 if (new_node != NULL)
2501 /* construct store address */
2502 memset(&addr, 0, sizeof(addr));
2503 ia32_create_address_mode(&addr, ptr, 0);
2505 if (addr.base == NULL) {
2506 addr.base = noreg_GP;
2508 addr.base = be_transform_node(addr.base);
2511 if (addr.index == NULL) {
2512 addr.index = noreg_GP;
2514 addr.index = be_transform_node(addr.index);
2516 addr.mem = be_transform_node(mem);
2518 if (mode_is_float(mode)) {
2519 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2521 while (is_Conv(val) && mode == get_irn_mode(val)) {
2522 ir_node *op = get_Conv_op(val);
2523 if (!mode_is_float(get_irn_mode(op)))
2527 new_val = be_transform_node(val);
2528 if (ia32_cg_config.use_sse2) {
2529 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2530 addr.index, addr.mem, new_val);
2532 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2533 addr.index, addr.mem, new_val, mode);
2536 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2537 val = get_Conv_op(val);
2539 /* TODO: is this optimisation still necessary at all (middleend)? */
2540 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2541 while (is_Conv(val)) {
2542 ir_node *op = get_Conv_op(val);
2543 if (!mode_is_float(get_irn_mode(op)))
2545 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2549 new_val = be_transform_node(val);
2550 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
2552 new_val = create_immediate_or_transform(val, 0);
2553 assert(mode != mode_b);
2555 if (get_mode_size_bits(mode) == 8) {
2556 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2557 addr.index, addr.mem, new_val);
2559 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2560 addr.index, addr.mem, new_val);
2565 set_irn_pinned(store, get_irn_pinned(node));
2566 set_ia32_op_type(store, ia32_AddrModeD);
2567 set_ia32_ls_mode(store, mode);
2569 set_address(store, &addr);
2570 SET_IA32_ORIG_NODE(store, node);
2576 * Transforms a Store.
2578 * @return the created ia32 Store node
2580 static ir_node *gen_Store(ir_node *node)
2582 ir_node *val = get_Store_value(node);
2583 ir_mode *mode = get_irn_mode(val);
2585 if (mode_is_float(mode) && is_Const(val)) {
2586 /* We can transform every floating const store
2587 into a sequence of integer stores.
2588 If the constant is already in a register,
2589 it would be better to use it, but we don't
2590 have this information here. */
2591 return gen_float_const_Store(node, val);
/* everything else goes through the generic store lowering */
2593 return gen_general_Store(node);
2597 * Transforms a Switch.
2599 * @return the created ia32 SwitchJmp node
2601 static ir_node *create_Switch(ir_node *node)
2603 dbg_info *dbgi = get_irn_dbg_info(node);
2604 ir_node *block = be_transform_node(get_nodes_block(node));
2605 ir_node *sel = get_Cond_selector(node);
2606 ir_node *new_sel = be_transform_node(sel);
2607 long switch_min = LONG_MAX;
2608 long switch_max = LONG_MIN;
2609 long default_pn = get_Cond_default_proj(node);
2611 const ir_edge_t *edge;
2613 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2615 /* determine the smallest switch case value */
/* scan all case Projs (skipping the default) to find min/max case values */
2616 foreach_out_edge(node, edge) {
2617 ir_node *proj = get_edge_src_irn(edge);
2618 long pn = get_Proj_proj(proj);
2619 if (pn == default_pn)
2622 if (pn < switch_min)
2624 if (pn > switch_max)
/* refuse absurdly large jump tables; cast avoids signed overflow UB on the span */
2628 if ((unsigned long) (switch_max - switch_min) > 128000) {
2629 panic("Size of switch %+F bigger than 128000", node);
2632 if (switch_min != 0) {
2633 /* if smallest switch case is not 0 we need an additional sub */
/* bias the selector to 0 via an LEA with negative offset so the
 * jump table can start at index 0 */
2634 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2635 add_ia32_am_offs_int(new_sel, -switch_min);
2636 set_ia32_op_type(new_sel, ia32_AddrModeS);
2638 SET_IA32_ORIG_NODE(new_sel, node);
2641 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2642 SET_IA32_ORIG_NODE(new_node, node);
2648 * Transform a Cond node.
2650 static ir_node *gen_Cond(ir_node *node)
2652 ir_node *block = get_nodes_block(node);
2653 ir_node *new_block = be_transform_node(block);
2654 dbg_info *dbgi = get_irn_dbg_info(node);
2655 ir_node *sel = get_Cond_selector(node);
2656 ir_mode *sel_mode = get_irn_mode(sel);
2657 ir_node *flags = NULL;
/* non-boolean selector means this Cond is a Switch */
2661 if (sel_mode != mode_b) {
2662 return create_Switch(node);
2665 /* we get flags from a Cmp */
2666 flags = get_flags_node(sel, &pnc);
/* conditional jump on the flags produced above */
2668 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2669 SET_IA32_ORIG_NODE(new_node, node);
2675 * Transform a be_Copy.
2677 static ir_node *gen_be_Copy(ir_node *node)
2679 ir_node *new_node = be_duplicate_node(node);
2680 ir_mode *mode = get_irn_mode(new_node);
/* all GP-register values are handled uniformly as 32bit unsigned */
2682 if (ia32_mode_needs_gp_reg(mode)) {
2683 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare (fucomi/ftst/fucom+fnstsw+sahf depending on
 * the CPU features available in ia32_cg_config).
 */
2689 static ir_node *create_Fucom(ir_node *node)
2691 dbg_info *dbgi = get_irn_dbg_info(node);
2692 ir_node *block = get_nodes_block(node);
2693 ir_node *new_block = be_transform_node(block);
2694 ir_node *left = get_Cmp_left(node);
2695 ir_node *new_left = be_transform_node(left);
2696 ir_node *right = get_Cmp_right(node);
/* fucomi sets EFLAGS directly, no fnstsw/sahf dance needed */
2700 if (ia32_cg_config.use_fucomi) {
2701 new_right = be_transform_node(right);
2702 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2704 set_ia32_commutative(new_node);
2705 SET_IA32_ORIG_NODE(new_node, node);
/* comparison against 0.0 can use the shorter ftst */
2707 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2708 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2710 new_right = be_transform_node(right);
2711 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2714 set_ia32_commutative(new_node);
2716 SET_IA32_ORIG_NODE(new_node, node);
/* copy the FPU status word (fnstsw result) into EFLAGS via sahf */
2718 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2719 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Create an SSE ucomi float compare, using source address mode when the
 * matcher finds one.
 */
2725 static ir_node *create_Ucomi(ir_node *node)
2727 dbg_info *dbgi = get_irn_dbg_info(node);
2728 ir_node *src_block = get_nodes_block(node);
2729 ir_node *new_block = be_transform_node(src_block);
2730 ir_node *left = get_Cmp_left(node);
2731 ir_node *right = get_Cmp_right(node);
2733 ia32_address_mode_t am;
2734 ia32_address_t *addr = &am.addr;
/* try to fold one operand as a memory operand (commutative compare) */
2736 match_arguments(&am, src_block, left, right, NULL,
2737 match_commutative | match_am);
2739 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2740 addr->mem, am.new_op1, am.new_op2,
2742 set_am_attributes(new_node, &am);
2744 SET_IA32_ORIG_NODE(new_node, node);
/* re-route a possible memory Proj if AM folded a load */
2746 new_node = fix_mem_proj(new_node, &am);
2752 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2753 * to fold an and into a test node
2755 static bool can_fold_test_and(ir_node *node)
2757 const ir_edge_t *edge;
2759 /* we can only have eq and lg projs */
2760 foreach_out_edge(node, edge) {
2761 ir_node *proj = get_edge_src_irn(edge);
2762 pn_Cmp pnc = get_Proj_proj(proj);
2763 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2771 * returns true if it is assured, that the upper bits of a node are "clean"
2772 * which means for a 16 or 8 bit value, that the upper bits in the register
2773 * are 0 for unsigned and a copy of the most significant bit for signed
2776 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2778 assert(ia32_mode_needs_gp_reg(mode));
/* a full-width (>=32bit) value has no "upper bits" to worry about */
2779 if (get_mode_size_bits(mode) >= 32)
/* look through Projs at the producing node */
2782 if (is_Proj(transformed_node))
2783 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2785 switch (get_ia32_irn_opcode(transformed_node)) {
2786 case iro_ia32_Conv_I2I:
2787 case iro_ia32_Conv_I2I8Bit: {
/* a conv cleans the upper bits iff it extends with the same signedness
 * and from a mode no wider than the one we care about */
2788 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2789 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2791 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2798 if (mode_is_signed(mode)) {
2799 return false; /* TODO handle signed modes */
2801 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2802 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
/* shifting right by at least (32 - bits) zeroes the upper bits */
2803 const ia32_immediate_attr_t *attr
2804 = get_ia32_immediate_attr_const(right);
2805 if (attr->symconst == 0 &&
2806 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2810 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2814 /* TODO too conservative if shift amount is constant */
2815 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
2818 if (!mode_is_signed(mode)) {
/* And with a clean operand zeroes the upper bits, one side suffices */
2820 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2821 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2823 /* TODO if one is known to be zero extended, then || is sufficient */
2828 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2829 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2831 case iro_ia32_Const:
2832 case iro_ia32_Immediate: {
2833 const ia32_immediate_attr_t *attr =
2834 get_ia32_immediate_attr_const(transformed_node);
2835 if (mode_is_signed(mode)) {
/* signed: upper bits must all equal the sign bit (all 0 or all 1) */
2836 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2837 return shifted == 0 || shifted == -1;
/* unsigned: upper bits must be zero */
2839 unsigned long shifted = (unsigned long)attr->offset;
2840 shifted >>= get_mode_size_bits(mode);
2841 return shifted == 0;
2851 * Generate code for a Cmp.
2853 static ir_node *gen_Cmp(ir_node *node)
2855 dbg_info *dbgi = get_irn_dbg_info(node);
2856 ir_node *block = get_nodes_block(node);
2857 ir_node *new_block = be_transform_node(block);
2858 ir_node *left = get_Cmp_left(node);
2859 ir_node *right = get_Cmp_right(node);
2860 ir_mode *cmp_mode = get_irn_mode(left);
2862 ia32_address_mode_t am;
2863 ia32_address_t *addr = &am.addr;
/* float compares are delegated to SSE ucomi or x87 fucom variants */
2866 if (mode_is_float(cmp_mode)) {
2867 if (ia32_cg_config.use_sse2) {
2868 return create_Ucomi(node);
2870 return create_Fucom(node);
2874 assert(ia32_mode_needs_gp_reg(cmp_mode));
2876 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2877 cmp_unsigned = !mode_is_signed(cmp_mode);
2878 if (is_Const_0(right) &&
2880 get_irn_n_edges(left) == 1 &&
2881 can_fold_test_and(node)) {
2882 /* Test(and_left, and_right) */
2883 ir_node *and_left = get_And_left(left);
2884 ir_node *and_right = get_And_right(left);
2886 /* matze: code here used mode instead of cmp_mode, I think it is always
2887 * the same as cmp_mode, but I leave this here to see if this is really
2890 assert(get_irn_mode(and_left) == cmp_mode);
2892 match_arguments(&am, block, and_left, and_right, NULL,
2894 match_am | match_8bit_am | match_16bit_am |
2895 match_am_and_immediates | match_immediate);
2897 /* use 32bit compare mode if possible since the opcode is smaller */
2898 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2899 upper_bits_clean(am.new_op2, cmp_mode)) {
2900 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2903 if (get_mode_size_bits(cmp_mode) == 8) {
2904 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2905 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2908 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2909 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2912 /* Cmp(left, right) */
2913 match_arguments(&am, block, left, right, NULL,
2914 match_commutative | match_am | match_8bit_am |
2915 match_16bit_am | match_am_and_immediates |
2917 /* use 32bit compare mode if possible since the opcode is smaller */
2918 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2919 upper_bits_clean(am.new_op2, cmp_mode)) {
2920 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2923 if (get_mode_size_bits(cmp_mode) == 8) {
2924 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2925 addr->index, addr->mem, am.new_op1,
2926 am.new_op2, am.ins_permuted,
2929 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2930 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2933 set_am_attributes(new_node, &am);
2934 set_ia32_ls_mode(new_node, cmp_mode);
2936 SET_IA32_ORIG_NODE(new_node, node);
2938 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMov for a Mux whose values live in GP registers,
 * selecting between the false/true values based on @p flags/@p pnc.
 */
2943 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2946 dbg_info *dbgi = get_irn_dbg_info(node);
2947 ir_node *block = get_nodes_block(node);
2948 ir_node *new_block = be_transform_node(block);
2949 ir_node *val_true = get_Mux_true(node);
2950 ir_node *val_false = get_Mux_false(node);
2952 ia32_address_mode_t am;
2953 ia32_address_t *addr;
2955 assert(ia32_cg_config.use_cmov);
2956 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
/* pass flags so the matcher won't fold operands that would clobber EFLAGS */
2960 match_arguments(&am, block, val_false, val_true, flags,
2961 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2963 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2964 addr->mem, am.new_op1, am.new_op2, new_flags,
2965 am.ins_permuted, pnc);
2966 set_am_attributes(new_node, &am);
2968 SET_IA32_ORIG_NODE(new_node, node);
2970 new_node = fix_mem_proj(new_node, &am);
2976 * Creates a ia32 Setcc instruction.
2978 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2979 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2982 ir_mode *mode = get_irn_mode(orig_node);
/* setcc only writes an 8bit register */
2985 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2986 SET_IA32_ORIG_NODE(new_node, orig_node);
2988 /* we might need to conv the result up */
2989 if (get_mode_size_bits(mode) > 8) {
/* zero-extend the 8bit set result (mode_Bu) to the full register */
2990 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2991 nomem, new_node, mode_Bu);
2992 SET_IA32_ORIG_NODE(new_node, orig_node);
2999 * Create instruction for an unsigned Difference or Zero.
3001 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3003 ir_mode *mode = get_irn_mode(psi);
3004 ir_node *new_node, *sub, *sbb, *eflags, *block;
/* doz(a,b) = (a - b) & ~borrow-mask: sub, then sbb 0,0 gives all-ones
 * on borrow which the final and uses to zero the result */
3008 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3009 match_mode_neutral | match_am | match_immediate | match_two_users);
3011 block = get_nodes_block(new_node);
3013 if (is_Proj(new_node)) {
3014 sub = get_Proj_pred(new_node);
3015 assert(is_ia32_Sub(sub));
/* turn the Sub into mode_T so we can grab both result and flags Projs */
3018 set_irn_mode(sub, mode_T);
3019 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3021 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3023 dbgi = get_irn_dbg_info(psi);
3024 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3026 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3027 set_ia32_commutative(new_node);
3032 * Create an const array of two float consts.
3034 * @param c0 the first constant
3035 * @param c1 the second constant
3036 * @param new_mode IN/OUT for the mode of the constants, if NULL
3037 * smallest possible mode will be used
3039 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3041 ir_mode *mode = *new_mode;
3043 ir_initializer_t *initializer;
3044 tarval *tv0 = get_Const_tarval(c0);
3045 tarval *tv1 = get_Const_tarval(c1);
3048 /* detect the best mode for the constants */
3049 mode = get_tarval_mode(tv0);
/* try to shrink to mode_F, then mode_D, if both values convert losslessly */
3051 if (mode != mode_F) {
3052 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3053 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3055 tv0 = tarval_convert_to(tv0, mode);
3056 tv1 = tarval_convert_to(tv1, mode);
3057 } else if (mode != mode_D) {
3058 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3059 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3061 tv0 = tarval_convert_to(tv0, mode);
3062 tv1 = tarval_convert_to(tv1, mode);
/* build a local, constant, statically allocated 2-element array entity */
3069 tp = ia32_create_float_type(mode, 4);
3070 tp = ia32_create_float_array(tp);
3072 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3074 set_entity_ld_ident(ent, get_entity_ident(ent));
3075 set_entity_visibility(ent, visibility_local);
3076 set_entity_variability(ent, variability_constant);
3077 set_entity_allocation(ent, allocation_static);
3079 initializer = create_initializer_compound(2);
3081 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3082 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3084 set_entity_initializer(ent, initializer);
3091 * Transforms a Mux node into some code sequence.
3093 * @return The transformed node.
3095 static ir_node *gen_Mux(ir_node *node)
3097 dbg_info *dbgi = get_irn_dbg_info(node);
3098 ir_node *block = get_nodes_block(node);
3099 ir_node *new_block = be_transform_node(block);
3100 ir_node *mux_true = get_Mux_true(node);
3101 ir_node *mux_false = get_Mux_false(node);
3102 ir_node *cond = get_Mux_sel(node);
3103 ir_mode *mode = get_irn_mode(node);
3108 assert(get_irn_mode(cond) == mode_b);
3110 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3111 if (mode_is_float(mode)) {
3112 ir_node *cmp = get_Proj_pred(cond);
3113 ir_node *cmp_left = get_Cmp_left(cmp);
3114 ir_node *cmp_right = get_Cmp_right(cmp);
3115 pn_Cmp pnc = get_Proj_proj(cond);
3117 if (ia32_cg_config.use_sse2) {
/* recognize min/max patterns and map them to SSE minss/maxss-style ops */
3118 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3119 if (cmp_left == mux_true && cmp_right == mux_false) {
3120 /* Mux(a <= b, a, b) => MIN */
3121 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3122 match_commutative | match_am | match_two_users);
3123 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3124 /* Mux(a <= b, b, a) => MAX */
3125 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3126 match_commutative | match_am | match_two_users);
3128 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3129 if (cmp_left == mux_true && cmp_right == mux_false) {
3130 /* Mux(a >= b, a, b) => MAX */
3131 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3132 match_commutative | match_am | match_two_users);
3133 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3134 /* Mux(a >= b, b, a) => MIN */
3135 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3136 match_commutative | match_am | match_two_users);
/* Mux between two float constants: materialize both in a 2-element
 * constant array and load the selected one, indexing with the setcc
 * result scaled by the element size */
3140 if (is_Const(mux_true) && is_Const(mux_false)) {
3141 ia32_address_mode_t am;
3146 flags = get_flags_node(cond, &pnc);
3147 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3149 if (ia32_cg_config.use_sse2) {
3150 /* cannot load from different mode on SSE */
3153 /* x87 can load any mode */
3157 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* compute the index scale from the constant size (no shift-by-16 AM) */
3159 switch (get_mode_size_bytes(new_mode)) {
3169 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3170 set_ia32_am_scale(new_node, 2);
3175 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3176 set_ia32_am_scale(new_node, 1);
3179 /* arg, shift 16 NOT supported */
3181 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3184 panic("Unsupported constant size");
3187 am.ls_mode = new_mode;
3188 am.addr.base = noreg_GP;
3189 am.addr.index = new_node;
3190 am.addr.mem = nomem;
3192 am.addr.scale = scale;
3193 am.addr.use_frame = 0;
3194 am.addr.frame_entity = NULL;
3195 am.addr.symconst_sign = 0;
3196 am.mem_proj = am.addr.mem;
3197 am.op_type = ia32_AddrModeS;
3200 am.pinned = op_pin_state_floats;
3202 am.ins_permuted = 0;
3204 if (ia32_cg_config.use_sse2)
3205 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3207 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3208 set_am_attributes(load, &am);
3210 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3212 panic("cannot transform floating point Mux");
3215 assert(ia32_mode_needs_gp_reg(mode));
3217 if (is_Proj(cond)) {
3218 ir_node *cmp = get_Proj_pred(cond);
3220 ir_node *cmp_left = get_Cmp_left(cmp);
3221 ir_node *cmp_right = get_Cmp_right(cmp);
3222 pn_Cmp pnc = get_Proj_proj(cond);
3224 /* check for unsigned Doz first */
3225 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3226 is_Const_0(mux_false) && is_Sub(mux_true) &&
3227 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3228 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3229 return create_Doz(node, cmp_left, cmp_right);
3230 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3231 is_Const_0(mux_true) && is_Sub(mux_false) &&
3232 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3233 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3234 return create_Doz(node, cmp_left, cmp_right);
3239 flags = get_flags_node(cond, &pnc);
3241 if (is_Const(mux_true) && is_Const(mux_false)) {
3242 /* both are const, good */
/* 0/1 selections map directly onto setcc (possibly with inverted flags) */
3243 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3244 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3245 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3246 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/1);
3248 /* Not that simple. */
3253 new_node = create_CMov(node, cond, flags, pnc);
3261 * Create a conversion from x87 state register to general purpose.
3263 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3265 ir_node *block = be_transform_node(get_nodes_block(node));
3266 ir_node *op = get_Conv_op(node);
3267 ir_node *new_op = be_transform_node(op);
3268 ir_graph *irg = current_ir_graph;
3269 dbg_info *dbgi = get_irn_dbg_info(node);
3270 ir_mode *mode = get_irn_mode(node);
3271 ir_node *fist, *load, *mem;
/* spill the x87 value to the frame with fist(p), then reload it as int */
3273 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3274 set_irn_pinned(fist, op_pin_state_floats);
3275 set_ia32_use_frame(fist);
3276 set_ia32_op_type(fist, ia32_AddrModeD);
3278 assert(get_mode_size_bits(mode) <= 32);
3279 /* exception we can only store signed 32 bit integers, so for unsigned
3280 we store a 64bit (signed) integer and load the lower bits */
3281 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3282 set_ia32_ls_mode(fist, mode_Ls);
3284 set_ia32_ls_mode(fist, mode_Is);
3286 SET_IA32_ORIG_NODE(fist, node);
/* reload only the low 32 bits of the stored value */
3289 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3291 set_irn_pinned(load, op_pin_state_floats);
3292 set_ia32_use_frame(load);
3293 set_ia32_op_type(load, ia32_AddrModeS);
3294 set_ia32_ls_mode(load, mode_Is);
/* tell the stack-slot allocator how big the spill slot must be */
3295 if (get_ia32_ls_mode(fist) == mode_Ls) {
3296 ia32_attr_t *attr = get_ia32_attr(load);
3297 attr->data.need_64bit_stackent = 1;
3299 ia32_attr_t *attr = get_ia32_attr(load);
3300 attr->data.need_32bit_stackent = 1;
3302 SET_IA32_ORIG_NODE(load, node);
3304 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3308 * Creates a x87 strict Conv by placing a Store and a Load
3310 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3312 ir_node *block = get_nodes_block(node);
3313 ir_graph *irg = get_Block_irg(block);
3314 dbg_info *dbgi = get_irn_dbg_info(node);
3315 ir_node *frame = get_irg_frame(irg);
3316 ir_node *store, *load;
/* round-trip through memory in tgt_mode to force the x87 value to the
 * exact target precision (x87 registers always hold 80bit values) */
3319 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3320 set_ia32_use_frame(store);
3321 set_ia32_op_type(store, ia32_AddrModeD);
3322 SET_IA32_ORIG_NODE(store, node);
3324 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3325 set_ia32_use_frame(load);
3326 set_ia32_op_type(load, ia32_AddrModeS);
3327 SET_IA32_ORIG_NODE(load, node);
3329 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/**
 * Create an integer-to-integer conv, picking the 8bit variant when the
 * load/store mode is 8 bits wide.
 */
3333 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3334 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3336 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3338 func = get_mode_size_bits(mode) == 8 ?
3339 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3340 return func(dbgi, block, base, index, mem, val, mode);
3344 * Create a conversion from general purpose to x87 register
3346 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3348 ir_node *src_block = get_nodes_block(node);
3349 ir_node *block = be_transform_node(src_block);
3350 ir_graph *irg = get_Block_irg(block);
3351 dbg_info *dbgi = get_irn_dbg_info(node);
3352 ir_node *op = get_Conv_op(node);
3353 ir_node *new_op = NULL;
3355 ir_mode *store_mode;
3360 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3361 if (possible_int_mode_for_fp(src_mode)) {
3362 ia32_address_mode_t am;
3364 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3365 if (am.op_type == ia32_AddrModeS) {
3366 ia32_address_t *addr = &am.addr;
/* fild directly from the operand's memory location, no spill needed */
3368 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3369 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3371 set_am_attributes(fild, &am);
3372 SET_IA32_ORIG_NODE(fild, node);
3374 fix_mem_proj(fild, &am);
3379 if (new_op == NULL) {
3380 new_op = be_transform_node(op);
3383 mode = get_irn_mode(op);
3385 /* first convert to 32 bit signed if necessary */
3386 if (get_mode_size_bits(src_mode) < 32) {
3387 if (!upper_bits_clean(new_op, src_mode)) {
3388 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3389 SET_IA32_ORIG_NODE(new_op, node);
3394 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can load it */
3397 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3399 set_ia32_use_frame(store);
3400 set_ia32_op_type(store, ia32_AddrModeD);
3401 set_ia32_ls_mode(store, mode_Iu);
3403 /* exception for 32bit unsigned, do a 64bit spill+load */
3404 if (!mode_is_signed(mode)) {
/* store a zero high word so the 64bit value is the unsigned 32bit value */
3407 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3409 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3410 noreg_GP, nomem, zero_const);
3412 set_ia32_use_frame(zero_store);
3413 set_ia32_op_type(zero_store, ia32_AddrModeD);
3414 add_ia32_am_offs_int(zero_store, 4);
3415 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must see both stores: join them with a Sync */
3420 store = new_rd_Sync(dbgi, block, 2, in);
3421 store_mode = mode_Ls;
3423 store_mode = mode_Is;
3427 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3429 set_ia32_use_frame(fild);
3430 set_ia32_op_type(fild, ia32_AddrModeS);
3431 set_ia32_ls_mode(fild, store_mode);
3433 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3439 * Create a conversion from one integer mode into another one
3441 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3442 dbg_info *dbgi, ir_node *block, ir_node *op,
3445 ir_node *new_block = be_transform_node(block);
3447 ir_mode *smaller_mode;
3448 ia32_address_mode_t am;
3449 ia32_address_t *addr = &am.addr;
/* the conversion is defined by the smaller of the two modes */
3452 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3453 smaller_mode = src_mode;
3455 smaller_mode = tgt_mode;
3458 #ifdef DEBUG_libfirm
3460 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3465 match_arguments(&am, block, NULL, op, NULL,
3466 match_am | match_8bit_am | match_16bit_am);
3468 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3469 /* unnecessary conv. in theory it shouldn't have been AM */
3470 assert(is_ia32_NoReg_GP(addr->base));
3471 assert(is_ia32_NoReg_GP(addr->index));
3472 assert(is_NoMem(addr->mem));
3473 assert(am.addr.offset == 0);
3474 assert(am.addr.symconst_ent == NULL);
3478 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3479 addr->mem, am.new_op2, smaller_mode);
3480 set_am_attributes(new_node, &am);
3481 /* match_arguments assume that out-mode = in-mode, this isn't true here
3483 set_ia32_ls_mode(new_node, smaller_mode);
3484 SET_IA32_ORIG_NODE(new_node, node);
3485 new_node = fix_mem_proj(new_node, &am);
3490 * Transforms a Conv node.
3492 * @return The created ia32 Conv node
3494 static ir_node *gen_Conv(ir_node *node)
3496 ir_node *block = get_nodes_block(node);
3497 ir_node *new_block = be_transform_node(block);
3498 ir_node *op = get_Conv_op(node);
3499 ir_node *new_op = NULL;
3500 dbg_info *dbgi = get_irn_dbg_info(node);
3501 ir_mode *src_mode = get_irn_mode(op);
3502 ir_mode *tgt_mode = get_irn_mode(node);
3503 int src_bits = get_mode_size_bits(src_mode);
3504 int tgt_bits = get_mode_size_bits(tgt_mode);
3505 ir_node *res = NULL;
3507 assert(!mode_is_int(src_mode) || src_bits <= 32);
3508 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3510 /* modeB -> X should already be lowered by the lower_mode_b pass */
3511 if (src_mode == mode_b) {
3512 panic("ConvB not lowered %+F", node);
/* no-op conversions: only strict x87 convs carry meaning here */
3515 if (src_mode == tgt_mode) {
3516 if (get_Conv_strict(node)) {
3517 if (ia32_cg_config.use_sse2) {
3518 /* when we are in SSE mode, we can kill all strict no-op conversion */
3519 return be_transform_node(op);
3522 /* this should be optimized already, but who knows... */
3523 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3524 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3525 return be_transform_node(op);
3529 if (mode_is_float(src_mode)) {
3530 new_op = be_transform_node(op);
3531 /* we convert from float ... */
3532 if (mode_is_float(tgt_mode)) {
3534 /* Matze: I'm a bit unsure what the following is for? seems wrong
3536 if (src_mode == mode_E && tgt_mode == mode_D
3537 && !get_Conv_strict(node)) {
3538 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3544 if (ia32_cg_config.use_sse2) {
3545 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3546 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3548 set_ia32_ls_mode(res, tgt_mode);
3550 if (get_Conv_strict(node)) {
3551 /* if fp_no_float_fold is not set then we assume that we
3552 * don't have any float operations in a non
3553 * mode_float_arithmetic mode and can skip strict upconvs */
3554 if (src_bits < tgt_bits
3555 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3556 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
/* downconvs must round: go through memory (store+load) */
3559 res = gen_x87_strict_conv(tgt_mode, new_op);
3560 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3564 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3569 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3570 if (ia32_cg_config.use_sse2) {
3571 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3573 set_ia32_ls_mode(res, src_mode);
3575 return gen_x87_fp_to_gp(node);
3579 /* we convert from int ... */
3580 if (mode_is_float(tgt_mode)) {
3582 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3583 if (ia32_cg_config.use_sse2) {
3584 new_op = be_transform_node(op);
3585 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3587 set_ia32_ls_mode(res, tgt_mode);
3589 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3590 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3591 res = gen_x87_gp_to_fp(node, src_mode);
3593 /* we need a strict-Conv, if the int mode has more bits than the
3595 if (float_mantissa < int_mantissa) {
3596 res = gen_x87_strict_conv(tgt_mode, res);
3597 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3601 } else if (tgt_mode == mode_b) {
3602 /* mode_b lowering already took care that we only have 0/1 values */
3603 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3604 src_mode, tgt_mode));
3605 return be_transform_node(op);
3608 if (src_bits == tgt_bits) {
/* same width int->int conv is a no-op on ia32 */
3609 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3610 src_mode, tgt_mode));
3611 return be_transform_node(op);
3614 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Try to encode @p node as an ia32 Immediate; fall back to the normal
 * node transformation when that is not possible.
 */
3622 static ir_node *create_immediate_or_transform(ir_node *node,
3623 char immediate_constraint_type)
3625 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3626 if (new_node == NULL) {
3627 new_node = be_transform_node(node);
3633 * Transforms a FrameAddr into an ia32 Add.
3635 static ir_node *gen_be_FrameAddr(ir_node *node)
3637 ir_node *block = be_transform_node(get_nodes_block(node));
3638 ir_node *op = be_get_FrameAddr_frame(node);
3639 ir_node *new_op = be_transform_node(op);
3640 dbg_info *dbgi = get_irn_dbg_info(node);
/* frame-entity offset is resolved later; mark the Lea as frame-based */
3643 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3644 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3645 set_ia32_use_frame(new_node);
3647 SET_IA32_ORIG_NODE(new_node, node);
3653 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
3655 static ir_node *gen_be_Return(ir_node *node)
3657 ir_graph *irg = current_ir_graph;
3658 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3659 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3660 ir_entity *ent = get_irg_entity(irg);
3661 ir_type *tp = get_entity_type(ent);
3666 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3667 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3669 int pn_ret_val, pn_ret_mem, arity, i;
3671 assert(ret_val != NULL);
/* only relevant for SSE code returning a float result; the ABI demands
 * float returns in the x87 TOS register */
3672 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3673 return be_duplicate_node(node);
3676 res_type = get_method_res_type(tp, 0);
3678 if (! is_Primitive_type(res_type)) {
3679 return be_duplicate_node(node);
3682 mode = get_type_mode(res_type);
3683 if (! mode_is_float(mode)) {
3684 return be_duplicate_node(node);
3687 assert(get_method_n_ress(tp) == 1);
3689 pn_ret_val = get_Proj_proj(ret_val);
3690 pn_ret_mem = get_Proj_proj(ret_mem);
3692 /* get the Barrier */
3693 barrier = get_Proj_pred(ret_val);
3695 /* get result input of the Barrier */
3696 ret_val = get_irn_n(barrier, pn_ret_val);
3697 new_ret_val = be_transform_node(ret_val);
3699 /* get memory input of the Barrier */
3700 ret_mem = get_irn_n(barrier, pn_ret_mem);
3701 new_ret_mem = be_transform_node(ret_mem);
3703 frame = get_irg_frame(irg);
3705 dbgi = get_irn_dbg_info(barrier);
3706 block = be_transform_node(get_nodes_block(barrier));
3708 /* store xmm0 onto stack */
3709 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3710 new_ret_mem, new_ret_val);
3711 set_ia32_ls_mode(sse_store, mode);
3712 set_ia32_op_type(sse_store, ia32_AddrModeD);
3713 set_ia32_use_frame(sse_store);
3715 /* load into x87 register */
3716 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3717 set_ia32_op_type(fld, ia32_AddrModeS);
3718 set_ia32_use_frame(fld);
3720 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3721 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3723 /* create a new barrier */
/* rebuild the Barrier with the x87 value/memory substituted for the
 * original SSE result/memory inputs */
3724 arity = get_irn_arity(barrier);
3725 in = ALLOCAN(ir_node*, arity);
3726 for (i = 0; i < arity; ++i) {
3729 if (i == pn_ret_val) {
3731 } else if (i == pn_ret_mem) {
3734 ir_node *in = get_irn_n(barrier, i);
3735 new_in = be_transform_node(in);
3740 new_barrier = new_ir_node(dbgi, irg, block,
3741 get_irn_op(barrier), get_irn_mode(barrier),
3743 copy_node_attr(barrier, new_barrier);
3744 be_duplicate_deps(barrier, new_barrier);
3745 be_set_transformed_node(barrier, new_barrier);
3747 /* transform normally */
3748 return be_duplicate_node(node);
3752 * Transform a be_AddSP into an ia32_SubSP.
3754 static ir_node *gen_be_AddSP(ir_node *node)
3756 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3757 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3759 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3760 match_am | match_immediate);
3764 * Transform a be_SubSP into an ia32_AddSP
3766 static ir_node *gen_be_SubSP(ir_node *node)
3768 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3769 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3771 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3772 match_am | match_immediate);
/**
 * Transform a Phi node: select the output register requirement for the
 * result (gp for integer modes, xmm or vfp for float depending on SSE2,
 * no requirement otherwise), build the new Phi with the OLD predecessors
 * (Phis may close loops, so predecessors are fixed up later) and enqueue
 * the predecessors for transformation.
 */
3776 * Change some phi modes
3778 static ir_node *gen_Phi(ir_node *node)
3780 const arch_register_req_t *req;
3781 ir_node *block = be_transform_node(get_nodes_block(node));
3782 ir_graph *irg = current_ir_graph;
3783 dbg_info *dbgi = get_irn_dbg_info(node);
3784 ir_mode *mode = get_irn_mode(node);
3787 if (ia32_mode_needs_gp_reg(mode)) {
3788 /* we shouldn't have any 64bit stuff around anymore */
3789 assert(get_mode_size_bits(mode) <= 32);
3790 /* all integer operations are on 32bit registers now */
3792 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
3793 } else if (mode_is_float(mode)) {
3794 if (ia32_cg_config.use_sse2) {
3796 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
3799 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
3802 req = arch_no_register_req;
3805 /* phi nodes allow loops, so we use the old arguments for now
3806 * and fix this later */
3807 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3808 get_irn_in(node) + 1);
3809 copy_node_attr(node, phi);
3810 be_duplicate_deps(node, phi);
3812 arch_set_out_register_req(phi, 0, req);
/* make sure the predecessors get transformed even though we kept the
 * old inputs for now */
3814 be_enqueue_preds(node);
/**
 * Transform an unconditional Jmp into an ia32_Jmp in the transformed
 * block.
 * NOTE(review): the final return statement lies outside this sampled view.
 */
3819 static ir_node *gen_Jmp(ir_node *node)
3821 ir_node *block = get_nodes_block(node);
3822 ir_node *new_block = be_transform_node(block);
3823 dbg_info *dbgi = get_irn_dbg_info(node);
3826 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3827 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Transform an IJmp (indirect/computed jump).  The jump target is matched
 * as an address-mode operand, so it may come from memory or be an
 * immediate.
 * NOTE(review): the final return statement lies outside this sampled view.
 */
3835 static ir_node *gen_IJmp(ir_node *node)
3837 ir_node *block = get_nodes_block(node);
3838 ir_node *new_block = be_transform_node(block);
3839 dbg_info *dbgi = get_irn_dbg_info(node);
3840 ir_node *op = get_IJmp_target(node);
3842 ia32_address_mode_t am;
3843 ia32_address_t *addr = &am.addr;
/* jump targets are always pointers */
3845 assert(get_irn_mode(op) == mode_P);
3847 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3849 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3850 addr->mem, am.new_op2);
3851 set_am_attributes(new_node, &am);
3852 SET_IA32_ORIG_NODE(new_node, node);
3854 new_node = fix_mem_proj(new_node, &am);
/**
 * Only the common case with a constant-zero lower bound is handled:
 * index < upper is tested with a Sub and an unsigned-less Jcc
 * (pn_Cmp_Lt | ia32_pn_Cmp_unsigned), so a "negative" index also fails
 * the check.  Any other Bound panics.
 */
3860 * Transform a Bound node.
3862 static ir_node *gen_Bound(ir_node *node)
3865 ir_node *lower = get_Bound_lower(node);
3866 dbg_info *dbgi = get_irn_dbg_info(node);
3868 if (is_Const_0(lower)) {
3869 /* typical case for Java */
3870 ir_node *sub, *res, *flags, *block;
3872 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3873 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3875 block = get_nodes_block(res);
/* make sure we have a mode_T Sub so both result and flags can be
 * projected */
3876 if (! is_Proj(res)) {
3878 set_irn_mode(sub, mode_T);
3879 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3881 sub = get_Proj_pred(res);
3883 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3884 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3885 SET_IA32_ORIG_NODE(new_node, node);
3887 panic("generic Bound not supported in ia32 Backend");
3893 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3895 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3896 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3898 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3899 match_immediate | match_mode_neutral);
/**
 * Transform an ia32_l_ShrDep into a real ia32 Shr via gen_shift_binop.
 * NOTE(review): the continuation line with the match flags is outside
 * this sampled view.
 */
3902 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3904 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3905 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3906 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/**
 * Transform an ia32_l_SarDep into a real ia32 Sar via gen_shift_binop.
 * NOTE(review): the continuation line with the match flags is outside
 * this sampled view.
 */
3910 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3912 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3913 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3914 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform an ia32_l_Add into an ia32 Add.  The Add node is switched to
 * mode_T so additional results can be projected from it — presumably the
 * flags/carry output consumed by the 64bit lowering (TODO confirm; the
 * returning code is outside this sampled view).
 */
3918 static ir_node *gen_ia32_l_Add(ir_node *node)
3920 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3921 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3922 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3923 match_commutative | match_am | match_immediate |
3924 match_mode_neutral);
/* gen_binop may hand back a Proj; step back to the Add itself */
3926 if (is_Proj(lowered)) {
3927 lowered = get_Proj_pred(lowered);
3929 assert(is_ia32_Add(lowered));
3930 set_irn_mode(lowered, mode_T);
3936 static ir_node *gen_ia32_l_Adc(ir_node *node)
3938 return gen_binop_flags(node, new_bd_ia32_Adc,
3939 match_commutative | match_am | match_immediate |
3940 match_mode_neutral);
3944 * Transforms a l_MulS into a "real" MulS node.
3946 * @return the created ia32 Mul node
3948 static ir_node *gen_ia32_l_Mul(ir_node *node)
3950 ir_node *left = get_binop_left(node);
3951 ir_node *right = get_binop_right(node);
3953 return gen_binop(node, left, right, new_bd_ia32_Mul,
3954 match_commutative | match_am | match_mode_neutral);
3958 * Transforms a l_IMulS into a "real" IMul1OPS node.
3960 * @return the created ia32 IMul1OP node
3962 static ir_node *gen_ia32_l_IMul(ir_node *node)
3964 ir_node *left = get_binop_left(node);
3965 ir_node *right = get_binop_right(node);
3967 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3968 match_commutative | match_am | match_mode_neutral);
/**
 * Transform an ia32_l_Sub into an ia32 Sub.  The Sub node is switched to
 * mode_T so additional results can be projected from it — presumably the
 * flags/borrow output consumed by the 64bit lowering (TODO confirm; the
 * returning code is outside this sampled view).
 */
3971 static ir_node *gen_ia32_l_Sub(ir_node *node)
3973 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3974 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3975 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3976 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; step back to the Sub itself */
3978 if (is_Proj(lowered)) {
3979 lowered = get_Proj_pred(lowered);
3981 assert(is_ia32_Sub(lowered));
3982 set_irn_mode(lowered, mode_T);
3988 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3990 return gen_binop_flags(node, new_bd_ia32_Sbb,
3991 match_am | match_immediate | match_mode_neutral);
3995 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3996 * op1 - target to be shifted
3997 * op2 - contains bits to be shifted into target
3999 * Only op3 can be an immediate.
4001 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4002 ir_node *low, ir_node *count)
4004 ir_node *block = get_nodes_block(node);
4005 ir_node *new_block = be_transform_node(block);
4006 dbg_info *dbgi = get_irn_dbg_info(node);
4007 ir_node *new_high = be_transform_node(high);
4008 ir_node *new_low = be_transform_node(low);
4012 /* the shift amount can be any mode that is bigger than 5 bits, since all
4013 * other bits are ignored anyway */
/* strip single-user integer Convs around the count: only the low bits of
 * the shift amount matter */
4014 while (is_Conv(count) &&
4015 get_irn_n_edges(count) == 1 &&
4016 mode_is_int(get_irn_mode(count))) {
4017 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4018 count = get_Conv_op(count);
4020 new_count = create_immediate_or_transform(count, 0);
/* pick ShlD or ShrD depending on the lowered node's opcode */
4022 if (is_ia32_l_ShlD(node)) {
4023 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4026 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4029 SET_IA32_ORIG_NODE(new_node, node);
4034 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4036 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4037 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4038 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4039 return gen_lowered_64bit_shifts(node, high, low, count);
4042 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4044 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4045 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4046 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4047 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an l_LLtoFloat: convert a 64bit integer, given as two 32bit
 * halves, to a float.  The halves are stored into a frame slot (low word
 * at offset 0, high word at offset 4), the slot is reloaded as one 64bit
 * integer with x87 fild, and — because fild interprets the value as
 * signed — an unsigned source gets a bias correction added when the high
 * word's sign bit is set.  Only implemented for the x87 path.
 *
 * NOTE(review): this view is line-sampled; some statements (store value
 * inputs, Sync inputs, am.new_op1 setup, final return) are not visible.
 */
4050 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4052 ir_node *src_block = get_nodes_block(node);
4053 ir_node *block = be_transform_node(src_block);
4054 ir_graph *irg = current_ir_graph;
4055 dbg_info *dbgi = get_irn_dbg_info(node);
4056 ir_node *frame = get_irg_frame(irg);
4057 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4058 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4059 ir_node *new_val_low = be_transform_node(val_low);
4060 ir_node *new_val_high = be_transform_node(val_high);
4062 ir_node *sync, *fild, *res;
4063 ir_node *store_low, *store_high;
4065 if (ia32_cg_config.use_sse2) {
4066 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill both halves to the frame so fild can read them as one 64bit
 * value */
4070 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4072 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4074 SET_IA32_ORIG_NODE(store_low, node);
4075 SET_IA32_ORIG_NODE(store_high, node);
4077 set_ia32_use_frame(store_low);
4078 set_ia32_use_frame(store_high);
4079 set_ia32_op_type(store_low, ia32_AddrModeD);
4080 set_ia32_op_type(store_high, ia32_AddrModeD);
4081 set_ia32_ls_mode(store_low, mode_Iu);
4082 set_ia32_ls_mode(store_high, mode_Is);
/* high word lives 4 bytes above the low word (little endian) */
4083 add_ia32_am_offs_int(store_high, 4);
4087 sync = new_rd_Sync(dbgi, block, 2, in);
/* reload the slot as a 64bit (signed) integer */
4090 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4092 set_ia32_use_frame(fild);
4093 set_ia32_op_type(fild, ia32_AddrModeS);
4094 set_ia32_ls_mode(fild, mode_Ls);
4096 SET_IA32_ORIG_NODE(fild, node);
4098 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
/* unsigned source: if the high word's sign bit was set, fild produced a
 * value that is off by 2^64 — add the ULL bias constant, selected by
 * using (high >> 31) as the address index */
4100 if (! mode_is_signed(get_irn_mode(val_high))) {
4101 ia32_address_mode_t am;
4103 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4106 am.addr.base = noreg_GP;
4107 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4108 am.addr.mem = nomem;
4111 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4112 am.addr.use_frame = 0;
4113 am.addr.frame_entity = NULL;
4114 am.addr.symconst_sign = 0;
4115 am.ls_mode = mode_F;
4116 am.mem_proj = nomem;
4117 am.op_type = ia32_AddrModeS;
4119 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4120 am.pinned = op_pin_state_floats;
4122 am.ins_permuted = 0;
4124 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4125 am.new_op1, am.new_op2, get_fpcw());
4126 set_am_attributes(fadd, &am);
4128 set_irn_mode(fadd, mode_T);
4129 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform an l_FloattoLL: store an x87 float to a 64bit frame slot via
 * fist.  The two 32bit halves are read back later by
 * gen_Proj_l_FloattoLL.
 * NOTE(review): the final return statement is outside this sampled view.
 */
4134 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4136 ir_node *src_block = get_nodes_block(node);
4137 ir_node *block = be_transform_node(src_block);
4138 ir_graph *irg = get_Block_irg(block);
4139 dbg_info *dbgi = get_irn_dbg_info(node);
4140 ir_node *frame = get_irg_frame(irg);
4141 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4142 ir_node *new_val = be_transform_node(val);
4143 ir_node *fist, *mem;
4145 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4146 SET_IA32_ORIG_NODE(fist, node);
4147 set_ia32_use_frame(fist);
4148 set_ia32_op_type(fist, ia32_AddrModeD);
/* store the full 64bit integer */
4149 set_ia32_ls_mode(fist, mode_Ls);
/**
 * Fallback registered for nodes that must not occur at this stage;
 * aborts compilation with a panic naming the offending node.
 */
4155 * the BAD transformer.
4157 static ir_node *bad_transform(ir_node *node)
4159 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: load one 32bit half of the 64bit
 * value that gen_ia32_l_FloattoLL stored to the frame.  The high half is
 * read at offset +4; both loads need the full 64bit stack slot.
 * NOTE(review): this view is line-sampled; the final return is not
 * visible.
 */
4163 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4165 ir_node *block = be_transform_node(get_nodes_block(node));
4166 ir_graph *irg = get_Block_irg(block);
4167 ir_node *pred = get_Proj_pred(node);
4168 ir_node *new_pred = be_transform_node(pred);
4169 ir_node *frame = get_irg_frame(irg);
4170 dbg_info *dbgi = get_irn_dbg_info(node);
4171 long pn = get_Proj_proj(node);
4176 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4177 SET_IA32_ORIG_NODE(load, node);
4178 set_ia32_use_frame(load);
4179 set_ia32_op_type(load, ia32_AddrModeS);
4180 set_ia32_ls_mode(load, mode_Iu);
4181 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4182 * 32 bit from it with this particular load */
4183 attr = get_ia32_attr(load);
4184 attr->data.need_64bit_stackent = 1;
4186 if (pn == pn_ia32_l_FloattoLL_res_high) {
4187 add_ia32_am_offs_int(load, 4);
4189 assert(pn == pn_ia32_l_FloattoLL_res_low);
4192 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * The be_AddSP itself was turned into an ia32_SubSP (see gen_be_AddSP),
 * so the be-level proj numbers are mapped onto the SubSP's proj numbers.
 * The stack-pointer result is pinned to ESP.
 */
4198 * Transform the Projs of an AddSP.
4200 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4202 ir_node *block = be_transform_node(get_nodes_block(node));
4203 ir_node *pred = get_Proj_pred(node);
4204 ir_node *new_pred = be_transform_node(pred);
4205 dbg_info *dbgi = get_irn_dbg_info(node);
4206 long proj = get_Proj_proj(node);
4208 if (proj == pn_be_AddSP_sp) {
4209 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4210 pn_ia32_SubSP_stack);
4211 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4213 } else if (proj == pn_be_AddSP_res) {
4214 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4215 pn_ia32_SubSP_addr);
4216 } else if (proj == pn_be_AddSP_M) {
4217 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4220 panic("No idea how to transform proj->AddSP");
/**
 * The be_SubSP itself was turned into an ia32_AddSP (see gen_be_SubSP),
 * so the be-level proj numbers are mapped onto the AddSP's proj numbers.
 * The stack-pointer result is pinned to ESP.
 */
4224 * Transform the Projs of a SubSP.
4226 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4228 ir_node *block = be_transform_node(get_nodes_block(node));
4229 ir_node *pred = get_Proj_pred(node);
4230 ir_node *new_pred = be_transform_node(pred);
4231 dbg_info *dbgi = get_irn_dbg_info(node);
4232 long proj = get_Proj_proj(node);
4234 if (proj == pn_be_SubSP_sp) {
4235 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4236 pn_ia32_AddSP_stack);
4237 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4239 } else if (proj == pn_be_SubSP_M) {
4240 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4243 panic("No idea how to transform proj->SubSP");
/**
 * Renumbering depends on which ia32 node the Load was transformed into:
 * ia32_Load (integer), Conv_I2I[8Bit] (load folded into a conversion),
 * xLoad (SSE) or vfld (x87).  ProjMs of multi-user Loads are deliberately
 * left untransformed so the Load can still be folded as a source
 * address-mode operand later.  Exception projs mark the load with an
 * exception label.
 */
4247 * Transform and renumber the Projs from a Load.
4249 static ir_node *gen_Proj_Load(ir_node *node)
4252 ir_node *block = be_transform_node(get_nodes_block(node));
4253 ir_node *pred = get_Proj_pred(node);
4254 dbg_info *dbgi = get_irn_dbg_info(node);
4255 long proj = get_Proj_proj(node);
4257 /* loads might be part of source address mode matches, so we don't
4258 * transform the ProjMs yet (with the exception of loads whose result is
4261 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4263 ir_node *old_block = get_nodes_block(node);
4265 /* this is needed, because sometimes we have loops that are only
4266 reachable through the ProjM */
4267 be_enqueue_preds(node);
4268 /* do it in 2 steps, to silence firm verifier */
4269 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4270 set_Proj_proj(res, pn_ia32_mem);
4274 /* renumber the proj */
4275 new_pred = be_transform_node(pred);
4276 if (is_ia32_Load(new_pred)) {
4279 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4281 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4282 case pn_Load_X_regular:
4283 return new_rd_Jmp(dbgi, block);
4284 case pn_Load_X_except:
4285 /* This Load might raise an exception. Mark it. */
4286 set_ia32_exc_label(new_pred, 1);
4287 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the Load was folded into a conversion: project from the Conv */
4291 } else if (is_ia32_Conv_I2I(new_pred) ||
4292 is_ia32_Conv_I2I8Bit(new_pred)) {
4293 set_irn_mode(new_pred, mode_T);
4294 if (proj == pn_Load_res) {
4295 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4296 } else if (proj == pn_Load_M) {
4297 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4299 } else if (is_ia32_xLoad(new_pred)) {
4302 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4304 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4305 case pn_Load_X_regular:
4306 return new_rd_Jmp(dbgi, block);
4307 case pn_Load_X_except:
4308 /* This Load might raise an exception. Mark it. */
4309 set_ia32_exc_label(new_pred, 1);
4310 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4314 } else if (is_ia32_vfld(new_pred)) {
4317 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4319 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4320 case pn_Load_X_regular:
4321 return new_rd_Jmp(dbgi, block);
4322 case pn_Load_X_except:
4323 /* This Load might raise an exception. Mark it. */
4324 set_ia32_exc_label(new_pred, 1);
4325 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4330 /* can happen for ProJMs when source address mode happened for the
4333 /* however it should not be the result proj, as that would mean the
4334 load had multiple users and should not have been used for
4336 if (proj != pn_Load_M) {
4337 panic("internal error: transformed node not a Load");
4339 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4342 panic("No idea how to transform proj");
/**
 * Div, Mod and DivMod are all lowered to a single ia32 Div/IDiv node, so
 * the firm-level proj numbers of each opcode are mapped onto the ia32
 * Div's proj numbers (div result, mod result, memory, control flow,
 * exception).
 */
4346 * Transform and renumber the Projs from a DivMod like instruction.
4348 static ir_node *gen_Proj_DivMod(ir_node *node)
4350 ir_node *block = be_transform_node(get_nodes_block(node));
4351 ir_node *pred = get_Proj_pred(node);
4352 ir_node *new_pred = be_transform_node(pred);
4353 dbg_info *dbgi = get_irn_dbg_info(node);
4354 long proj = get_Proj_proj(node);
4356 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the original opcode: Div, Mod or DivMod */
4358 switch (get_irn_opcode(pred)) {
4362 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4364 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4365 case pn_Div_X_regular:
4366 return new_rd_Jmp(dbgi, block);
4367 case pn_Div_X_except:
4368 set_ia32_exc_label(new_pred, 1);
4369 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4377 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4379 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4380 case pn_Mod_X_except:
4381 set_ia32_exc_label(new_pred, 1);
4382 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4390 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4391 case pn_DivMod_res_div:
4392 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4393 case pn_DivMod_res_mod:
4394 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4395 case pn_DivMod_X_regular:
4396 return new_rd_Jmp(dbgi, block);
4397 case pn_DivMod_X_except:
4398 set_ia32_exc_label(new_pred, 1);
4399 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4408 panic("No idea how to transform proj->DivMod");
/**
 * A CopyB becomes either an ia32_CopyB_i (constant size) or an
 * ia32_CopyB; the memory proj is renumbered accordingly.
 */
4412 * Transform and renumber the Projs from a CopyB.
4414 static ir_node *gen_Proj_CopyB(ir_node *node)
4416 ir_node *block = be_transform_node(get_nodes_block(node));
4417 ir_node *pred = get_Proj_pred(node);
4418 ir_node *new_pred = be_transform_node(pred);
4419 dbg_info *dbgi = get_irn_dbg_info(node);
4420 long proj = get_Proj_proj(node);
4423 case pn_CopyB_M_regular:
4424 if (is_ia32_CopyB_i(new_pred)) {
4425 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4426 } else if (is_ia32_CopyB(new_pred)) {
4427 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4434 panic("No idea how to transform proj->CopyB");
/**
 * A Quot becomes either an SSE xDiv or an x87 vfdiv; memory and result
 * projs are renumbered accordingly.  Control-flow projs (X_regular /
 * X_except) are not supported here.
 */
4438 * Transform and renumber the Projs from a Quot.
4440 static ir_node *gen_Proj_Quot(ir_node *node)
4442 ir_node *block = be_transform_node(get_nodes_block(node));
4443 ir_node *pred = get_Proj_pred(node);
4444 ir_node *new_pred = be_transform_node(pred);
4445 dbg_info *dbgi = get_irn_dbg_info(node);
4446 long proj = get_Proj_proj(node);
4450 if (is_ia32_xDiv(new_pred)) {
4451 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4452 } else if (is_ia32_vfdiv(new_pred)) {
4453 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4457 if (is_ia32_xDiv(new_pred)) {
4458 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4459 } else if (is_ia32_vfdiv(new_pred)) {
4460 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4463 case pn_Quot_X_regular:
4464 case pn_Quot_X_except:
4469 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32_Call.
 *
 * The callee address is matched as an address-mode operand (memory or
 * immediate).  Register parameters constrained to eax/ecx/edx are
 * collected into dedicated inputs; the fpcw input is taken from the last
 * operand.  If the call returns a float, the x87 simulator is enabled.
 * PIC adjustment is suppressed while matching the callee address (PIC
 * trampoline calls).  Under SSE2 the call is recorded for
 * post-processing.
 *
 * NOTE(review): this view is line-sampled; some declarations and the
 * final return are not visible.
 */
4472 static ir_node *gen_be_Call(ir_node *node)
4474 dbg_info *const dbgi = get_irn_dbg_info(node);
4475 ir_node *const src_block = get_nodes_block(node);
4476 ir_node *const block = be_transform_node(src_block);
4477 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4478 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4479 ir_node *const sp = be_transform_node(src_sp);
4480 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4481 ia32_address_mode_t am;
4482 ia32_address_t *const addr = &am.addr;
4487 ir_node * eax = noreg_GP;
4488 ir_node * ecx = noreg_GP;
4489 ir_node * edx = noreg_GP;
4490 unsigned const pop = be_Call_get_pop(node);
4491 ir_type *const call_tp = be_Call_get_type(node);
4492 int old_no_pic_adjust;
4494 /* Run the x87 simulator if the call returns a float value */
4495 if (get_method_n_ress(call_tp) > 0) {
4496 ir_type *const res_type = get_method_res_type(call_tp, 0);
4497 ir_mode *const res_mode = get_type_mode(res_type);
4499 if (res_mode != NULL && mode_is_float(res_mode)) {
4500 env_cg->do_x87_sim = 1;
4504 /* We do not want be_Call direct calls */
4505 assert(be_Call_get_entity(node) == NULL);
4507 /* special case for PIC trampoline calls */
4508 old_no_pic_adjust = no_pic_adjust;
4509 no_pic_adjust = env_cg->birg->main_env->options->pic;
4511 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4512 match_am | match_immediate);
4514 no_pic_adjust = old_no_pic_adjust;
/* last input is the fpcw; the remaining trailing inputs are register
 * parameters constrained to eax/ecx/edx */
4516 i = get_irn_arity(node) - 1;
4517 fpcw = be_transform_node(get_irn_n(node, i--));
4518 for (; i >= be_pos_Call_first_arg; --i) {
4519 arch_register_req_t const *const req = arch_get_register_req(node, i);
4520 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4522 assert(req->type == arch_register_req_type_limited);
4523 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4525 switch (*req->limited) {
4526 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4527 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4528 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4529 default: panic("Invalid GP register for register parameter");
4533 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4534 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4535 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4536 set_am_attributes(call, &am);
4537 call = fix_mem_proj(call, &am);
4539 if (get_irn_pinned(node) == op_pin_state_pinned)
4540 set_irn_pinned(call, op_pin_state_pinned);
4542 SET_IA32_ORIG_NODE(call, node);
4544 if (ia32_cg_config.use_sse2) {
4545 /* remember this call for post-processing */
4546 ARR_APP1(ir_node *, call_list, call);
4547 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4554 * Transform Builtin trap
4556 static ir_node *gen_trap(ir_node *node) {
4557 dbg_info *dbgi = get_irn_dbg_info(node);
4558 ir_node *block = be_transform_node(get_nodes_block(node));
4559 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4561 return new_bd_ia32_UD2(dbgi, block, mem);
4565 * Transform Builtin debugbreak
4567 static ir_node *gen_debugbreak(ir_node *node) {
4568 dbg_info *dbgi = get_irn_dbg_info(node);
4569 ir_node *block = be_transform_node(get_nodes_block(node));
4570 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4572 return new_bd_ia32_Breakpoint(dbgi, block, mem);
/**
 * Climbs "value" frames up (ClimbFrame, when the requested level is
 * non-zero — the guarding condition is outside this sampled view) and
 * loads the return address slot of that frame.  Floating (non-pinned)
 * loads are additionally marked rematerializable.
 */
4576 * Transform Builtin return_address
4578 static ir_node *gen_return_address(ir_node *node) {
4579 ir_node *param = get_Builtin_param(node, 0);
4580 ir_node *frame = get_Builtin_param(node, 1);
4581 dbg_info *dbgi = get_irn_dbg_info(node);
/* first builtin parameter: constant frame level to walk up */
4582 tarval *tv = get_Const_tarval(param);
4583 unsigned long value = get_tarval_long(tv);
4585 ir_node *block = be_transform_node(get_nodes_block(node));
4586 ir_node *ptr = be_transform_node(frame);
4590 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4591 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4592 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4595 /* load the return address from this frame */
4596 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4598 set_irn_pinned(load, get_irn_pinned(node));
4599 set_ia32_op_type(load, ia32_AddrModeS);
4600 set_ia32_ls_mode(load, mode_Iu);
4602 set_ia32_am_offs_int(load, 0);
4603 set_ia32_use_frame(load);
4604 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4606 if (get_irn_pinned(node) == op_pin_state_floats) {
4607 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4608 && pn_ia32_vfld_res == pn_ia32_Load_res
4609 && pn_ia32_Load_res == pn_ia32_res);
4610 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4613 SET_IA32_ORIG_NODE(load, node);
4614 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Analogous to gen_return_address, but loads the saved frame-pointer
 * slot of the selected frame instead of the return address.
 */
4618 * Transform Builtin frame_address
4620 static ir_node *gen_frame_address(ir_node *node) {
4621 ir_node *param = get_Builtin_param(node, 0);
4622 ir_node *frame = get_Builtin_param(node, 1);
4623 dbg_info *dbgi = get_irn_dbg_info(node);
/* first builtin parameter: constant frame level to walk up */
4624 tarval *tv = get_Const_tarval(param);
4625 unsigned long value = get_tarval_long(tv);
4627 ir_node *block = be_transform_node(get_nodes_block(node));
4628 ir_node *ptr = be_transform_node(frame);
4633 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4634 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4635 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4638 /* load the frame address from this frame */
4639 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4641 set_irn_pinned(load, get_irn_pinned(node));
4642 set_ia32_op_type(load, ia32_AddrModeS);
4643 set_ia32_ls_mode(load, mode_Iu);
4645 ent = ia32_get_frame_address_entity();
4647 set_ia32_am_offs_int(load, 0);
4648 set_ia32_use_frame(load);
4649 set_ia32_frame_ent(load, ent);
4651 /* will fail anyway, but gcc does this: */
4652 set_ia32_am_offs_int(load, 0);
4655 if (get_irn_pinned(node) == op_pin_state_floats) {
4656 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4657 && pn_ia32_vfld_res == pn_ia32_Load_res
4658 && pn_ia32_Load_res == pn_ia32_res);
4659 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4662 SET_IA32_ORIG_NODE(load, node);
4663 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4667 * Transform Builtin prefetch
4669 static ir_node *gen_prefetch(ir_node *node) {
4671 ir_node *ptr, *block, *mem, *base, *index;
4672 ir_node *param, *new_node;
4675 ia32_address_t addr;
/* without SSE or 3DNow! prefetch support, the builtin is a no-op and
 * only the memory edge is routed through */
4677 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4678 /* no prefetch at all, route memory */
4679 return be_transform_node(get_Builtin_mem(node));
/* second builtin parameter: read (0) vs. write (1) hint */
4682 param = get_Builtin_param(node, 1);
4683 tv = get_Const_tarval(param);
4684 rw = get_tarval_long(tv);
4686 /* construct load address */
4687 memset(&addr, 0, sizeof(addr));
4688 ptr = get_Builtin_param(node, 0);
4689 ia32_create_address_mode(&addr, ptr, 0);
4696 base = be_transform_node(base);
4699 if (index == NULL) {
4702 index = be_transform_node(index);
4705 dbgi = get_irn_dbg_info(node);
4706 block = be_transform_node(get_nodes_block(node));
4707 mem = be_transform_node(get_Builtin_mem(node));
4709 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4710 /* we have 3DNow!, this was already checked above */
4711 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4712 } else if (ia32_cg_config.use_sse_prefetch) {
4713 /* note: rw == 1 is IGNORED in that case */
/* third builtin parameter: temporal locality hint selecting
 * prefetchNTA/2/1/0 */
4714 param = get_Builtin_param(node, 2);
4715 tv = get_Const_tarval(param);
4716 locality = get_tarval_long(tv);
4718 /* SSE style prefetch */
4721 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4724 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4727 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4730 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4734 assert(ia32_cg_config.use_3dnow_prefetch);
4735 /* 3DNow! style prefetch */
4736 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4739 set_irn_pinned(new_node, get_irn_pinned(node));
4740 set_ia32_op_type(new_node, ia32_AddrModeS);
4741 set_ia32_ls_mode(new_node, mode_Bu);
4742 set_address(new_node, &addr);
4744 SET_IA32_ORIG_NODE(new_node, node);
4746 be_dep_on_frame(new_node);
4747 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
/**
 * Shared helper for bsf/bsr-like builtins: matches the single parameter
 * as an address-mode operand, constructs the node via @p func and fixes
 * up the memory proj.
 */
4751 * Transform bsf like node
4753 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4755 ir_node *param = get_Builtin_param(node, 0);
4756 dbg_info *dbgi = get_irn_dbg_info(node);
4758 ir_node *block = get_nodes_block(node);
4759 ir_node *new_block = be_transform_node(block);
4761 ia32_address_mode_t am;
4762 ia32_address_t *addr = &am.addr;
4765 match_arguments(&am, block, NULL, param, NULL, match_am);
4767 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4768 set_am_attributes(cnt, &am);
/* operate in the width of the original operand */
4769 set_ia32_ls_mode(cnt, get_irn_mode(param));
4771 SET_IA32_ORIG_NODE(cnt, node);
4772 return fix_mem_proj(cnt, &am);
/**
 * Implemented as (bsf(x) | -(x == 0)) + 1: bsf delivers the index of the
 * lowest set bit and sets ZF for x == 0; the Set/Conv/Neg sequence turns
 * ZF into an all-ones mask so that ffs(0) yields 0 after the final +1.
 */
4776 * Transform builtin ffs.
4778 static ir_node *gen_ffs(ir_node *node)
4780 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4781 ir_node *real = skip_Proj(bsf);
4782 dbg_info *dbgi = get_irn_dbg_info(real);
4783 ir_node *block = get_nodes_block(real);
4784 ir_node *flag, *set, *conv, *neg, *or;
/* make sure the Bsf is mode_T so the flags result can be projected */
4787 if (get_irn_mode(real) != mode_T) {
4788 set_irn_mode(real, mode_T);
4789 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4792 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
/* set = (x == 0) as a byte */
4795 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4796 SET_IA32_ORIG_NODE(set, node);
4799 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4800 SET_IA32_ORIG_NODE(conv, node);
/* neg = -(x == 0): all-ones mask when x was zero */
4803 neg = new_bd_ia32_Neg(dbgi, block, conv);
4806 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4807 set_ia32_commutative(or);
4810 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4814 * Transform builtin clz.
4816 static ir_node *gen_clz(ir_node *node)
4818 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4819 ir_node *real = skip_Proj(bsr);
4820 dbg_info *dbgi = get_irn_dbg_info(real);
4821 ir_node *block = get_nodes_block(real);
4822 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4824 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4828 * Transform builtin ctz.
4830 static ir_node *gen_ctz(ir_node *node)
4832 return gen_unop_AM(node, new_bd_ia32_Bsf);
/**
 * Compares the operand against 0 (flags set from the operand) and
 * materializes the parity flag with a Set, widened back to a full
 * register via Conv_I2I8Bit.
 * NOTE(review): x86 PF only reflects the least significant byte of the
 * result; the lines between the Cmp and the return are partly outside
 * this sampled view — verify how full-word parity is obtained.
 */
4836 * Transform builtin parity.
4838 static ir_node *gen_parity(ir_node *node)
4840 ir_node *param = get_Builtin_param(node, 0);
4841 dbg_info *dbgi = get_irn_dbg_info(node);
4843 ir_node *block = get_nodes_block(node);
4845 ir_node *new_block = be_transform_node(block);
4846 ir_node *imm, *cmp, *new_node;
4848 ia32_address_mode_t am;
4849 ia32_address_t *addr = &am.addr;
/* cmp param, 0 — sets the flags (including PF) from the operand */
4853 match_arguments(&am, block, NULL, param, NULL, match_am);
4854 imm = ia32_create_Immediate(NULL, 0, 0);
4855 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4856 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4857 set_am_attributes(cmp, &am);
4858 set_ia32_ls_mode(cmp, mode_Iu);
4860 SET_IA32_ORIG_NODE(cmp, node);
4862 cmp = fix_mem_proj(cmp, &am);
/* materialize PF as a byte */
4865 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4866 SET_IA32_ORIG_NODE(new_node, node);
/* zero-extend the byte to a full register */
4869 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4870 nomem, new_node, mode_Bu);
4871 SET_IA32_ORIG_NODE(new_node, node);
4876 * Transform builtin popcount
4878 static ir_node *gen_popcount(ir_node *node) {
4879 ir_node *param = get_Builtin_param(node, 0);
4880 dbg_info *dbgi = get_irn_dbg_info(node);
4882 ir_node *block = get_nodes_block(node);
4883 ir_node *new_block = be_transform_node(block);
4886 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4888 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4889 if (ia32_cg_config.use_popcnt) {
4890 ia32_address_mode_t am;
4891 ia32_address_t *addr = &am.addr;
4894 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4896 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4897 set_am_attributes(cnt, &am);
4898 set_ia32_ls_mode(cnt, get_irn_mode(param));
4900 SET_IA32_ORIG_NODE(cnt, node);
4901 return fix_mem_proj(cnt, &am);
4904 new_param = be_transform_node(param);
4906 /* do the standard popcount algo */
4908 /* m1 = x & 0x55555555 */
4909 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4910 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4913 simm = ia32_create_Immediate(NULL, 0, 1);
4914 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4916 /* m2 = s1 & 0x55555555 */
4917 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4920 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4922 /* m4 = m3 & 0x33333333 */
4923 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4924 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4927 simm = ia32_create_Immediate(NULL, 0, 2);
4928 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4930 /* m5 = s2 & 0x33333333 */
4931 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4934 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4936 /* m7 = m6 & 0x0F0F0F0F */
4937 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4938 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4941 simm = ia32_create_Immediate(NULL, 0, 4);
4942 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4944 /* m8 = s3 & 0x0F0F0F0F */
4945 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4948 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4950 /* m10 = m9 & 0x00FF00FF */
4951 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4952 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4955 simm = ia32_create_Immediate(NULL, 0, 8);
4956 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4958 /* m11 = s4 & 0x00FF00FF */
4959 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4961 /* m12 = m10 + m11 */
4962 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4964 /* m13 = m12 & 0x0000FFFF */
4965 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4966 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4968 /* s5 = m12 >> 16 */
4969 simm = ia32_create_Immediate(NULL, 0, 16);
4970 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4972 /* res = m13 + s5 */
4973 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4977 * Transform builtin byte swap.
4979 static ir_node *gen_bswap(ir_node *node) {
4980 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4981 dbg_info *dbgi = get_irn_dbg_info(node);
4983 ir_node *block = get_nodes_block(node);
4984 ir_node *new_block = be_transform_node(block);
4985 ir_mode *mode = get_irn_mode(param);
4986 unsigned size = get_mode_size_bits(mode);
4987 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
4991 if (ia32_cg_config.use_i486) {
4992 /* swap available */
4993 return new_bd_ia32_Bswap(dbgi, new_block, param);
4995 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
4996 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
4998 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
4999 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5001 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5003 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5004 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5006 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5007 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5010 /* swap16 always available */
5011 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5014 panic("Invalid bswap size (%d)", size);
5019  * Transform builtin outport.
/* Emits an ia32 Outport node (out instruction); the port number may be an
 * immediate, the value's mode determines the operand size (ls_mode). */
5021 static ir_node *gen_outport(ir_node *node) {
5022 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5023 	ir_node *oldv  = get_Builtin_param(node, 1);
5024 	ir_mode *mode  = get_irn_mode(oldv);
5025 	ir_node *value = be_transform_node(oldv);
5026 	ir_node *block = be_transform_node(get_nodes_block(node));
5027 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5028 	dbg_info *dbgi = get_irn_dbg_info(node);
5030 	ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
	/* operand size of the out instruction follows the stored value's mode */
5031 	set_ia32_ls_mode(res, mode);
5036  * Transform builtin inport.
/* Emits an ia32 Inport node (in instruction); the result mode is taken
 * from the builtin's method type, the port may be an immediate. */
5038 static ir_node *gen_inport(ir_node *node) {
5039 	ir_type *tp    = get_Builtin_type(node);
5040 	ir_type *rstp  = get_method_res_type(tp, 0);
5041 	ir_mode *mode  = get_type_mode(rstp);
5042 	ir_node *port  = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5043 	ir_node *block = be_transform_node(get_nodes_block(node));
5044 	ir_node *mem   = be_transform_node(get_Builtin_mem(node));
5045 	dbg_info *dbgi = get_irn_dbg_info(node);
5047 	ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5048 	set_ia32_ls_mode(res, mode);
5050 	/* check for missing Result Proj */
5055  * Transform a builtin inner trampoline
/* Writes a small code stub at *ptr:
 *     mov ecx, <env>      (opcode 0xB9 + 32-bit immediate)
 *     jmp rel32 <callee>  (opcode 0xE9 + 32-bit displacement)
 * Each store advances through the same ia32_address_t; the displacement is
 * callee - (trampoline + 10), hence the -10 offsets below.  Returns a
 * Tuple of (memory, trampoline address). */
5057 static ir_node *gen_inner_trampoline(ir_node *node) {
5058 	ir_node  *ptr       = get_Builtin_param(node, 0);
5059 	ir_node  *callee    = get_Builtin_param(node, 1);
5060 	ir_node  *env       = be_transform_node(get_Builtin_param(node, 2));
5061 	ir_node  *mem       = get_Builtin_mem(node);
5062 	ir_node  *block     = get_nodes_block(node);
5063 	ir_node  *new_block = be_transform_node(block);
5067 	ir_node  *trampoline;
5069 	dbg_info *dbgi      = get_irn_dbg_info(node);
5070 	ia32_address_t addr;
5072 	/* construct store address */
5073 	memset(&addr, 0, sizeof(addr));
5074 	ia32_create_address_mode(&addr, ptr, 0);
5076 	if (addr.base == NULL) {
5077 		addr.base = noreg_GP;
	/* transform the non-NULL base computed by ia32_create_address_mode */
5079 		addr.base = be_transform_node(addr.base);
5082 	if (addr.index == NULL) {
5083 		addr.index = noreg_GP;
5085 		addr.index = be_transform_node(addr.index);
5087 	addr.mem = be_transform_node(mem);
5089 	/* mov  ecx, <env> */
5090 	val   = ia32_create_Immediate(NULL, 0, 0xB9);
5091 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5092 	                              addr.index, addr.mem, val);
5093 	set_irn_pinned(store, get_irn_pinned(node));
5094 	set_ia32_op_type(store, ia32_AddrModeD);
5095 	set_ia32_ls_mode(store, mode_Bu);
5096 	set_address(store, &addr);
	/* store the 32-bit environment pointer (the mov's immediate operand) */
5100 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5101 	                          addr.index, addr.mem, env);
5102 	set_irn_pinned(store, get_irn_pinned(node));
5103 	set_ia32_op_type(store, ia32_AddrModeD);
5104 	set_ia32_ls_mode(store, mode_Iu);
5105 	set_address(store, &addr);
5109 	/* jmp rel <callee> */
5110 	val   = ia32_create_Immediate(NULL, 0, 0xE9);
5111 	store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5112 	                              addr.index, addr.mem, val);
5113 	set_irn_pinned(store, get_irn_pinned(node));
5114 	set_ia32_op_type(store, ia32_AddrModeD);
5115 	set_ia32_ls_mode(store, mode_Bu);
5116 	set_address(store, &addr);
5120 	trampoline = be_transform_node(ptr);
5122 	/* the callee is typically an immediate */
5123 	if (is_SymConst(callee)) {
	/* symconst - 10: displacement base includes the 10-byte stub size */
5124 		rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
5126 		rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
	/* rel = (callee - 10) - trampoline */
5128 	rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5130 	store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5131 	                          addr.index, addr.mem, rel);
5132 	set_irn_pinned(store, get_irn_pinned(node));
5133 	set_ia32_op_type(store, ia32_AddrModeD);
5134 	set_ia32_ls_mode(store, mode_Iu);
5135 	set_address(store, &addr);
	/* in[0] = memory, in[1] = trampoline address (see gen_Proj_Builtin) */
5140 	return new_r_Tuple(new_block, 2, in);
5144  * Transform Builtin node.
/* Dispatches on the builtin kind to the specific gen_* transformer;
 * panics for kinds not supported on ia32. */
5146 static ir_node *gen_Builtin(ir_node *node) {
5147 	ir_builtin_kind kind = get_Builtin_kind(node);
5151 		return gen_trap(node);
5152 	case ir_bk_debugbreak:
5153 		return gen_debugbreak(node);
5154 	case ir_bk_return_address:
5155 		return gen_return_address(node);
5156 	case ir_bk_frame_address:
5157 		return gen_frame_address(node);
5158 	case ir_bk_prefetch:
5159 		return gen_prefetch(node);
5161 		return gen_ffs(node);
5163 		return gen_clz(node);
5165 		return gen_ctz(node);
5167 		return gen_parity(node);
5168 	case ir_bk_popcount:
5169 		return gen_popcount(node);
5171 		return gen_bswap(node);
5173 		return gen_outport(node);
5175 		return gen_inport(node);
5176 	case ir_bk_inner_trampoline:
5177 		return gen_inner_trampoline(node);
5179 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5183  * Transform Proj(Builtin) node.
/* Maps the generic Builtin Proj numbers onto the Projs/Tuple entries of
 * the already-transformed ia32 node.  Most builtins have exactly one
 * result; Inport and inner_trampoline additionally expose memory. */
5185 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5186 	ir_node         *node     = get_Proj_pred(proj);
5187 	ir_node         *new_node = be_transform_node(node);
5188 	ir_builtin_kind kind      = get_Builtin_kind(node);
5191 	case ir_bk_return_address:
5192 	case ir_bk_frame_address:
5197 	case ir_bk_popcount:
	/* single-result builtins: transformed node IS the result */
5199 		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5202 	case ir_bk_debugbreak:
5203 	case ir_bk_prefetch:
	/* memory-only builtins */
5205 		assert(get_Proj_proj(proj) == pn_Builtin_M);
5208 		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5209 			return new_r_Proj(get_nodes_block(new_node),
5210 			                  new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5212 			assert(get_Proj_proj(proj) == pn_Builtin_M);
5213 			return new_r_Proj(get_nodes_block(new_node),
5214 			                  new_node, mode_M, pn_ia32_Inport_M);
5216 	case ir_bk_inner_trampoline:
	/* gen_inner_trampoline returns a Tuple: [0] = memory, [1] = address */
5217 		if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5218 			return get_Tuple_pred(new_node, 1);
5220 			assert(get_Proj_proj(proj) == pn_Builtin_M);
5221 			return get_Tuple_pred(new_node, 0);
5224 	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/* Duplicate the be_IncSP node and mark it as flag-clobbering, since the
 * stack adjustment is emitted as an add/sub which modifies eflags. */
5227 static ir_node *gen_be_IncSP(ir_node *node)
5229 	ir_node *res = be_duplicate_node(node);
5230 	arch_irn_add_flags(res, arch_irn_flags_modify_flags);
5236  * Transform the Projs from a be_Call.
/* Renumbers be_Call Projs to the corresponding ia32_Call outputs.  For
 * data results the output slot is found by matching the (limited, i.e.
 * single-register) output requirement of the be_Call Proj against the
 * output requirements of the transformed call. */
5238 static ir_node *gen_Proj_be_Call(ir_node *node)
5240 	ir_node  *block       = be_transform_node(get_nodes_block(node));
5241 	ir_node  *call        = get_Proj_pred(node);
5242 	ir_node  *new_call    = be_transform_node(call);
5243 	dbg_info *dbgi        = get_irn_dbg_info(node);
5244 	long     proj         = get_Proj_proj(node);
5245 	ir_mode  *mode        = get_irn_mode(node);
5248 	if (proj == pn_be_Call_M_regular) {
5249 		return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5251 	/* transform call modes */
5252 	if (mode_is_data(mode)) {
5253 		const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5257 	/* Map from be_Call to ia32_Call proj number */
5258 	if (proj == pn_be_Call_sp) {
5259 		proj = pn_ia32_Call_stack;
5260 	} else if (proj == pn_be_Call_M_regular) {
5261 		proj = pn_ia32_Call_M;
5263 		arch_register_req_t const *const req    = arch_get_register_req_out(node);
5264 		int                        const n_outs = arch_irn_get_n_outs(new_call);
	/* result Projs must carry a limited (single-register) requirement */
5267 		assert(proj >= pn_be_Call_first_res);
5268 		assert(req->type & arch_register_req_type_limited);
5270 		for (i = 0; i < n_outs; ++i) {
5271 			arch_register_req_t const *const new_req
5272 				= arch_get_out_register_req(new_call, i);
	/* skip outputs whose class or register set does not match */
5274 			if (!(new_req->type & arch_register_req_type_limited) ||
5275 			    new_req->cls      != req->cls                     ||
5276 			    *new_req->limited != *req->limited)
5285 	res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5287 	/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5289 	case pn_ia32_Call_stack:
5290 		arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5293 	case pn_ia32_Call_fpcw:
5294 		arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
5302  * Transform the Projs from a Cmp.
/* Cmp Projs must never reach the backend transformer: the mode_b lowering
 * phase is expected to have eliminated them earlier, so this is fatal. */
5304 static ir_node *gen_Proj_Cmp(ir_node *node)
5306 	/* this probably means not all mode_b nodes were lowered... */
5307 	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5312  * Transform the Projs from a Bound.
/* The Bound node itself is lowered to a conditional jump (Jcc); map its
 * control-flow Projs to the Jcc outputs, and the memory/result Projs to
 * the transformed operands of the original Bound. */
5314 static ir_node *gen_Proj_Bound(ir_node *node)
5316 	ir_node *new_node, *block;
5317 	ir_node *pred = get_Proj_pred(node);
5319 	switch (get_Proj_proj(node)) {
	/* memory Proj: Bound has no memory effect of its own */
5321 		return be_transform_node(get_Bound_mem(pred));
5322 	case pn_Bound_X_regular:
5323 		new_node = be_transform_node(pred);
5324 		block    = get_nodes_block(new_node);
5325 		return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5326 	case pn_Bound_X_except:
5327 		new_node = be_transform_node(pred);
5328 		block    = get_nodes_block(new_node);
5329 		return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
	/* result Proj: the checked index is passed through unchanged */
5331 		return be_transform_node(get_Bound_index(pred));
5333 	panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node: renumber by operand mode.  The memory
 * output is, by convention of the transformed ASM node, its last output. */
5337 static ir_node *gen_Proj_ASM(ir_node *node)
5339 	ir_mode *mode     = get_irn_mode(node);
5340 	ir_node *pred     = get_Proj_pred(node);
5341 	ir_node *new_pred = be_transform_node(pred);
5342 	ir_node *block    = get_nodes_block(new_pred);
5343 	long    pos       = get_Proj_proj(node);
5345 	if (mode == mode_M) {
	/* memory is the last output of the transformed ASM */
5346 		pos = arch_irn_get_n_outs(new_pred)-1;
5347 	} else if (mode_is_int(mode) || mode_is_reference(mode)) {
5349 	} else if (mode_is_float(mode)) {
5352 		panic("unexpected proj mode at ASM");
5355 	return new_r_Proj(block, new_pred, mode, pos);
5359  * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes to the per-opcode Proj transformers,
 * handles Start Projs specially, and falls back to duplicating the Proj
 * (renumbered to mode_Iu for gp-register modes). */
5361 static ir_node *gen_Proj(ir_node *node)
5363 	ir_node *pred = get_Proj_pred(node);
5366 	switch (get_irn_opcode(pred)) {
5368 		proj = get_Proj_proj(node);
5369 		if (proj == pn_Store_M) {
	/* the transformed Store is itself the memory value */
5370 			return be_transform_node(pred);
5372 			panic("No idea how to transform proj->Store");
5375 		return gen_Proj_Load(node);
5377 		return gen_Proj_ASM(node);
5379 		return gen_Proj_Builtin(node);
5383 		return gen_Proj_DivMod(node);
5385 		return gen_Proj_CopyB(node);
5387 		return gen_Proj_Quot(node);
5389 		return gen_Proj_be_SubSP(node);
5391 		return gen_Proj_be_AddSP(node);
5393 		return gen_Proj_be_Call(node);
5395 		return gen_Proj_Cmp(node);
5397 		return gen_Proj_Bound(node);
5399 		proj = get_Proj_proj(node);
5401 		case pn_Start_X_initial_exec: {
5402 			ir_node *block     = get_nodes_block(pred);
5403 			ir_node *new_block = be_transform_node(block);
5404 			dbg_info *dbgi     = get_irn_dbg_info(node);
5405 			/* we exchange the ProjX with a jump */
5406 			ir_node *jump      = new_rd_Jmp(dbgi, new_block);
5411 		case pn_Start_P_tls:
5412 			return gen_Proj_tls(node);
5417 		if (is_ia32_l_FloattoLL(pred)) {
5418 			return gen_Proj_l_FloattoLL(node);
5420 		} else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5424 			ir_mode *mode = get_irn_mode(node);
5425 			if (ia32_mode_needs_gp_reg(mode)) {
5426 				ir_node *new_pred = be_transform_node(pred);
5427 				ir_node *block    = be_transform_node(get_nodes_block(node));
	/* gp results are always produced in mode_Iu on ia32 */
5428 				ir_node *new_proj = new_r_Proj(block, new_pred,
5429 				                               mode_Iu, get_Proj_proj(node));
	/* keep the old node number for debugging purposes */
5430 				new_proj->node_nr = node->node_nr;
5435 		return be_duplicate_node(node);
5439  * Enters all transform functions into the generic pointer
/* Fills op->ops.generic of every IR opcode with the matching gen_*
 * transformer (GEN) or with bad_transform for nodes that must not appear
 * at this phase (BAD). */
5441 static void register_transformers(void)
5443 	/* first clear the generic function pointer for all ops */
5444 	clear_irp_opcodes_generic_func();
5446 #define GEN(a)   { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5447 #define BAD(a)   op_##a->ops.generic = (op_func)bad_transform
5487 	/* transform ops from intrinsic lowering */
5499 	GEN(ia32_l_LLtoFloat);
5500 	GEN(ia32_l_FloattoLL);
5506 	/* we should never see these nodes */
5521 	/* handle builtins */
5524 	/* handle generic backend nodes */
5538  * Pre-transform all unknown and noreg nodes.
/* Transforms the per-graph pseudo registers (Unknown/NoReg of each class)
 * up front and caches the frequently used nomem/noreg_GP nodes in the
 * file-level variables used by the transformers above. */
5540 static void ia32_pretransform_node(void)
5542 	ia32_code_gen_t *cg = env_cg;
5544 	cg->unknown_gp  = be_pre_transform_node(cg->unknown_gp);
5545 	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5546 	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5547 	cg->noreg_gp    = be_pre_transform_node(cg->noreg_gp);
5548 	cg->noreg_vfp   = be_pre_transform_node(cg->noreg_vfp);
5549 	cg->noreg_xmm   = be_pre_transform_node(cg->noreg_xmm);
5551 	nomem = get_irg_no_mem(current_ir_graph);
5552 	noreg_GP = ia32_new_NoReg_gp(cg);
5558  * Walker, checks if all ia32 nodes producing more than one result have their
5559  * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/* Collects a bitmask of the Proj numbers already present on an ia32 node
 * and, for every unused output (except memory and the flags class), builds
 * a Proj and attaches it to a be_Keep so the register stays allocated. */
5561 static void add_missing_keep_walker(ir_node *node, void *data)
5564 	unsigned         found_projs = 0;
5565 	const ir_edge_t *edge;
5566 	ir_mode         *mode = get_irn_mode(node);
5571 	if (!is_ia32_irn(node))
5574 	n_outs = arch_irn_get_n_outs(node);
	/* SwitchJmp outputs are control flow, not register values */
5577 	if (is_ia32_SwitchJmp(node))
	/* found_projs is a bitmask, so the out count must fit in it */
5580 	assert(n_outs < (int) sizeof(unsigned) * 8);
5581 	foreach_out_edge(node, edge) {
5582 		ir_node *proj = get_edge_src_irn(edge);
5585 		/* The node could be kept */
5589 		if (get_irn_mode(proj) == mode_M)
5592 		pn = get_Proj_proj(proj);
5593 		assert(pn < n_outs);
5594 		found_projs |= 1 << pn;
5598 	/* are keeps missing? */
5600 	for (i = 0; i < n_outs; ++i) {
5603 		const arch_register_req_t   *req;
5604 		const arch_register_class_t *cls;
5606 		if (found_projs & (1 << i)) {
5610 		req = arch_get_out_register_req(node, i);
	/* flag outputs need no keep: flags are not register-allocated values */
5615 		if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
5619 		block = get_nodes_block(node);
5620 		in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
	/* reuse one Keep per node, appending further inputs to it */
5621 		if (last_keep != NULL) {
5622 			be_Keep_add_node(last_keep, cls, in[0]);
5624 			last_keep = be_new_Keep(block, 1, in);
5625 			if (sched_is_scheduled(node)) {
5626 				sched_add_after(node, last_keep);
5633  * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Driver: runs add_missing_keep_walker over the whole graph. */
5636 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5638 	ir_graph *irg = be_get_birg_irg(cg->birg);
5639 	irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5643  * Post-process all calls if we are in SSE mode.
5644  * The ABI requires that the results are in st0, copy them
5645  * to a xmm register.
/* Iterates over the calls recorded during transformation (call_list /
 * call_types, presumably filled by the Call transformer -- confirm) and,
 * for each float result, either patches a consuming xStore into a vfst
 * (x87 store) or spills st0 to the frame and reloads it into an xmm
 * register, rerouting memory and users accordingly. */
5647 static void postprocess_fp_call_results(void) {
5650 	for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5651 		ir_node *call = call_list[i];
5652 		ir_type *mtp  = call_types[i];
5655 		for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5656 			ir_type *res_tp = get_method_res_type(mtp, j);
5657 			ir_node *res, *new_res;
5658 			const ir_edge_t *edge, *next;
5661 			if (! is_atomic_type(res_tp)) {
5662 				/* no floating point return */
5665 			mode = get_type_mode(res_tp);
5666 			if (! mode_is_float(mode)) {
5667 				/* no floating point return */
	/* the x87 result Projs start at pn_ia32_Call_vf0 */
5671 			res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5674 			/* now patch the users */
5675 			foreach_out_edge_safe(res, edge, next) {
5676 				ir_node *succ = get_edge_src_irn(edge);
	/* Keeps consume the value only for liveness, no copy needed */
5679 				if (be_is_Keep(succ))
5682 				if (is_ia32_xStore(succ)) {
5683 					/* an xStore can be patched into an vfst */
5684 					dbg_info *db    = get_irn_dbg_info(succ);
5685 					ir_node  *block = get_nodes_block(succ);
5686 					ir_node  *base  = get_irn_n(succ, n_ia32_xStore_base);
5687 					ir_node  *index = get_irn_n(succ, n_ia32_xStore_index);
5688 					ir_node  *mem   = get_irn_n(succ, n_ia32_xStore_mem);
5689 					ir_node  *value = get_irn_n(succ, n_ia32_xStore_val);
5690 					ir_mode  *mode  = get_ia32_ls_mode(succ);
5692 					ir_node  *st    = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5693 					set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5694 					if (is_ia32_use_frame(succ))
5695 						set_ia32_use_frame(st);
5696 					set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5697 					set_irn_pinned(st, get_irn_pinned(succ));
5698 					set_ia32_op_type(st, ia32_AddrModeD);
	/* lazily build the store/load sequence once per result */
5702 					if (new_res == NULL) {
5703 						dbg_info *db       = get_irn_dbg_info(call);
5704 						ir_node  *block    = get_nodes_block(call);
5705 						ir_node  *frame    = get_irg_frame(current_ir_graph);
5706 						ir_node  *old_mem  = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5707 						ir_node  *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5708 						ir_node  *vfst, *xld, *new_mem;
5710 						/* store st(0) on stack */
5711 						vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5712 						set_ia32_op_type(vfst, ia32_AddrModeD);
5713 						set_ia32_use_frame(vfst);
5715 						/* load into SSE register */
5716 						xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5717 						set_ia32_op_type(xld, ia32_AddrModeS);
5718 						set_ia32_use_frame(xld);
5720 						new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5721 						new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
	/* route existing memory users through the new spill/reload chain */
5723 						if (old_mem != NULL) {
5724 							edges_reroute(old_mem, new_mem, current_ir_graph);
5728 					set_irn_n(succ, get_edge_src_pos(edge), new_res);
5735 /* do the transformation */
/* Entry point of the ia32 transform phase: registers the transformers,
 * computes heights and non-address-mode info, disables CSE for the
 * duration of the transformation (attributes are set after node creation,
 * so CSE would merge nodes prematurely), runs be_transform_graph and, in
 * SSE2 mode, fixes up x87 call results afterwards. */
5736 void ia32_transform_graph(ia32_code_gen_t *cg)
5740 	register_transformers();
5742 	initial_fpcw = NULL;
5745 	be_timer_push(T_HEIGHTS);
5746 	heights      = heights_new(cg->irg);
5747 	be_timer_pop(T_HEIGHTS);
5748 	ia32_calculate_non_address_mode_nodes(cg->birg);
5750 	/* the transform phase is not safe for CSE (yet) because several nodes get
5751 	 * attributes set after their creation */
5752 	cse_last = get_opt_cse();
5755 	call_list  = NEW_ARR_F(ir_node *, 0);
5756 	call_types = NEW_ARR_F(ir_type *, 0);
5757 	be_transform_graph(cg->birg, ia32_pretransform_node);
5759 	if (ia32_cg_config.use_sse2)
5760 		postprocess_fp_call_results();
5761 	DEL_ARR_F(call_types);
5762 	DEL_ARR_F(call_list);
	/* restore the caller's CSE setting */
5764 	set_opt_cse(cse_last);
5766 	ia32_free_non_address_mode_nodes();
5767 	heights_free(heights);
5771 void ia32_init_transform(void)
5773 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");