2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode.h"
52 #include "../besched.h"
54 #include "../beutil.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* NOTE(review): this excerpt is mangled — the original source line numbers are
 * embedded at the start of each line and some lines were dropped during
 * extraction.  Verify against upstream libFirm ia32_transform.c before use. */
73 /* define this to construct SSE constants instead of load them */
74 #undef CONSTRUCT_SSE_CONST
/* Bit patterns (as hex strings) for the FP sign/abs mask constants used to
 * implement Neg and Abs with andps/xorps-style mask operations. */
77 #define SFP_SIGN "0x80000000"
78 #define DFP_SIGN "0x8000000000000000"
79 #define SFP_ABS "0x7FFFFFFF"
80 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
81 #define DFP_INTMAX "9223372036854775807"
/* 2^64 as a decimal string — bias added when converting unsigned 64bit
 * integers to floating point */
82 #define ULL_BIAS "18446744073709551616"
/* assembler-local entity names for the constant-pool entries above */
84 #define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
85 #define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
86 #define ENT_SFP_ABS ".LC_ia32_sfp_abs"
87 #define ENT_DFP_ABS ".LC_ia32_dfp_abs"
88 #define ENT_ULL_BIAS ".LC_ia32_ull_bias"
/* shorthand for the modes of the x87 (vfp) and SSE (xmm) register classes */
90 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
91 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
93 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* cached, transformed node for the initial x87 control word (see get_fpcw) */
95 static ir_node *initial_fpcw = NULL;
/* Constructor-function signatures matching the generated ia32 node builders
 * (new_bd_ia32_*); the generic gen_binop/gen_unop/gen_shift helpers below
 * take one of these as a parameter to build the concrete machine node. */
98 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
99 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
102 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
103 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
106 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
107 ir_node *op1, ir_node *op2);
109 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
112 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem);
115 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
116 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
119 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
/* forward declarations of helpers defined later in this file */
121 static ir_node *create_immediate_or_transform(ir_node *node,
122 char immediate_constraint_type);
124 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
125 dbg_info *dbgi, ir_node *block,
126 ir_node *op, ir_node *orig_node);
/* shared singleton nodes: the NoMem node and the GP no-register node */
128 /* its enough to have those once */
129 static ir_node *nomem, *noreg_GP;
131 /** a list to postprocess all calls */
132 static ir_node **call_list;
133 static ir_type **call_types;
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node)
138 return is_Const(node) && is_Const_null(node);
141 /** Return non-zero is a node represents the 1 constant. */
142 static bool is_Const_1(ir_node *node)
144 return is_Const(node) && is_Const_one(node);
147 /** Return non-zero is a node represents the -1 constant. */
148 static bool is_Const_Minus_1(ir_node *node)
150 return is_Const(node) && is_Const_all_one(node);
154 * returns true if constant can be created with a simple float command
156 static bool is_simple_x87_Const(ir_node *node)
158 tarval *tv = get_Const_tarval(node);
159 if (tarval_is_null(tv) || tarval_is_one(tv))
162 /* TODO: match all the other float constants */
/* NOTE(review): lines are missing from this extraction (the #ifdef branches
 * are truncated); the visible text is kept verbatim.  Intent per the
 * surrounding code: true for constants that SSE can build without a memory
 * load (0.0 via xorps; with CONSTRUCT_SSE_CONST also 1.0 and doubles whose
 * low 32 bits are zero). */
167 * returns true if constant can be created with a simple float command
169 static bool is_simple_sse_Const(ir_node *node)
171 tarval *tv = get_Const_tarval(node);
172 ir_mode *mode = get_tarval_mode(tv);
177 if (tarval_is_null(tv)
178 #ifdef CONSTRUCT_SSE_CONST
183 #ifdef CONSTRUCT_SSE_CONST
184 if (mode == mode_D) {
/* assemble the low 32 bits of the double little-endian byte by byte */
185 unsigned val = get_tarval_sub_bits(tv, 0) |
186 (get_tarval_sub_bits(tv, 1) << 8) |
187 (get_tarval_sub_bits(tv, 2) << 16) |
188 (get_tarval_sub_bits(tv, 3) << 24);
190 /* lower 32bit are zero, really a 32bit constant */
193 #endif /* CONSTRUCT_SSE_CONST */
194 /* TODO: match all the other float constants */
/* Transforms a firm Const node into its ia32 equivalent.
 * Float constants become fldz/fld1 (x87), xorps/bit-twiddling (SSE with
 * CONSTRUCT_SSE_CONST), a movd from a GP immediate (32bit SSE), or a load
 * from a constant-pool entity; integer constants become ia32_Const.
 * NOTE(review): this extraction dropped lines (braces, returns, some decls
 * such as `load`, `res`, `floatent`); text kept verbatim — do not compile. */
199 * Transforms a Const.
201 static ir_node *gen_Const(ir_node *node)
203 ir_node *old_block = get_nodes_block(node);
204 ir_node *block = be_transform_node(old_block);
205 dbg_info *dbgi = get_irn_dbg_info(node);
206 ir_mode *mode = get_irn_mode(node);
208 assert(is_Const(node));
210 if (mode_is_float(mode)) {
215 if (ia32_cg_config.use_sse2) {
216 tarval *tv = get_Const_tarval(node);
/* 0.0: cheapest is xorps reg,reg */
217 if (tarval_is_null(tv)) {
218 load = new_bd_ia32_xZero(dbgi, block);
219 set_ia32_ls_mode(load, mode);
221 #ifdef CONSTRUCT_SSE_CONST
/* 1.0: build from all-ones by shifting mantissa/exponent bits into place */
222 } else if (tarval_is_one(tv)) {
223 int cnst = mode == mode_F ? 26 : 55;
224 ir_node *imm1 = ia32_create_Immediate(NULL, 0, cnst);
225 ir_node *imm2 = ia32_create_Immediate(NULL, 0, 2);
226 ir_node *pslld, *psrld;
228 load = new_bd_ia32_xAllOnes(dbgi, block);
229 set_ia32_ls_mode(load, mode);
230 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
231 set_ia32_ls_mode(pslld, mode);
232 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
233 set_ia32_ls_mode(psrld, mode);
235 #endif /* CONSTRUCT_SSE_CONST */
236 } else if (mode == mode_F) {
237 /* we can place any 32bit constant by using a movd gp, sse */
238 unsigned val = get_tarval_sub_bits(tv, 0) |
239 (get_tarval_sub_bits(tv, 1) << 8) |
240 (get_tarval_sub_bits(tv, 2) << 16) |
241 (get_tarval_sub_bits(tv, 3) << 24);
242 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
243 load = new_bd_ia32_xMovd(dbgi, block, cnst);
244 set_ia32_ls_mode(load, mode);
247 #ifdef CONSTRUCT_SSE_CONST
/* doubles whose low 32 bits are zero: movd the high half, shift left 32 */
248 if (mode == mode_D) {
249 unsigned val = get_tarval_sub_bits(tv, 0) |
250 (get_tarval_sub_bits(tv, 1) << 8) |
251 (get_tarval_sub_bits(tv, 2) << 16) |
252 (get_tarval_sub_bits(tv, 3) << 24);
254 ir_node *imm32 = ia32_create_Immediate(NULL, 0, 32);
255 ir_node *cnst, *psllq;
257 /* fine, lower 32bit are zero, produce 32bit value */
258 val = get_tarval_sub_bits(tv, 4) |
259 (get_tarval_sub_bits(tv, 5) << 8) |
260 (get_tarval_sub_bits(tv, 6) << 16) |
261 (get_tarval_sub_bits(tv, 7) << 24);
262 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
263 load = new_bd_ia32_xMovd(dbgi, block, cnst);
264 set_ia32_ls_mode(load, mode);
265 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
266 set_ia32_ls_mode(psllq, mode);
271 #endif /* CONSTRUCT_SSE_CONST */
/* general case: load the constant from a constant-pool entity */
272 floatent = create_float_const_entity(node);
274 load = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode);
275 set_ia32_op_type(load, ia32_AddrModeS);
276 set_ia32_am_sc(load, floatent);
/* the load can always be redone from the entity, so it may rematerialize */
277 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
278 res = new_r_Proj(block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for the two free constants, else pool load */
281 if (is_Const_null(node)) {
282 load = new_bd_ia32_vfldz(dbgi, block);
284 set_ia32_ls_mode(load, mode);
285 } else if (is_Const_one(node)) {
286 load = new_bd_ia32_vfld1(dbgi, block);
288 set_ia32_ls_mode(load, mode);
292 floatent = create_float_const_entity(node);
293 /* create_float_const_ent is smart and sometimes creates
295 ls_mode = get_type_mode(get_entity_type(floatent));
297 load = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem,
299 set_ia32_op_type(load, ia32_AddrModeS);
300 set_ia32_am_sc(load, floatent);
301 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
302 res = new_r_Proj(block, load, mode_vfp, pn_ia32_vfld_res);
305 #ifdef CONSTRUCT_SSE_CONST
307 #endif /* CONSTRUCT_SSE_CONST */
308 SET_IA32_ORIG_NODE(load, node);
310 be_dep_on_frame(load);
312 } else { /* non-float mode */
314 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so it fits an ia32 immediate */
317 tv = tarval_convert_to(tv, mode_Iu);
319 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
321 panic("couldn't convert constant tarval (%+F)", node);
323 val = get_tarval_long(tv);
325 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, 0, val);
326 SET_IA32_ORIG_NODE(cnst, node);
328 be_dep_on_frame(cnst);
/* Transforms a firm SymConst (address of an entity) into an ia32 node.
 * Float modes get a load from the entity (SSE xLoad or x87 vfld); everything
 * else becomes an ia32_Const carrying the entity as symbolic immediate.
 * NOTE(review): lines dropped by the extraction (braces, decls for `cnst`/
 * `entity`, returns); text kept verbatim. */
334 * Transforms a SymConst.
336 static ir_node *gen_SymConst(ir_node *node)
338 ir_node *old_block = get_nodes_block(node);
339 ir_node *block = be_transform_node(old_block);
340 dbg_info *dbgi = get_irn_dbg_info(node);
341 ir_mode *mode = get_irn_mode(node);
344 if (mode_is_float(mode)) {
345 if (ia32_cg_config.use_sse2)
346 cnst = new_bd_ia32_xLoad(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E)
348 cnst = new_bd_ia32_vfld(dbgi, block, noreg_GP, noreg_GP, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
354 if (get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0, 0);
361 SET_IA32_ORIG_NODE(cnst, node);
363 be_dep_on_frame(cnst);
/* Creates (and caches per alignment) a primitive type for the given mode.
 * Despite the name it also handles the integer modes Iu/Lu used for the
 * SSE bit-mask constants.
 * NOTE(review): the cache arrays are declared with size 16 but
 * ia32_gen_fp_known_const below passes align == 16, which would index one
 * past the end — verify array size / assert against upstream libFirm.
 * NOTE(review): extraction dropped lines (buf/tp declarations, closing
 * braces); text kept verbatim. */
368 * Create a float type for the given mode and cache it.
370 * @param mode the mode for the float type (might be integer mode for SSE2 types)
371 * @param align alignment
373 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
379 if (mode == mode_Iu) {
380 static ir_type *int_Iu[16] = {NULL, };
382 if (int_Iu[align] == NULL) {
383 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
384 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
385 /* set the specified alignment */
386 set_type_alignment_bytes(tp, align);
388 return int_Iu[align];
389 } else if (mode == mode_Lu) {
390 static ir_type *int_Lu[16] = {NULL, };
392 if (int_Lu[align] == NULL) {
393 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
394 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
395 /* set the specified alignment */
396 set_type_alignment_bytes(tp, align);
398 return int_Lu[align];
399 } else if (mode == mode_F) {
400 static ir_type *float_F[16] = {NULL, };
402 if (float_F[align] == NULL) {
403 snprintf(buf, sizeof(buf), "float_F_%u", align);
404 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
405 /* set the specified alignment */
406 set_type_alignment_bytes(tp, align);
408 return float_F[align];
409 } else if (mode == mode_D) {
410 static ir_type *float_D[16] = {NULL, };
412 if (float_D[align] == NULL) {
413 snprintf(buf, sizeof(buf), "float_D_%u", align);
414 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
415 /* set the specified alignment */
416 set_type_alignment_bytes(tp, align);
418 return float_D[align];
/* fallback: extended precision (mode_E) */
420 static ir_type *float_E[16] = {NULL, };
422 if (float_E[align] == NULL) {
423 snprintf(buf, sizeof(buf), "float_E_%u", align);
424 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
425 /* set the specified alignment */
426 set_type_alignment_bytes(tp, align);
428 return float_E[align];
/* Creates (and caches per alignment) a 2-element array type of the given
 * atomic float type — used for the ULL_BIAS entity which stores {0, 2^64}.
 * NOTE(review): extraction dropped lines (buf/arr declarations, braces,
 * the return); text kept verbatim. */
433 * Create a float[2] array type for the given atomic type.
435 * @param tp the atomic type
437 static ir_type *ia32_create_float_array(ir_type *tp) {
439 ir_mode *mode = get_type_mode(tp);
440 unsigned align = get_type_alignment_bytes(tp);
445 if (mode == mode_F) {
446 static ir_type *float_F[16] = {NULL, };
448 if (float_F[align] != NULL)
449 return float_F[align];
450 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
451 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
452 } else if (mode == mode_D) {
453 static ir_type *float_D[16] = {NULL, };
455 if (float_D[align] != NULL)
456 return float_D[align];
457 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
458 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* fallback: extended precision element type */
460 static ir_type *float_E[16] = {NULL, };
462 if (float_E[align] != NULL)
463 return float_E[align];
464 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
465 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* fixed layout: two elements, no padding beyond the requested alignment */
467 set_type_alignment_bytes(arr, align);
468 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
469 set_type_state(arr, layout_fixed);
/* Returns (creating and caching on first use) the global entity holding one
 * of the well-known FP constants (sign masks, abs masks, ULL bias).  The
 * entity is local, constant and statically allocated; ULLBIAS becomes a
 * two-element array {0, 2^64} so it can be indexed by the sign bit.
 * NOTE(review): extraction dropped lines (struct fields mode/align, decls
 * for tv/tp/ent, braces, return paths); text kept verbatim. */
473 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
474 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
476 static const struct {
477 const char *ent_name;
478 const char *cnst_str;
481 } names [ia32_known_const_max] = {
482 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
483 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
484 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
485 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
486 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
488 static ir_entity *ent_cache[ia32_known_const_max];
490 const char *ent_name, *cnst_str;
496 ent_name = names[kct].ent_name;
497 if (! ent_cache[kct]) {
498 cnst_str = names[kct].cnst_str;
/* the third struct field selects the tarval mode for parsing cnst_str */
500 switch (names[kct].mode) {
501 case 0: mode = mode_Iu; break;
502 case 1: mode = mode_Lu; break;
503 default: mode = mode_F; break;
505 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
506 tp = ia32_create_float_type(mode, names[kct].align);
508 if (kct == ia32_ULLBIAS)
509 tp = ia32_create_float_array(tp);
510 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
512 set_entity_ld_ident(ent, get_entity_ident(ent));
513 set_entity_visibility(ent, visibility_local);
514 set_entity_variability(ent, variability_constant);
515 set_entity_allocation(ent, allocation_static);
/* ULLBIAS is {0.0, 2^64}: element 0 stays zero, element 1 is the bias */
517 if (kct == ia32_ULLBIAS) {
518 ir_initializer_t *initializer = create_initializer_compound(2);
520 set_initializer_compound_value(initializer, 0,
521 create_initializer_tarval(get_tarval_null(mode)));
522 set_initializer_compound_value(initializer, 1,
523 create_initializer_tarval(tv));
525 set_entity_initializer(ent, initializer);
527 set_entity_initializer(ent, create_initializer_tarval(tv));
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
538 * return true if the node is a Proj(Load) and could be used in source address
539 * mode for another node. Will return only true if the @p other node is not
540 * dependent on the memory of the Load (for binary operations use the other
541 * input here, for unary operations use NULL).
543 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
544 ir_node *other, ir_node *other2, match_flags_t flags)
549 /* float constants are always available */
550 if (is_Const(node)) {
551 ir_mode *mode = get_irn_mode(node);
552 if (mode_is_float(mode)) {
553 if (ia32_cg_config.use_sse2) {
554 if (is_simple_sse_Const(node))
557 if (is_simple_x87_Const(node))
560 if (get_irn_n_edges(node) > 1)
568 load = get_Proj_pred(node);
569 pn = get_Proj_proj(node);
570 if (!is_Load(load) || pn != pn_Load_res)
572 if (get_nodes_block(load) != block)
574 /* we only use address mode if we're the only user of the load */
575 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
577 /* in some edge cases with address mode we might reach the load normally
578 * and through some AM sequence, if it is already materialized then we
579 * can't create an AM node from it */
580 if (be_is_transformed(node))
583 /* don't do AM if other node inputs depend on the load (via mem-proj) */
584 if (other != NULL && prevents_AM(block, load, other))
587 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result of match_arguments(): the matched address, operand nodes, load/store
 * mode and operand-mode classification for one ia32 instruction.
 * NOTE(review): several fields (addr, new_op1/new_op2, ls_mode, pinned,
 * mem_proj) were dropped by the extraction; text kept verbatim. */
593 typedef struct ia32_address_mode_t ia32_address_mode_t;
594 struct ia32_address_mode_t {
/* ia32_Normal, ia32_AddrModeS (source AM) or ia32_AddrModeD (dest AM) */
599 ia32_op_type_t op_type;
/* operation is commutative (operands may be swapped) */
603 unsigned commutative : 1;
/* operands were swapped during matching (relevant for non-commutative
 * x87 operations which have reverse variants) */
604 unsigned ins_permuted : 1;
607 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
609 /* construct load address */
610 memset(addr, 0, sizeof(addr[0]));
611 ia32_create_address_mode(addr, ptr, 0);
613 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
614 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
615 addr->mem = be_transform_node(mem);
/* Fills am->addr (and ls_mode/pinned/mem_proj) for source address mode.
 * A float Const is turned into a constant-pool entity reference; otherwise
 * the node is assumed to be a Proj(Load) (checked by the caller via
 * ia32_use_source_address_mode) and the Load's address is reused.
 * NOTE(review): extraction dropped lines (decls for load/ptr/mem/new_mem,
 * braces, the early return of the Const branch, addr->mem assignment);
 * text kept verbatim. */
618 static void build_address(ia32_address_mode_t *am, ir_node *node,
619 ia32_create_am_flags_t flags)
621 ia32_address_t *addr = &am->addr;
/* float constant: address the pooled entity directly, no memory input */
627 if (is_Const(node)) {
628 ir_entity *entity = create_float_const_entity(node);
629 addr->base = noreg_GP;
630 addr->index = noreg_GP;
632 addr->symconst_ent = entity;
/* entity type may be narrower than the node mode (smart pooling) */
634 am->ls_mode = get_type_mode(get_entity_type(entity));
635 am->pinned = op_pin_state_floats;
/* Proj(Load) case: reuse the Load's address and memory */
639 load = get_Proj_pred(node);
640 ptr = get_Load_ptr(load);
641 mem = get_Load_mem(load);
642 new_mem = be_transform_node(mem);
643 am->pinned = get_irn_pinned(load);
644 am->ls_mode = get_Load_mode(load);
645 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
648 /* construct load address */
649 ia32_create_address_mode(addr, ptr, flags);
651 addr->base = addr->base ? be_transform_node(addr->base) : noreg_GP;
652 addr->index = addr->index ? be_transform_node(addr->index) : noreg_GP;
656 static void set_address(ir_node *node, const ia32_address_t *addr)
658 set_ia32_am_scale(node, addr->scale);
659 set_ia32_am_sc(node, addr->symconst_ent);
660 set_ia32_am_offs_int(node, addr->offset);
661 if (addr->symconst_sign)
662 set_ia32_am_sc_sign(node);
664 set_ia32_use_frame(node);
665 set_ia32_frame_ent(node, addr->frame_entity);
669 * Apply attributes of a given address mode to a node.
671 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
673 set_address(node, &am->addr);
675 set_ia32_op_type(node, am->op_type);
676 set_ia32_ls_mode(node, am->ls_mode);
677 if (am->pinned == op_pin_state_pinned) {
678 /* beware: some nodes are already pinned and did not allow to change the state */
679 if (get_irn_pinned(node) != op_pin_state_pinned)
680 set_irn_pinned(node, op_pin_state_pinned);
683 set_ia32_commutative(node);
687 * Check, if a given node is a Down-Conv, ie. a integer Conv
688 * from a mode with a mode with more bits to a mode with lesser bits.
689 * Moreover, we return only true if the node has not more than 1 user.
691 * @param node the node
692 * @return non-zero if node is a Down-Conv
694 static int is_downconv(const ir_node *node)
702 /* we only want to skip the conv when we're the only user
703 * (not optimal but for now...)
705 if (get_irn_n_edges(node) > 1)
708 src_mode = get_irn_mode(get_Conv_op(node));
709 dest_mode = get_irn_mode(node);
711 ia32_mode_needs_gp_reg(src_mode) &&
712 ia32_mode_needs_gp_reg(dest_mode) &&
713 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
716 /* Skip all Down-Conv's on a given node and return the resulting node. */
717 ir_node *ia32_skip_downconv(ir_node *node)
719 while (is_downconv(node))
720 node = get_Conv_op(node);
725 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
727 ir_mode *mode = get_irn_mode(node);
732 if (mode_is_signed(mode)) {
737 block = get_nodes_block(node);
738 dbgi = get_irn_dbg_info(node);
740 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* NOTE(review): extraction dropped lines throughout this function (decls for
 * use_am/use_immediate/noreg, braces, several assignments in the op1-AM and
 * try_am branches, addr->mem = nomem); text kept verbatim — compare with
 * upstream libFirm before editing. */
744 * matches operands of a node into ia32 addressing/operand modes. This covers
745 * usage of source address mode, immediates, operations with non 32-bit modes,
747 * The resulting data is filled into the @p am struct. block is the block
748 * of the node whose arguments are matched. op1, op2 are the first and second
749 * input that are matched (op1 may be NULL). other_op is another unrelated
750 * input that is not matched! but which is needed sometimes to check if AM
751 * for op1/op2 is legal.
752 * @p flags describes the supported modes of the operation in detail.
754 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
755 ir_node *op1, ir_node *op2, ir_node *other_op,
758 ia32_address_t *addr = &am->addr;
759 ir_mode *mode = get_irn_mode(op2);
760 int mode_bits = get_mode_size_bits(mode);
761 ir_node *new_op1, *new_op2;
763 unsigned commutative;
764 int use_am_and_immediates;
767 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
769 commutative = (flags & match_commutative) != 0;
770 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
771 use_am = (flags & match_am) != 0;
772 use_immediate = (flags & match_immediate) != 0;
773 assert(!use_am_and_immediates || use_immediate);
776 assert(!commutative || op1 != NULL);
777 assert(use_am || !(flags & match_8bit_am));
778 assert(use_am || !(flags & match_16bit_am));
/* narrow modes only get AM when the operation explicitly supports it */
780 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
781 (mode_bits == 16 && !(flags & match_16bit_am))) {
785 /* we can simply skip downconvs for mode neutral nodes: the upper bits
786 * can be random for these operations */
787 if (flags & match_mode_neutral) {
788 op2 = ia32_skip_downconv(op2);
790 op1 = ia32_skip_downconv(op1);
794 /* match immediates. firm nodes are normalized: constants are always on the
797 if (!(flags & match_try_am) && use_immediate) {
798 new_op2 = try_create_Immediate(op2, 0);
/* try source AM for op2 first (preferred: keeps operand order) */
801 if (new_op2 == NULL &&
802 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
803 build_address(am, op2, 0);
804 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
/* the AM operand slot must still hold a register: use the no-register */
805 if (mode_is_float(mode)) {
806 new_op2 = ia32_new_NoReg_vfp(env_cg);
810 am->op_type = ia32_AddrModeS;
/* otherwise try op1 as AM operand — only legal if commutative; the
 * operands are swapped and ins_permuted records it */
811 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
813 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
815 build_address(am, op1, 0);
817 if (mode_is_float(mode)) {
818 noreg = ia32_new_NoReg_vfp(env_cg);
823 if (new_op2 != NULL) {
826 new_op1 = be_transform_node(op2);
828 am->ins_permuted = 1;
830 am->op_type = ia32_AddrModeS;
/* no AM matched: plain register operands */
833 am->op_type = ia32_Normal;
835 if (flags & match_try_am) {
841 mode = get_irn_mode(op2);
/* widen sub-32bit operands when the operation demands 32bit inputs */
842 if (flags & match_upconv_32 && get_mode_size_bits(mode) != 32) {
843 new_op1 = (op1 == NULL ? NULL : create_upconv(op1, NULL));
845 new_op2 = create_upconv(op2, NULL);
846 am->ls_mode = mode_Iu;
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
850 new_op2 = be_transform_node(op2);
851 am->ls_mode = (flags & match_mode_neutral) ? mode_Iu : mode;
/* normalize unset address parts so node constructors get real inputs */
854 if (addr->base == NULL)
855 addr->base = noreg_GP;
856 if (addr->index == NULL)
857 addr->index = noreg_GP;
858 if (addr->mem == NULL)
861 am->new_op1 = new_op1;
862 am->new_op2 = new_op2;
863 am->commutative = commutative;
867 * "Fixes" a node that uses address mode by turning it into mode_T
868 * and returning a pn_ia32_res Proj.
870 * @param node the node
871 * @param am its address mode
873 * @return a Proj(pn_ia32_res) if a memory address mode is used,
876 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
881 if (am->mem_proj == NULL)
884 /* we have to create a mode_T so the old MemProj can attach to us */
885 mode = get_irn_mode(node);
886 load = get_Proj_pred(am->mem_proj);
888 be_set_transformed_node(load, node);
890 if (mode != mode_T) {
891 set_irn_mode(node, mode_T);
892 return new_rd_Proj(NULL, get_nodes_block(node), node, mode, pn_ia32_res);
899 * Construct a standard binary operation, set AM and immediate if required.
901 * @param node The original node for which the binop is created
902 * @param op1 The first operand
903 * @param op2 The second operand
904 * @param func The node constructor function
905 * @return The constructed ia32 node.
907 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
908 construct_binop_func *func, match_flags_t flags)
911 ir_node *block, *new_block, *new_node;
912 ia32_address_mode_t am;
913 ia32_address_t *addr = &am.addr;
915 block = get_nodes_block(node);
916 match_arguments(&am, block, op1, op2, NULL, flags);
918 dbgi = get_irn_dbg_info(node);
919 new_block = be_transform_node(block);
920 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
921 am.new_op1, am.new_op2);
922 set_am_attributes(new_node, &am);
923 /* we can't use source address mode anymore when using immediates */
924 if (!(flags & match_am_and_immediates) &&
925 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
926 set_ia32_am_support(new_node, ia32_am_none);
927 SET_IA32_ORIG_NODE(new_node, node);
929 new_node = fix_mem_proj(new_node, &am);
/* Generic input positions shared by the lowered carry-using binops (Adc/Sbb);
 * the COMPILETIME_ASSERTs prove the generated per-node input indices agree
 * with these generic names.
 * NOTE(review): the enum's opening/closing lines were dropped by the
 * extraction; text kept verbatim. */
935 * Generic names for the inputs of an ia32 binary op.
938 n_ia32_l_binop_left, /**< ia32 left input */
939 n_ia32_l_binop_right, /**< ia32 right input */
940 n_ia32_l_binop_eflags /**< ia32 eflags input */
942 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
943 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
944 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
945 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
946 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
947 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
950 * Construct a binary operation which also consumes the eflags.
952 * @param node The node to transform
953 * @param func The node constructor function
954 * @param flags The match flags
955 * @return The constructor ia32 node
957 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
960 ir_node *src_block = get_nodes_block(node);
961 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
962 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
963 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
965 ir_node *block, *new_node, *new_eflags;
966 ia32_address_mode_t am;
967 ia32_address_t *addr = &am.addr;
969 match_arguments(&am, src_block, op1, op2, eflags, flags);
971 dbgi = get_irn_dbg_info(node);
972 block = be_transform_node(src_block);
973 new_eflags = be_transform_node(eflags);
974 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
975 am.new_op1, am.new_op2, new_eflags);
976 set_am_attributes(new_node, &am);
977 /* we can't use source address mode anymore when using immediates */
978 if (!(flags & match_am_and_immediates) &&
979 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
980 set_ia32_am_support(new_node, ia32_am_none);
981 SET_IA32_ORIG_NODE(new_node, node);
983 new_node = fix_mem_proj(new_node, &am);
988 static ir_node *get_fpcw(void)
991 if (initial_fpcw != NULL)
994 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
995 &ia32_fp_cw_regs[REG_FPCW]);
996 initial_fpcw = be_transform_node(fpcw);
1002 * Construct a standard binary operation, set AM and immediate if required.
1004 * @param op1 The first operand
1005 * @param op2 The second operand
1006 * @param func The node constructor function
1007 * @return The constructed ia32 node.
1009 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1010 construct_binop_float_func *func)
1012 ir_mode *mode = get_irn_mode(node);
1014 ir_node *block, *new_block, *new_node;
1015 ia32_address_mode_t am;
1016 ia32_address_t *addr = &am.addr;
1017 ia32_x87_attr_t *attr;
1018 /* All operations are considered commutative, because there are reverse
1020 match_flags_t flags = match_commutative;
1022 /* happens for div nodes... */
1024 mode = get_divop_resmod(node);
1026 /* cannot use address mode with long double on x87 */
1027 if (get_mode_size_bits(mode) <= 64)
1030 block = get_nodes_block(node);
1031 match_arguments(&am, block, op1, op2, NULL, flags);
1033 dbgi = get_irn_dbg_info(node);
1034 new_block = be_transform_node(block);
1035 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1036 am.new_op1, am.new_op2, get_fpcw());
1037 set_am_attributes(new_node, &am);
1039 attr = get_ia32_x87_attr(new_node);
1040 attr->attr.data.ins_permuted = am.ins_permuted;
1042 SET_IA32_ORIG_NODE(new_node, node);
1044 new_node = fix_mem_proj(new_node, &am);
1050 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1052 * @param op1 The first operand
1053 * @param op2 The second operand
1054 * @param func The node constructor function
1055 * @return The constructed ia32 node.
1057 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1058 construct_shift_func *func,
1059 match_flags_t flags)
1062 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1064 assert(! mode_is_float(get_irn_mode(node)));
1065 assert(flags & match_immediate);
1066 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1068 if (flags & match_mode_neutral) {
1069 op1 = ia32_skip_downconv(op1);
1070 new_op1 = be_transform_node(op1);
1071 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1072 new_op1 = create_upconv(op1, node);
1074 new_op1 = be_transform_node(op1);
1077 /* the shift amount can be any mode that is bigger than 5 bits, since all
1078 * other bits are ignored anyway */
1079 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1080 ir_node *const op = get_Conv_op(op2);
1081 if (mode_is_float(get_irn_mode(op)))
1084 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1086 new_op2 = create_immediate_or_transform(op2, 0);
1088 dbgi = get_irn_dbg_info(node);
1089 block = get_nodes_block(node);
1090 new_block = be_transform_node(block);
1091 new_node = func(dbgi, new_block, new_op1, new_op2);
1092 SET_IA32_ORIG_NODE(new_node, node);
1094 /* lowered shift instruction may have a dependency operand, handle it here */
1095 if (get_irn_arity(node) == 3) {
1096 /* we have a dependency */
1097 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1098 add_irn_dep(new_node, new_dep);
1106 * Construct a standard unary operation, set AM and immediate if required.
1108 * @param op The operand
1109 * @param func The node constructor function
1110 * @return The constructed ia32 node.
1112 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1113 match_flags_t flags)
1116 ir_node *block, *new_block, *new_op, *new_node;
1118 assert(flags == 0 || flags == match_mode_neutral);
1119 if (flags & match_mode_neutral) {
1120 op = ia32_skip_downconv(op);
1123 new_op = be_transform_node(op);
1124 dbgi = get_irn_dbg_info(node);
1125 block = get_nodes_block(node);
1126 new_block = be_transform_node(block);
1127 new_node = func(dbgi, new_block, new_op);
1129 SET_IA32_ORIG_NODE(new_node, node);
1134 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1135 ia32_address_t *addr)
1137 ir_node *base, *index, *res;
1143 base = be_transform_node(base);
1146 index = addr->index;
1147 if (index == NULL) {
1150 index = be_transform_node(index);
1153 res = new_bd_ia32_Lea(dbgi, block, base, index);
1154 set_address(res, addr);
1160 * Returns non-zero if a given address mode has a symbolic or
1161 * numerical offset != 0.
1163 static int am_has_immediates(const ia32_address_t *addr)
1165 return addr->offset != 0 || addr->symconst_ent != NULL
1166 || addr->frame_entity || addr->use_frame;
/* NOTE(review): extraction dropped lines (decls for dbgi, braces, several
 * returns, parts of the immediate-tree branch); text kept verbatim —
 * compare with upstream libFirm before editing. */
1170 * Creates an ia32 Add.
1172 * @return the created ia32 Add node
1174 static ir_node *gen_Add(ir_node *node)
1176 ir_mode *mode = get_irn_mode(node);
1177 ir_node *op1 = get_Add_left(node);
1178 ir_node *op2 = get_Add_right(node);
1180 ir_node *block, *new_block, *new_node, *add_immediate_op;
1181 ia32_address_t addr;
1182 ia32_address_mode_t am;
/* float add: SSE xAdd or x87 vfadd, via the generic binop helpers */
1184 if (mode_is_float(mode)) {
1185 if (ia32_cg_config.use_sse2)
1186 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1187 match_commutative | match_am);
1189 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
/* prevent this Add from being swallowed into another node's address mode
 * (it will usually become a Lea itself) */
1192 ia32_mark_non_am(node);
1194 op2 = ia32_skip_downconv(op2);
1195 op1 = ia32_skip_downconv(op1);
/* strategy, in order of preference: */
1199 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1200 * 1. Add with immediate -> Lea
1201 * 2. Add with possible source address mode -> Add
1202 * 3. Otherwise -> Lea
/* force the whole Add into an address-mode description */
1204 memset(&addr, 0, sizeof(addr));
1205 ia32_create_address_mode(&addr, node, ia32_create_am_force);
1206 add_immediate_op = NULL;
1208 dbgi = get_irn_dbg_info(node);
1209 block = get_nodes_block(node);
1210 new_block = be_transform_node(block);
/* case 0: everything folded into offset/symconst — emit a plain Const */
1213 if (addr.base == NULL && addr.index == NULL) {
1214 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1215 addr.symconst_sign, 0, addr.offset);
1216 be_dep_on_frame(new_node);
1217 SET_IA32_ORIG_NODE(new_node, node);
1220 /* add with immediate? */
1221 if (addr.index == NULL) {
1222 add_immediate_op = addr.base;
1223 } else if (addr.base == NULL && addr.scale == 0) {
1224 add_immediate_op = addr.index;
/* case 1: one register plus an immediate part -> Lea (or nothing at all) */
1227 if (add_immediate_op != NULL) {
1228 if (!am_has_immediates(&addr)) {
1229 #ifdef DEBUG_libfirm
1230 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
/* Add x,0: the operand itself is the result */
1233 return be_transform_node(add_immediate_op);
1236 new_node = create_lea_from_address(dbgi, new_block, &addr);
1237 SET_IA32_ORIG_NODE(new_node, node);
1241 /* test if we can use source address mode */
1242 match_arguments(&am, block, op1, op2, NULL, match_commutative
1243 | match_mode_neutral | match_am | match_immediate | match_try_am);
1245 /* construct an Add with source address mode */
1246 if (am.op_type == ia32_AddrModeS) {
1247 ia32_address_t *am_addr = &am.addr;
1248 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1249 am_addr->index, am_addr->mem, am.new_op1,
1251 set_am_attributes(new_node, &am);
1252 SET_IA32_ORIG_NODE(new_node, node);
1254 new_node = fix_mem_proj(new_node, &am);
1259 /* otherwise construct a lea */
1260 new_node = create_lea_from_address(dbgi, new_block, &addr);
1261 SET_IA32_ORIG_NODE(new_node, node);
1266 * Creates an ia32 Mul.
1268 * @return the created ia32 Mul node
1270 static ir_node *gen_Mul(ir_node *node)
1272 ir_node *op1 = get_Mul_left(node);
1273 ir_node *op2 = get_Mul_right(node);
1274 ir_mode *mode = get_irn_mode(node);
/* float multiplies: SSE2 xMul if available, otherwise x87 vfmul */
1276 if (mode_is_float(mode)) {
1277 if (ia32_cg_config.use_sse2)
1278 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1279 match_commutative | match_am);
1281 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
/* integer multiply: IMul handles the unsigned case too, since for
 * mode-neutral 32-bit results the low half is identical */
1283 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1284 match_commutative | match_am | match_mode_neutral |
1285 match_immediate | match_am_and_immediates);
1289 * Creates an ia32 Mulh.
1290 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1291 * this result while Mul returns the lower 32 bit.
1293 * @return the created ia32 Mulh node
1295 static ir_node *gen_Mulh(ir_node *node)
1297 ir_node *block = get_nodes_block(node);
1298 ir_node *new_block = be_transform_node(block);
1299 dbg_info *dbgi = get_irn_dbg_info(node);
1300 ir_node *op1 = get_Mulh_left(node);
1301 ir_node *op2 = get_Mulh_right(node);
1302 ir_mode *mode = get_irn_mode(node);
1304 ir_node *proj_res_high;
/* only 32x32->64 widening multiplies are supported here */
1306 if (get_mode_size_bits(mode) != 32) {
1307 panic("Mulh without 32bit size not supported in ia32 backend (%+F)", node);
/* signed -> one-operand IMul, unsigned -> Mul; in both cases the
 * result we want is the high-half Proj of the 64-bit product */
1310 if (mode_is_signed(mode)) {
1311 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1312 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_IMul1OP_res_high);
1314 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1315 proj_res_high = new_rd_Proj(dbgi, new_block, new_node, mode_Iu, pn_ia32_Mul_res_high);
1317 return proj_res_high;
1321 * Creates an ia32 And.
1323 * @return The created ia32 And node
1325 static ir_node *gen_And(ir_node *node)
1327 ir_node *op1 = get_And_left(node);
1328 ir_node *op2 = get_And_right(node);
1329 assert(! mode_is_float(get_irn_mode(node)));
1331 /* is it a zero extension? */
/* And(x, 0xFF) / And(x, 0xFFFF) is a zero-extension from 8/16 bit;
 * emit a conv (movzx) instead of an and */
1332 if (is_Const(op2)) {
1333 tarval *tv = get_Const_tarval(op2);
1334 long v = get_tarval_long(tv);
1336 if (v == 0xFF || v == 0xFFFF) {
1337 dbg_info *dbgi = get_irn_dbg_info(node);
1338 ir_node *block = get_nodes_block(node);
/* src_mode selection for the 0xFF case is elided here; this path
 * handles the 16-bit mask */
1345 assert(v == 0xFFFF);
1348 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain And with AM/immediate matching */
1353 return gen_binop(node, op1, op2, new_bd_ia32_And,
1354 match_commutative | match_mode_neutral | match_am | match_immediate);
1360 * Creates an ia32 Or.
1362 * @return The created ia32 Or node
1364 static ir_node *gen_Or(ir_node *node)
1366 ir_node *op1 = get_Or_left(node);
1367 ir_node *op2 = get_Or_right(node);
/* float Or makes no sense; the mode must be an integer mode */
1369 assert (! mode_is_float(get_irn_mode(node)));
1370 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1371 | match_mode_neutral | match_am | match_immediate);
1377 * Creates an ia32 Eor.
1379 * @return The created ia32 Eor node
1381 static ir_node *gen_Eor(ir_node *node)
1383 ir_node *op1 = get_Eor_left(node);
1384 ir_node *op2 = get_Eor_right(node);
/* firm's Eor (exclusive or) maps directly to the ia32 Xor node */
1386 assert(! mode_is_float(get_irn_mode(node)));
1387 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1388 | match_mode_neutral | match_am | match_immediate);
1393 * Creates an ia32 Sub.
1395 * @return The created ia32 Sub node
1397 static ir_node *gen_Sub(ir_node *node)
1399 ir_node *op1 = get_Sub_left(node);
1400 ir_node *op2 = get_Sub_right(node);
1401 ir_mode *mode = get_irn_mode(node);
/* float subtracts: SSE2 xSub if available, otherwise x87 vfsub;
 * Sub is not commutative, so no match_commutative here */
1403 if (mode_is_float(mode)) {
1404 if (ia32_cg_config.use_sse2)
1405 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1407 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub x,C should have been normalized to Add x,-C by the middleend */
1410 if (is_Const(op2)) {
1411 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1415 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1416 | match_am | match_immediate);
/* Compute the memory input for a node that folded a load (src_val) into
 * its address mode.  Care is taken not to create a memory self-loop:
 * when src_mem is (or contains, via a Sync) the memory Proj of the very
 * load being folded, that predecessor must be dropped. */
1419 static ir_node *transform_AM_mem(ir_node *const block,
1420 ir_node *const src_val,
1421 ir_node *const src_mem,
1422 ir_node *const am_mem)
/* no address-mode memory: just use the transformed original memory */
1424 if (is_NoMem(am_mem)) {
1425 return be_transform_node(src_mem);
1426 } else if (is_Proj(src_val) &&
1428 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1429 /* avoid memory loop */
1431 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the folded load's mem Proj
 * and with am_mem added */
1432 ir_node *const ptr_pred = get_Proj_pred(src_val);
1433 int const arity = get_Sync_n_preds(src_mem);
1438 NEW_ARR_A(ir_node*, ins, arity + 1);
1440 /* NOTE: This sometimes produces dead-code because the old sync in
1441 * src_mem might not be used anymore, we should detect this case
1442 * and kill the sync... */
1443 for (i = arity - 1; i >= 0; --i) {
1444 ir_node *const pred = get_Sync_pred(src_mem, i);
1446 /* avoid memory loop */
1447 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1450 ins[n++] = be_transform_node(pred);
1455 return new_r_Sync(block, n, ins);
/* general case: Sync the original memory with the AM memory */
1459 ins[0] = be_transform_node(src_mem);
1461 return new_r_Sync(block, 2, ins);
1466 * Create a 32bit to 64bit signed extension.
1468 * @param dbgi debug info
1469 * @param block the block where node nodes should be placed
1470 * @param val the value to extend
1471 * @param orig the original node
1473 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1474 ir_node *val, const ir_node *orig)
/* short form: cltd/cdq sign-extends eax into edx; ProduceVal creates
 * the fake eax input the register allocator needs */
1479 if (ia32_cg_config.use_short_sex_eax) {
1480 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1481 be_dep_on_frame(pval);
1482 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
/* fallback: arithmetic shift right by 31 replicates the sign bit */
1484 ir_node *imm31 = ia32_create_Immediate(NULL, 0, 31);
1485 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1487 SET_IA32_ORIG_NODE(res, orig);
1492 * Generates an ia32 DivMod with additional infrastructure for the
1493 * register allocator if needed.
/* Shared transformer for firm Div, Mod and DivMod nodes: all three map
 * to the same ia32 IDiv/Div instruction, which produces quotient and
 * remainder simultaneously (edx:eax / operand). */
1495 static ir_node *create_Div(ir_node *node)
1497 dbg_info *dbgi = get_irn_dbg_info(node);
1498 ir_node *block = get_nodes_block(node);
1499 ir_node *new_block = be_transform_node(block);
1506 ir_node *sign_extension;
1507 ia32_address_mode_t am;
1508 ia32_address_t *addr = &am.addr;
1510 /* the upper bits have random contents for smaller modes */
/* extract operands depending on which firm opcode we were given */
1511 switch (get_irn_opcode(node)) {
1513 op1 = get_Div_left(node);
1514 op2 = get_Div_right(node);
1515 mem = get_Div_mem(node);
1516 mode = get_Div_resmode(node);
1519 op1 = get_Mod_left(node);
1520 op2 = get_Mod_right(node);
1521 mem = get_Mod_mem(node);
1522 mode = get_Mod_resmode(node);
1525 op1 = get_DivMod_left(node);
1526 op2 = get_DivMod_right(node);
1527 mem = get_DivMod_mem(node);
1528 mode = get_DivMod_resmode(node);
1531 panic("invalid divmod node %+F", node);
/* match_upconv_32: operands must be widened to full 32 bit first */
1534 match_arguments(&am, block, op1, op2, NULL, match_am | match_upconv_32);
1536 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1537 is the memory of the consumed address. We can have only the second op as address
1538 in Div nodes, so check only op2. */
1539 new_mem = transform_AM_mem(block, op2, mem, addr->mem);
/* signed division needs edx = sign-extension of eax (cdq/sar);
 * unsigned division needs edx = 0 */
1541 if (mode_is_signed(mode)) {
1542 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1543 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1544 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1546 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0, 0);
1547 be_dep_on_frame(sign_extension);
1549 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1550 addr->index, new_mem, am.new_op2,
1551 am.new_op1, sign_extension);
/* keep the original pinned state: div may trap and must not float */
1554 set_irn_pinned(new_node, get_irn_pinned(node));
1556 set_am_attributes(new_node, &am);
1557 SET_IA32_ORIG_NODE(new_node, node);
1559 new_node = fix_mem_proj(new_node, &am);
1565 * Generates an ia32 Mod.
/* Thin wrapper: the shared create_Div() handles Div/Mod/DivMod alike. */
1567 static ir_node *gen_Mod(ir_node *node)
1569 return create_Div(node);
1573 * Generates an ia32 Div.
/* Thin wrapper: the shared create_Div() handles Div/Mod/DivMod alike. */
1575 static ir_node *gen_Div(ir_node *node)
1577 return create_Div(node);
1581 * Generates an ia32 DivMod.
/* Thin wrapper: the shared create_Div() handles Div/Mod/DivMod alike. */
1583 static ir_node *gen_DivMod(ir_node *node)
1585 return create_Div(node);
1591 * Creates an ia32 floating Div.
1593 * @return The created ia32 xDiv node
1595 static ir_node *gen_Quot(ir_node *node)
1597 ir_node *op1 = get_Quot_left(node);
1598 ir_node *op2 = get_Quot_right(node);
/* SSE2 xDiv if available, otherwise x87 vfdiv; division is not
 * commutative, so only match_am is requested */
1600 if (ia32_cg_config.use_sse2) {
1601 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1603 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1609 * Creates an ia32 Shl.
1611 * @return The created ia32 Shl node
1613 static ir_node *gen_Shl(ir_node *node)
1615 ir_node *left = get_Shl_left(node);
1616 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral (low bits unaffected by upper garbage) */
1618 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1619 match_mode_neutral | match_immediate);
1623 * Creates an ia32 Shr.
1625 * @return The created ia32 Shr node
1627 static ir_node *gen_Shr(ir_node *node)
1629 ir_node *left = get_Shr_left(node);
1630 ir_node *right = get_Shr_right(node);
/* logical right shift is NOT mode-neutral: upper bits must be clean */
1632 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1638 * Creates an ia32 Sar.
1640 * @return The created ia32 Shrs node
1642 static ir_node *gen_Shrs(ir_node *node)
1644 ir_node *left = get_Shrs_left(node);
1645 ir_node *right = get_Shrs_right(node);
/* Shrs x,31 yields only the sign bit -> use the cheap 32->64
 * sign-extension helper (condition on val elided here) */
1647 if (is_Const(right)) {
1648 tarval *tv = get_Const_tarval(right);
1649 long val = get_tarval_long(tv);
1651 /* this is a sign extension */
1652 dbg_info *dbgi = get_irn_dbg_info(node);
1653 ir_node *block = be_transform_node(get_nodes_block(node));
1654 ir_node *new_op = be_transform_node(left);
1656 return create_sex_32_64(dbgi, block, new_op, node);
1660 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 is a sign extension from
 * 16 or 8 bit -> emit a conv (movsx) instead of two shifts */
1661 if (is_Const(right) && is_Shl(left)) {
1662 ir_node *shl_left = get_Shl_left(left);
1663 ir_node *shl_right = get_Shl_right(left);
1664 if (is_Const(shl_right)) {
1665 tarval *tv1 = get_Const_tarval(right);
1666 tarval *tv2 = get_Const_tarval(shl_right);
1667 if (tv1 == tv2 && tarval_is_long(tv1)) {
1668 long val = get_tarval_long(tv1);
1669 if (val == 16 || val == 24) {
1670 dbg_info *dbgi = get_irn_dbg_info(node);
1671 ir_node *block = get_nodes_block(node);
1681 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain arithmetic shift right */
1690 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1696 * Creates an ia32 Rol.
1698 * @param op1 The first operator
1699 * @param op2 The second operator
1700 * @return The created ia32 RotL node
1702 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1704 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1710 * Creates an ia32 Ror.
1711 * NOTE: There is no RotR with immediate because this would always be a RotL
1712 * "imm-mode_size_bits" which can be pre-calculated.
1714 * @param op1 The first operator
1715 * @param op2 The second operator
1716 * @return The created ia32 RotR node
1718 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1720 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1726 * Creates an ia32 RotR or RotL (depending on the found pattern).
1728 * @return The created ia32 RotL or RotR node
1730 static ir_node *gen_Rotl(ir_node *node)
1732 ir_node *rotate = NULL;
1733 ir_node *op1 = get_Rotl_left(node);
1734 ir_node *op2 = get_Rotl_right(node);
1736 /* Firm has only RotL, so we are looking for a right (op2)
1737 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1738 that means we can create a RotR instead of an Add and a RotL */
/* pattern: RotL(x, Add(Minus(e), bits)) == RotR(x, e) */
1742 ir_node *left = get_Add_left(add);
1743 ir_node *right = get_Add_right(add);
1744 if (is_Const(right)) {
1745 tarval *tv = get_Const_tarval(right);
1746 ir_mode *mode = get_irn_mode(node);
1747 long bits = get_mode_size_bits(mode);
1749 if (is_Minus(left) &&
1750 tarval_is_long(tv) &&
1751 get_tarval_long(tv) == bits &&
1754 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1755 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: emit a plain RotL */
1760 if (rotate == NULL) {
1761 rotate = gen_Rol(node, op1, op2);
1770 * Transforms a Minus node.
1772 * @return The created ia32 Minus node
1774 static ir_node *gen_Minus(ir_node *node)
1776 ir_node *op = get_Minus_op(node);
1777 ir_node *block = be_transform_node(get_nodes_block(node));
1778 dbg_info *dbgi = get_irn_dbg_info(node);
1779 ir_mode *mode = get_irn_mode(node);
1784 if (mode_is_float(mode)) {
1785 ir_node *new_op = be_transform_node(op);
/* SSE2 has no neg: xor the value with a sign-bit mask loaded from a
 * known constant entity (address-mode source operand) */
1786 if (ia32_cg_config.use_sse2) {
1787 /* TODO: non-optimal... if we have many xXors, then we should
1788 * rather create a load for the const and use that instead of
1789 * several AM nodes... */
1790 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1792 new_node = new_bd_ia32_xXor(dbgi, block, noreg_GP, noreg_GP,
1793 nomem, new_op, noreg_xmm);
1795 size = get_mode_size_bits(mode);
1796 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1798 set_ia32_am_sc(new_node, ent);
1799 set_ia32_op_type(new_node, ia32_AddrModeS);
1800 set_ia32_ls_mode(new_node, mode);
/* x87 has a direct sign-change instruction (fchs) */
1802 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* integer case: plain neg */
1805 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1808 SET_IA32_ORIG_NODE(new_node, node);
1814 * Transforms a Not node.
1816 * @return The created ia32 Not node
1818 static ir_node *gen_Not(ir_node *node)
1820 ir_node *op = get_Not_op(node);
/* boolean and float Not must have been lowered before the backend */
1822 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1823 assert (! mode_is_float(get_irn_mode(node)));
1825 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1831 * Transforms an Abs node.
1833 * @return The created ia32 Abs node
1835 static ir_node *gen_Abs(ir_node *node)
1837 ir_node *block = get_nodes_block(node);
1838 ir_node *new_block = be_transform_node(block);
1839 ir_node *op = get_Abs_op(node);
1840 dbg_info *dbgi = get_irn_dbg_info(node);
1841 ir_mode *mode = get_irn_mode(node);
1847 if (mode_is_float(mode)) {
1848 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by and-ing with an abs-mask constant
 * fetched via a source address mode */
1850 if (ia32_cg_config.use_sse2) {
1851 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1852 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_GP, noreg_GP,
1853 nomem, new_op, noreg_fp);
1855 size = get_mode_size_bits(mode);
1856 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1858 set_ia32_am_sc(new_node, ent);
1860 SET_IA32_ORIG_NODE(new_node, node);
1862 set_ia32_op_type(new_node, ia32_AddrModeS);
1863 set_ia32_ls_mode(new_node, mode);
/* x87 has a direct abs instruction (fabs) */
1865 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1866 SET_IA32_ORIG_NODE(new_node, node);
/* integer abs: classic branch-free sequence
 *   s = x >> 31; result = (x ^ s) - s */
1869 ir_node *xor, *sign_extension;
1871 if (get_mode_size_bits(mode) == 32) {
1872 new_op = be_transform_node(op);
1874 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1877 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1879 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_GP, noreg_GP,
1880 nomem, new_op, sign_extension);
1881 SET_IA32_ORIG_NODE(xor, node);
1883 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP,
1884 nomem, xor, sign_extension);
1885 SET_IA32_ORIG_NODE(new_node, node);
1892 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* The Bt node sets the carry flag to bit n of x; callers test via Jc/Jnc. */
1894 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1896 dbg_info *dbgi = get_irn_dbg_info(cmp);
1897 ir_node *block = get_nodes_block(cmp);
1898 ir_node *new_block = be_transform_node(block);
1899 ir_node *op1 = be_transform_node(x);
1900 ir_node *op2 = be_transform_node(n);
1902 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1906 * Transform a node returning a "flag" result.
1908 * @param node the node to transform
1909 * @param pnc_out the compare mode to use
/* Produces a flags-mode value for conditional consumers (Jcc, Set, CMov).
 * Recognizes Cmp-Projs directly and the bit-test pattern
 * (1 << n) & x ==/!= 0, which is turned into a Bt instruction. */
1911 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1918 /* we have a Cmp as input */
1919 if (is_Proj(node)) {
1920 ir_node *pred = get_Proj_pred(node);
1922 pn_Cmp pnc = get_Proj_proj(node);
/* bt is only a win for plain equality/inequality tests */
1923 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1924 ir_node *l = get_Cmp_left(pred);
1925 ir_node *r = get_Cmp_right(pred);
1927 ir_node *la = get_And_left(l);
1928 ir_node *ra = get_And_right(l);
/* variant 1: shift is the left And operand */
1930 ir_node *c = get_Shl_left(la);
1931 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1932 /* (1 << n) & ra) */
1933 ir_node *n = get_Shl_right(la);
1934 flags = gen_bt(pred, ra, n);
1935 /* we must generate a Jc/Jnc jump */
/* bt reports the bit in the carry flag; pn_Cmp_Lt/Ge encode
 * "carry set"/"carry clear" in unsigned compare space */
1936 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1939 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* variant 2: shift is the right And operand */
1944 ir_node *c = get_Shl_left(ra);
1945 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1946 /* la & (1 << n)) */
1947 ir_node *n = get_Shl_right(ra);
1948 flags = gen_bt(pred, la, n);
1949 /* we must generate a Jc/Jnc jump */
1950 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1953 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: just transform it; it already produces flags */
1959 flags = be_transform_node(pred);
1965 /* a mode_b value, we have to compare it against 0 */
1966 dbgi = get_irn_dbg_info(node);
1967 new_block = be_transform_node(get_nodes_block(node));
1968 new_op = be_transform_node(node);
1969 flags = new_bd_ia32_Test(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_op,
1970 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1971 *pnc_out = pn_Cmp_Lg;
1976 * Transforms a Load.
1978 * @return the created ia32 Load node
1980 static ir_node *gen_Load(ir_node *node)
1982 ir_node *old_block = get_nodes_block(node);
1983 ir_node *block = be_transform_node(old_block);
1984 ir_node *ptr = get_Load_ptr(node);
1985 ir_node *mem = get_Load_mem(node);
1986 ir_node *new_mem = be_transform_node(mem);
1989 dbg_info *dbgi = get_irn_dbg_info(node);
1990 ir_mode *mode = get_Load_mode(node);
1993 ia32_address_t addr;
1995 /* construct load address */
1996 memset(&addr, 0, sizeof(addr));
1997 ia32_create_address_mode(&addr, ptr, 0);
/* transform base/index (NULL -> noreg handling in elided lines) */
2004 base = be_transform_node(base);
2007 if (index == NULL) {
2010 index = be_transform_node(index);
/* pick the load flavor: SSE xLoad, x87 vfld, or GP Load */
2013 if (mode_is_float(mode)) {
2014 if (ia32_cg_config.use_sse2) {
2015 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
2017 res_mode = mode_xmm;
2019 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
2021 res_mode = mode_vfp;
2024 assert(mode != mode_b);
2026 /* create a conv node with address mode for smaller modes */
2027 if (get_mode_size_bits(mode) < 32) {
2028 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
2029 new_mem, noreg_GP, mode);
2031 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
2036 set_irn_pinned(new_node, get_irn_pinned(node));
2037 set_ia32_op_type(new_node, ia32_AddrModeS);
2038 set_ia32_ls_mode(new_node, mode);
2039 set_address(new_node, &addr);
/* a floating (unpinned) load may be rematerialized by the spiller */
2041 if (get_irn_pinned(node) == op_pin_state_floats) {
2042 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2043 && pn_ia32_vfld_res == pn_ia32_Load_res
2044 && pn_ia32_Load_res == pn_ia32_res);
2045 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2048 SET_IA32_ORIG_NODE(new_node, node);
2050 be_dep_on_frame(new_node);
/* Decide whether a Load result (node, a Proj of a Load) may be folded
 * into a destination address mode of a store to the same pointer.
 * Rejects loads with multiple users, cross-block loads, differing
 * pointers, and dependency cycles through the other operand. */
2054 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2055 ir_node *ptr, ir_node *other)
2062 /* we only use address mode if we're the only user of the load */
2063 if (get_irn_n_edges(node) > 1)
2066 load = get_Proj_pred(node);
/* load and store must live in the same block */
2069 if (get_nodes_block(load) != block)
2072 /* store should have the same pointer as the load */
2073 if (get_Load_ptr(load) != ptr)
2076 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2077 if (other != NULL &&
2078 get_nodes_block(other) == block &&
2079 heights_reachable_in_block(heights, other, load)) {
2083 if (prevents_AM(block, load, mem))
2085 /* Store should be attached to the load via mem */
2086 assert(heights_reachable_in_block(heights, mem, load));
/* Try to build a destination-address-mode binop, i.e. a read-modify-write
 * instruction "op [mem], val" replacing a Load-op-Store triple.  Returns
 * NULL (in elided lines) when neither operand qualifies via use_dest_am().
 * func8bit is used instead of func for 8-bit operand sizes. */
2091 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2092 ir_node *mem, ir_node *ptr, ir_mode *mode,
2093 construct_binop_dest_func *func,
2094 construct_binop_dest_func *func8bit,
2095 match_flags_t flags)
2097 ir_node *src_block = get_nodes_block(node);
2105 ia32_address_mode_t am;
2106 ia32_address_t *addr = &am.addr;
2107 memset(&am, 0, sizeof(am));
2109 assert(flags & match_immediate); /* there is no destam node without... */
2110 commutative = (flags & match_commutative) != 0;
/* pick whichever operand is the foldable load; the other one becomes
 * the instruction's register/immediate operand */
2112 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2113 build_address(&am, op1, ia32_create_am_double_use);
2114 new_op = create_immediate_or_transform(op2, 0);
2115 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2116 build_address(&am, op2, ia32_create_am_double_use);
2117 new_op = create_immediate_or_transform(op1, 0);
/* fill in defaults for unset address parts */
2122 if (addr->base == NULL)
2123 addr->base = noreg_GP;
2124 if (addr->index == NULL)
2125 addr->index = noreg_GP;
2126 if (addr->mem == NULL)
2129 dbgi = get_irn_dbg_info(node);
2130 block = be_transform_node(src_block);
2131 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2133 if (get_mode_size_bits(mode) == 8) {
2134 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2136 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2138 set_address(new_node, addr);
2139 set_ia32_op_type(new_node, ia32_AddrModeD);
2140 set_ia32_ls_mode(new_node, mode);
2141 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the folded load's memory Proj to the new RMW node
 * (the branch structure around these lines is elided in this view) */
2143 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2144 mem_proj = be_transform_node(am.mem_proj);
2145 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Unary counterpart of dest_am_binop: build "op [mem]" (e.g. not/neg/
 * inc/dec on memory) replacing a Load-op-Store triple, or return NULL
 * when the operand does not qualify via use_dest_am(). */
2150 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2151 ir_node *ptr, ir_mode *mode,
2152 construct_unop_dest_func *func)
2154 ir_node *src_block = get_nodes_block(node);
2160 ia32_address_mode_t am;
2161 ia32_address_t *addr = &am.addr;
2163 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2166 memset(&am, 0, sizeof(am));
2167 build_address(&am, op, ia32_create_am_double_use);
2169 dbgi = get_irn_dbg_info(node);
2170 block = be_transform_node(src_block);
2171 new_mem = transform_AM_mem(block, am.am_node, mem, addr->mem);
2172 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2173 set_address(new_node, addr);
2174 set_ia32_op_type(new_node, ia32_AddrModeD);
2175 set_ia32_ls_mode(new_node, mode);
2176 SET_IA32_ORIG_NODE(new_node, node);
/* redirect the folded load's memory Proj to the new RMW node
 * (the branch structure around these lines is elided in this view) */
2178 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2179 mem_proj = be_transform_node(am.mem_proj);
2180 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to turn Store(Mux(cond, 1, 0)) (or the negated 0/1 variant) into
 * an ia32 SetMem (setcc directly to memory).  Only applies to 8-bit
 * stores; returns NULL (elided) when the pattern does not match. */
2185 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2187 ir_mode *mode = get_irn_mode(node);
2188 ir_node *mux_true = get_Mux_true(node);
2189 ir_node *mux_false = get_Mux_false(node);
2199 ia32_address_t addr;
/* setcc writes a single byte, so only 8-bit modes qualify */
2201 if (get_mode_size_bits(mode) != 8)
/* Mux(c, 1, 0) -> plain set; Mux(c, 0, 1) -> negated set */
2204 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2206 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2212 build_address_ptr(&addr, ptr, mem);
2214 dbgi = get_irn_dbg_info(node);
2215 block = get_nodes_block(node);
2216 new_block = be_transform_node(block);
2217 cond = get_Mux_sel(node);
2218 flags = get_flags_node(cond, &pnc);
2219 new_mem = be_transform_node(mem);
2220 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2221 addr.index, addr.mem, flags, pnc, negated);
2222 set_address(new_node, &addr);
2223 set_ia32_op_type(new_node, ia32_AddrModeD);
2224 set_ia32_ls_mode(new_node, mode);
2225 SET_IA32_ORIG_NODE(new_node, node);
/* Try to transform a Store whose value is computed by a foldable node
 * (Add/Sub/And/Or/Eor/shift/rotate/Mux/Minus/Not) into a single
 * destination-address-mode instruction.  Returns the new node or NULL
 * (elided) when no pattern applies. */
2230 static ir_node *try_create_dest_am(ir_node *node)
2232 ir_node *val = get_Store_value(node);
2233 ir_node *mem = get_Store_mem(node);
2234 ir_node *ptr = get_Store_ptr(node);
2235 ir_mode *mode = get_irn_mode(val);
2236 unsigned bits = get_mode_size_bits(mode);
2241 /* handle only GP modes for now... */
2242 if (!ia32_mode_needs_gp_reg(mode))
2246 /* store must be the only user of the val node */
2247 if (get_irn_n_edges(val) > 1)
2249 /* skip pointless convs */
2251 ir_node *conv_op = get_Conv_op(val);
2252 ir_mode *pred_mode = get_irn_mode(conv_op);
2253 if (!ia32_mode_needs_gp_reg(pred_mode))
2255 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2263 /* value must be in the same block */
2264 if (get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the opcode that produced the stored value */
2267 switch (get_irn_opcode(val)) {
2269 op1 = get_Add_left(val);
2270 op2 = get_Add_right(val);
/* Add with +/-1 becomes inc/dec on memory when profitable */
2271 if (ia32_cg_config.use_incdec) {
2272 if (is_Const_1(op2)) {
2273 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2275 } else if (is_Const_Minus_1(op2)) {
2276 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2280 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2281 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2282 match_commutative | match_immediate);
2285 op1 = get_Sub_left(val);
2286 op2 = get_Sub_right(val);
2287 if (is_Const(op2)) {
2288 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2290 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2291 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2295 op1 = get_And_left(val);
2296 op2 = get_And_right(val);
2297 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2298 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2299 match_commutative | match_immediate);
2302 op1 = get_Or_left(val);
2303 op2 = get_Or_right(val);
2304 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2305 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2306 match_commutative | match_immediate);
2309 op1 = get_Eor_left(val);
2310 op2 = get_Eor_right(val);
2311 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2312 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2313 match_commutative | match_immediate);
/* shifts/rotates have no separate 8-bit constructor: same func twice */
2316 op1 = get_Shl_left(val);
2317 op2 = get_Shl_right(val);
2318 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2319 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2323 op1 = get_Shr_left(val);
2324 op2 = get_Shr_right(val);
2325 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2326 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2330 op1 = get_Shrs_left(val);
2331 op2 = get_Shrs_right(val);
2332 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2333 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2337 op1 = get_Rotl_left(val);
2338 op2 = get_Rotl_right(val);
2339 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2340 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2343 /* TODO: match ROR patterns... */
2345 new_node = try_create_SetMem(val, ptr, mem);
2348 op1 = get_Minus_op(val);
2349 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2352 /* should be lowered already */
2353 assert(mode != mode_b);
2354 op1 = get_Not_op(val);
2355 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* a pinned Store must stay pinned even as an RMW instruction */
2361 if (new_node != NULL) {
2362 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2363 get_irn_pinned(node) == op_pin_state_pinned) {
2364 set_irn_pinned(new_node, op_pin_state_pinned);
/* True iff mode is a signed 16- or 32-bit integer mode, i.e. a mode the
 * x87 fist(tp) instructions can store to directly. */
2371 static bool possible_int_mode_for_fp(ir_mode *mode)
2375 if (!mode_is_signed(mode))
2377 size = get_mode_size_bits(mode);
2378 if (size != 16 && size != 32)
/* True iff node is a Conv from a float mode into a signed 16/32-bit
 * integer mode (the case gen_general_Store can turn into a fist). */
2383 static int is_float_to_int_conv(const ir_node *node)
2385 ir_mode *mode = get_irn_mode(node);
2389 if (!possible_int_mode_for_fp(mode))
2394 conv_op = get_Conv_op(node);
2395 conv_mode = get_irn_mode(conv_op);
2397 if (!mode_is_float(conv_mode))
2404 * Transform a Store(floatConst) into a sequence of
/* integer stores: the float constant's byte image is written as one
 * 32-bit immediate store per 4 bytes, Sync'ed together at the end. */
2407 * @return the created ia32 Store node
2409 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2411 ir_mode *mode = get_irn_mode(cns);
2412 unsigned size = get_mode_size_bytes(mode);
2413 tarval *tv = get_Const_tarval(cns);
2414 ir_node *block = get_nodes_block(node);
2415 ir_node *new_block = be_transform_node(block);
2416 ir_node *ptr = get_Store_ptr(node);
2417 ir_node *mem = get_Store_mem(node);
2418 dbg_info *dbgi = get_irn_dbg_info(node);
2422 ia32_address_t addr;
/* only whole 32-bit words are emitted */
2424 assert(size % 4 == 0);
2427 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word from the tarval's bytes */
2431 get_tarval_sub_bits(tv, ofs) |
2432 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2433 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2434 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2435 ir_node *imm = ia32_create_Immediate(NULL, 0, val);
2437 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2438 addr.index, addr.mem, imm);
2440 set_irn_pinned(new_node, get_irn_pinned(node));
2441 set_ia32_op_type(new_node, ia32_AddrModeD);
2442 set_ia32_ls_mode(new_node, mode_Iu);
2443 set_address(new_node, &addr);
2444 SET_IA32_ORIG_NODE(new_node, node);
2447 ins[i++] = new_node;
/* advance offset / shrink remaining size (elided) and loop */
2452 } while (size != 0);
/* combine the partial stores' memories into a single Sync */
2455 return new_rd_Sync(dbgi, new_block, i, ins);
2462 * Generate a vfist or vfisttp instruction.
/* fisttp (SSE3) truncates without fiddling with the FPU control word but
 * always pops the x87 top-of-stack; classic fist needs an explicit
 * truncating control-word value instead.  *fist receives the store node. */
2464 static ir_node *gen_vfist(dbg_info *dbgi, ir_node *block, ir_node *base, ir_node *index,
2465 ir_node *mem, ir_node *val, ir_node **fist)
2469 if (ia32_cg_config.use_fisttp) {
2470 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2471 if other users exists */
2472 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
/* Keep the popped value alive so the x87 simulator inserts a copy
 * when the value has further users */
2473 ir_node *value = new_r_Proj(block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2474 be_new_Keep(block, 1, &value);
2476 new_node = new_r_Proj(block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* classic path: fist with an explicit truncating FPU control word */
2479 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2482 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2488 * Transforms a general (no special case) Store.
2490 * @return the created ia32 Store node
2492 static ir_node *gen_general_Store(ir_node *node)
2494 ir_node *val = get_Store_value(node);
2495 ir_mode *mode = get_irn_mode(val);
2496 ir_node *block = get_nodes_block(node);
2497 ir_node *new_block = be_transform_node(block);
2498 ir_node *ptr = get_Store_ptr(node);
2499 ir_node *mem = get_Store_mem(node);
2500 dbg_info *dbgi = get_irn_dbg_info(node);
2501 ir_node *new_val, *new_node, *store;
2502 ia32_address_t addr;
2504 /* check for destination address mode */
2505 new_node = try_create_dest_am(node);
2506 if (new_node != NULL)
2509 /* construct store address */
2510 memset(&addr, 0, sizeof(addr));
2511 ia32_create_address_mode(&addr, ptr, 0);
2513 if (addr.base == NULL) {
2514 addr.base = noreg_GP;
2516 addr.base = be_transform_node(addr.base);
2519 if (addr.index == NULL) {
2520 addr.index = noreg_GP;
2522 addr.index = be_transform_node(addr.index);
2524 addr.mem = be_transform_node(mem);
2526 if (mode_is_float(mode)) {
2527 /* Convs (and strict-Convs) before stores are unnecessary if the mode
/* is the same (the store's ls_mode determines the rounding anyway) */
2529 while (is_Conv(val) && mode == get_irn_mode(val)) {
2530 ir_node *op = get_Conv_op(val);
2531 if (!mode_is_float(get_irn_mode(op)))
2535 new_val = be_transform_node(val);
2536 if (ia32_cg_config.use_sse2) {
2537 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2538 addr.index, addr.mem, new_val);
2540 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2541 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float)) with no SSE2: combine the conversion into the
 * store via fist(tp) instead of converting first */
2544 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2545 val = get_Conv_op(val);
2547 /* TODO: is this optimisation still necessary at all (middleend)? */
2548 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2549 while (is_Conv(val)) {
2550 ir_node *op = get_Conv_op(val);
2551 if (!mode_is_float(get_irn_mode(op)))
2553 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2557 new_val = be_transform_node(val);
2558 new_node = gen_vfist(dbgi, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store; constants can be stored as immediates directly */
2560 new_val = create_immediate_or_transform(val, 0);
2561 assert(mode != mode_b);
2563 if (get_mode_size_bits(mode) == 8) {
2564 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2565 addr.index, addr.mem, new_val);
2567 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2568 addr.index, addr.mem, new_val);
2573 set_irn_pinned(store, get_irn_pinned(node));
2574 set_ia32_op_type(store, ia32_AddrModeD);
2575 set_ia32_ls_mode(store, mode);
2577 set_address(store, &addr);
2578 SET_IA32_ORIG_NODE(store, node);
2584 * Transforms a Store.
2586 * @return the created ia32 Store node
/* Dispatch for Store transformation: float constant stores are turned into
 * integer stores by gen_float_const_Store, everything else goes through
 * gen_general_Store.  (Extract is missing brace/blank lines; code kept as-is.) */
2588 static ir_node *gen_Store(ir_node *node)
2590 ir_node *val = get_Store_value(node);
2591 ir_mode *mode = get_irn_mode(val);
2593 if (mode_is_float(mode) && is_Const(val)) {
2594 /* We can transform every floating const store
2595 into a sequence of integer stores.
2596 If the constant is already in a register,
2597 it would be better to use it, but we don't
2598 have this information here. */
2599 return gen_float_const_Store(node, val);
2601 return gen_general_Store(node);
2605 * Transforms a Switch.
2607 * @return the created ia32 SwitchJmp node
/* Lowers a switch-Cond (32bit selector) to an ia32 SwitchJmp.  Scans all
 * case Projs to find min/max case values, rejects jump tables spanning
 * more than 128000 entries, and biases the selector with a Lea when the
 * smallest case is not 0.  (Extract missing brace/blank lines.) */
2609 static ir_node *create_Switch(ir_node *node)
2611 dbg_info *dbgi = get_irn_dbg_info(node);
2612 ir_node *block = be_transform_node(get_nodes_block(node));
2613 ir_node *sel = get_Cond_selector(node);
2614 ir_node *new_sel = be_transform_node(sel);
2615 long switch_min = LONG_MAX;
2616 long switch_max = LONG_MIN;
2617 long default_pn = get_Cond_default_proj(node);
2619 const ir_edge_t *edge;
2621 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2623 /* determine the smallest switch case value */
2624 foreach_out_edge(node, edge) {
2625 ir_node *proj = get_edge_src_irn(edge);
2626 long pn = get_Proj_proj(proj);
2627 if (pn == default_pn)
2630 if (pn < switch_min)
2632 if (pn > switch_max)
/* guard against absurdly large jump tables */
2636 if ((unsigned long) (switch_max - switch_min) > 128000) {
2637 panic("Size of switch %+F bigger than 128000", node);
2640 if (switch_min != 0) {
2641 /* if smallest switch case is not 0 we need an additional sub */
2642 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg_GP);
2643 add_ia32_am_offs_int(new_sel, -switch_min);
2644 set_ia32_op_type(new_sel, ia32_AddrModeS);
2646 SET_IA32_ORIG_NODE(new_sel, node);
2649 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2650 SET_IA32_ORIG_NODE(new_node, node);
2656 * Transform a Cond node.
/* A Cond with non-mode_b selector is a switch; otherwise the boolean
 * selector is lowered to a flags-producing node plus an ia32 Jcc. */
2658 static ir_node *gen_Cond(ir_node *node)
2660 ir_node *block = get_nodes_block(node);
2661 ir_node *new_block = be_transform_node(block);
2662 dbg_info *dbgi = get_irn_dbg_info(node);
2663 ir_node *sel = get_Cond_selector(node);
2664 ir_mode *sel_mode = get_irn_mode(sel);
2665 ir_node *flags = NULL;
2669 if (sel_mode != mode_b) {
2670 return create_Switch(node);
2673 /* we get flags from a Cmp */
2674 flags = get_flags_node(sel, &pnc);
2676 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2677 SET_IA32_ORIG_NODE(new_node, node);
2683 * Transform a be_Copy.
/* Duplicates the Copy node; GP-register modes are normalized to mode_Iu
 * so all integer copies use one uniform mode. */
2685 static ir_node *gen_be_Copy(ir_node *node)
2687 ir_node *new_node = be_duplicate_node(node);
2688 ir_mode *mode = get_irn_mode(new_node);
2690 if (ia32_mode_needs_gp_reg(mode)) {
2691 set_irn_mode(new_node, mode_Iu);
/* x87 float compare: uses fucomi when available, otherwise ftst (compare
 * against 0 constant) or fucom+fnstsw, followed by Sahf to move the FPU
 * status word into the CPU flags.  (Extract missing brace/blank lines.) */
2697 static ir_node *create_Fucom(ir_node *node)
2699 dbg_info *dbgi = get_irn_dbg_info(node);
2700 ir_node *block = get_nodes_block(node);
2701 ir_node *new_block = be_transform_node(block);
2702 ir_node *left = get_Cmp_left(node);
2703 ir_node *new_left = be_transform_node(left);
2704 ir_node *right = get_Cmp_right(node);
2708 if (ia32_cg_config.use_fucomi) {
2709 new_right = be_transform_node(right);
2710 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2712 set_ia32_commutative(new_node);
2713 SET_IA32_ORIG_NODE(new_node, node);
/* cheaper ftst when comparing against constant 0 */
2715 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2716 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2718 new_right = be_transform_node(right);
2719 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2722 set_ia32_commutative(new_node);
2724 SET_IA32_ORIG_NODE(new_node, node);
/* Sahf transfers AH (fnstsw result) into EFLAGS */
2726 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2727 SET_IA32_ORIG_NODE(new_node, node);
/* SSE2 float compare: matches both operands (commutative, address mode
 * allowed) and emits an Ucomi node writing EFLAGS directly. */
2733 static ir_node *create_Ucomi(ir_node *node)
2735 dbg_info *dbgi = get_irn_dbg_info(node);
2736 ir_node *src_block = get_nodes_block(node);
2737 ir_node *new_block = be_transform_node(src_block);
2738 ir_node *left = get_Cmp_left(node);
2739 ir_node *right = get_Cmp_right(node);
2741 ia32_address_mode_t am;
2742 ia32_address_t *addr = &am.addr;
2744 match_arguments(&am, src_block, left, right, NULL,
2745 match_commutative | match_am);
2747 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2748 addr->mem, am.new_op1, am.new_op2,
2750 set_am_attributes(new_node, &am);
2752 SET_IA32_ORIG_NODE(new_node, node);
2754 new_node = fix_mem_proj(new_node, &am);
2760 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2761 * to fold an and into a test node
/* TEST only sets ZF usefully for ==/!= style comparisons, hence the
 * restriction to pn_Cmp_Eq / pn_Cmp_Lg on every user Proj. */
2763 static bool can_fold_test_and(ir_node *node)
2765 const ir_edge_t *edge;
2767 /** we can only have eq and lg projs */
2768 foreach_out_edge(node, edge) {
2769 ir_node *proj = get_edge_src_irn(edge);
2770 pn_Cmp pnc = get_Proj_proj(proj);
2771 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2779 * returns true if it is assured, that the upper bits of a node are "clean"
2780 * which means for a 16 or 8 bit value, that the upper bits in the register
2781 * are 0 for unsigned and a copy of the last significant bit for signed
/* Recursive analysis over already-transformed ia32 nodes.  Anything of
 * 32 bits or more is trivially clean; Projs delegate to their producer.
 * (Extract missing brace/blank lines; several case labels are among the
 * dropped lines -- the Shr/Sar/And/binary branches below belong to a
 * switch over the ia32 opcode.) */
2784 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2786 assert(ia32_mode_needs_gp_reg(mode));
2787 if (get_mode_size_bits(mode) >= 32)
2790 if (is_Proj(transformed_node))
2791 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2793 switch (get_ia32_irn_opcode(transformed_node)) {
2794 case iro_ia32_Conv_I2I:
2795 case iro_ia32_Conv_I2I8Bit: {
2796 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* a conv only cleans the bits if its signedness matches and it
 * truncates/extends to at most the queried width */
2797 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2799 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2806 if (mode_is_signed(mode)) {
2807 return false; /* TODO handle signed modes */
/* unsigned Shr: a constant shift amount >= (32 - bits) guarantees zeros */
2809 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2810 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2811 const ia32_immediate_attr_t *attr
2812 = get_ia32_immediate_attr_const(right);
2813 if (attr->symconst == 0 &&
2814 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2818 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2822 /* TODO too conservative if shift amount is constant */
2823 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* And of an unsigned value is clean if either input is clean */
2826 if (!mode_is_signed(mode)) {
2828 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2829 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2831 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary ops need both inputs clean */
2836 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2837 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2839 case iro_ia32_Const:
2840 case iro_ia32_Immediate: {
2841 const ia32_immediate_attr_t *attr =
2842 get_ia32_immediate_attr_const(transformed_node);
2843 if (mode_is_signed(mode)) {
/* signed: the bits above the sign bit must all equal the sign bit */
2844 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2845 return shifted == 0 || shifted == -1;
2847 unsigned long shifted = (unsigned long)attr->offset;
2848 shifted >>= get_mode_size_bits(mode);
2849 return shifted == 0;
2859 * Generate code for a Cmp.
/* Integer/float compare lowering.  Floats dispatch to Ucomi (SSE2) or
 * Fucom (x87).  For integers, (x & y) ==/!= 0 with a single-use And is
 * folded into a Test/Test8Bit; otherwise Cmp/Cmp8Bit is used.  When both
 * operands have clean upper bits the 32bit form is preferred because its
 * encoding is smaller.  (Extract missing brace/blank lines.) */
2861 static ir_node *gen_Cmp(ir_node *node)
2863 dbg_info *dbgi = get_irn_dbg_info(node);
2864 ir_node *block = get_nodes_block(node);
2865 ir_node *new_block = be_transform_node(block);
2866 ir_node *left = get_Cmp_left(node);
2867 ir_node *right = get_Cmp_right(node);
2868 ir_mode *cmp_mode = get_irn_mode(left);
2870 ia32_address_mode_t am;
2871 ia32_address_t *addr = &am.addr;
2874 if (mode_is_float(cmp_mode)) {
2875 if (ia32_cg_config.use_sse2) {
2876 return create_Ucomi(node);
2878 return create_Fucom(node);
2882 assert(ia32_mode_needs_gp_reg(cmp_mode));
2884 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2885 cmp_unsigned = !mode_is_signed(cmp_mode);
2886 if (is_Const_0(right) &&
2888 get_irn_n_edges(left) == 1 &&
2889 can_fold_test_and(node)) {
2890 /* Test(and_left, and_right) */
2891 ir_node *and_left = get_And_left(left);
2892 ir_node *and_right = get_And_right(left);
2894 /* matze: code here used mode instead of cmd_mode, I think it is always
2895 * the same as cmp_mode, but I leave this here to see if this is really
2898 assert(get_irn_mode(and_left) == cmp_mode);
2900 match_arguments(&am, block, and_left, and_right, NULL,
2902 match_am | match_8bit_am | match_16bit_am |
2903 match_am_and_immediates | match_immediate);
2905 /* use 32bit compare mode if possible since the opcode is smaller */
2906 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2907 upper_bits_clean(am.new_op2, cmp_mode)) {
2908 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2911 if (get_mode_size_bits(cmp_mode) == 8) {
2912 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2913 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2916 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2917 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2920 /* Cmp(left, right) */
2921 match_arguments(&am, block, left, right, NULL,
2922 match_commutative | match_am | match_8bit_am |
2923 match_16bit_am | match_am_and_immediates |
2925 /* use 32bit compare mode if possible since the opcode is smaller */
2926 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2927 upper_bits_clean(am.new_op2, cmp_mode)) {
2928 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2931 if (get_mode_size_bits(cmp_mode) == 8) {
2932 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2933 addr->index, addr->mem, am.new_op1,
2934 am.new_op2, am.ins_permuted,
2937 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2938 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2941 set_am_attributes(new_node, &am);
2942 set_ia32_ls_mode(new_node, cmp_mode);
2944 SET_IA32_ORIG_NODE(new_node, node);
2946 new_node = fix_mem_proj(new_node, &am);
/* Builds an ia32 CMov for a Mux whose values live in GP registers.
 * flags/new_flags are the untransformed/transformed flag producer;
 * pnc selects the condition.  Requires CPU cmov support. */
2951 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2954 dbg_info *dbgi = get_irn_dbg_info(node);
2955 ir_node *block = get_nodes_block(node);
2956 ir_node *new_block = be_transform_node(block);
2957 ir_node *val_true = get_Mux_true(node);
2958 ir_node *val_false = get_Mux_false(node);
2960 ia32_address_mode_t am;
2961 ia32_address_t *addr;
2963 assert(ia32_cg_config.use_cmov);
2964 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2968 match_arguments(&am, block, val_false, val_true, flags,
2969 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2971 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2972 addr->mem, am.new_op1, am.new_op2, new_flags,
2973 am.ins_permuted, pnc);
2974 set_am_attributes(new_node, &am);
2976 SET_IA32_ORIG_NODE(new_node, node);
2978 new_node = fix_mem_proj(new_node, &am);
2984 * Creates a ia32 Setcc instruction.
/* SETcc produces only an 8bit result; when the original node's mode is
 * wider, a zero-extending Conv_I2I8Bit (mode_Bu) is appended. */
2986 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2987 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2990 ir_mode *mode = get_irn_mode(orig_node);
2993 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2994 SET_IA32_ORIG_NODE(new_node, orig_node);
2996 /* we might need to conv the result up */
2997 if (get_mode_size_bits(mode) > 8) {
2998 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
2999 nomem, new_node, mode_Bu);
3000 SET_IA32_ORIG_NODE(new_node, orig_node);
3007 * Create instruction for an unsigned Difference or Zero.
/* Branch-free max(a-b, 0): Sub sets the carry flag, Sbb0 materializes
 * 0 or -1 from it, and the final And masks the subtraction result. */
3009 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
3011 ir_mode *mode = get_irn_mode(psi);
3012 ir_node *new_node, *sub, *sbb, *eflags, *block;
3016 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
3017 match_mode_neutral | match_am | match_immediate | match_two_users);
3019 block = get_nodes_block(new_node);
3021 if (is_Proj(new_node)) {
3022 sub = get_Proj_pred(new_node);
3023 assert(is_ia32_Sub(sub));
/* the Sub must become mode_T so we can also Proj its flags output */
3026 set_irn_mode(sub, mode_T);
3027 new_node = new_rd_Proj(NULL, block, sub, mode, pn_ia32_res);
3029 eflags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3031 dbgi = get_irn_dbg_info(psi);
3032 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
3034 new_node = new_bd_ia32_And(dbgi, block, noreg_GP, noreg_GP, nomem, new_node, sbb);
3035 set_ia32_commutative(new_node);
3040 * Create an const array of two float consts.
3042 * @param c0 the first constant
3043 * @param c1 the second constant
3044 * @param new_mode IN/OUT for the mode of the constants, if NULL
3045 * smallest possible mode will be used
/* Emits a local, static, constant two-element float array entity in the
 * global type, used as a small lookup table (e.g. for constant float Mux).
 * Constants are narrowed to mode_F/mode_D when lossless. */
3047 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3049 ir_mode *mode = *new_mode;
3051 ir_initializer_t *initializer;
3052 tarval *tv0 = get_Const_tarval(c0);
3053 tarval *tv1 = get_Const_tarval(c1);
3056 /* detect the best mode for the constants */
3057 mode = get_tarval_mode(tv0);
3059 if (mode != mode_F) {
3060 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3061 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3063 tv0 = tarval_convert_to(tv0, mode);
3064 tv1 = tarval_convert_to(tv1, mode);
3065 } else if (mode != mode_D) {
3066 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3067 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3069 tv0 = tarval_convert_to(tv0, mode);
3070 tv1 = tarval_convert_to(tv1, mode);
/* array of 2 elements, aligned to 4 bytes (see ia32_create_float_type) */
3077 tp = ia32_create_float_type(mode, 4);
3078 tp = ia32_create_float_array(tp);
3080 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3082 set_entity_ld_ident(ent, get_entity_ident(ent));
3083 set_entity_visibility(ent, visibility_local);
3084 set_entity_variability(ent, variability_constant);
3085 set_entity_allocation(ent, allocation_static);
3087 initializer = create_initializer_compound(2);
3089 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3090 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3092 set_entity_initializer(ent, initializer);
3099 * Transforms a Mux node into some code sequence.
3101 * @return The transformed node.
/* Mux lowering, several strategies in order of preference:
 *  - float Mux over a Cmp of its own operands -> SSE min/max;
 *  - float Mux of two constants -> Setcc-scaled load from a 2-entry
 *    constant array (see ia32_create_const_array);
 *  - unsigned "difference or zero" pattern -> create_Doz;
 *  - 0/1 constant Mux -> Setcc; otherwise CMov.
 * (Extract missing brace/blank lines throughout.) */
3103 static ir_node *gen_Mux(ir_node *node)
3105 dbg_info *dbgi = get_irn_dbg_info(node);
3106 ir_node *block = get_nodes_block(node);
3107 ir_node *new_block = be_transform_node(block);
3108 ir_node *mux_true = get_Mux_true(node);
3109 ir_node *mux_false = get_Mux_false(node);
3110 ir_node *cond = get_Mux_sel(node);
3111 ir_mode *mode = get_irn_mode(node);
3116 assert(get_irn_mode(cond) == mode_b);
3118 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3119 if (mode_is_float(mode)) {
3120 ir_node *cmp = get_Proj_pred(cond);
3121 ir_node *cmp_left = get_Cmp_left(cmp);
3122 ir_node *cmp_right = get_Cmp_right(cmp);
3123 pn_Cmp pnc = get_Proj_proj(cond);
3125 if (ia32_cg_config.use_sse2) {
3126 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3127 if (cmp_left == mux_true && cmp_right == mux_false) {
3128 /* Mux(a <= b, a, b) => MIN */
3129 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3130 match_commutative | match_am | match_two_users);
3131 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3132 /* Mux(a <= b, b, a) => MAX */
3133 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3134 match_commutative | match_am | match_two_users);
3136 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3137 if (cmp_left == mux_true && cmp_right == mux_false) {
3138 /* Mux(a >= b, a, b) => MAX */
3139 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3140 match_commutative | match_am | match_two_users);
3141 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3142 /* Mux(a >= b, b, a) => MIN */
3143 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3144 match_commutative | match_am | match_two_users);
3148 if (is_Const(mux_true) && is_Const(mux_false)) {
3149 ia32_address_mode_t am;
/* Setcc yields 0/1; scale it into an index of the const array */
3154 flags = get_flags_node(cond, &pnc);
3155 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_permuted=*/0);
3157 if (ia32_cg_config.use_sse2) {
3158 /* cannot load from different mode on SSE */
3161 /* x87 can load any mode */
3165 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
3167 switch (get_mode_size_bytes(new_mode)) {
/* NOTE(review): case labels for the element sizes are among the
 * lines lost in extraction; scale/Lea choice depends on size */
3177 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3178 set_ia32_am_scale(new_node, 2);
3183 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3184 set_ia32_am_scale(new_node, 1);
3187 /* arg, shift 16 NOT supported */
3189 new_node = new_bd_ia32_Add(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_node, new_node);
3192 panic("Unsupported constant size");
3195 am.ls_mode = new_mode;
3196 am.addr.base = noreg_GP;
3197 am.addr.index = new_node;
3198 am.addr.mem = nomem;
3200 am.addr.scale = scale;
3201 am.addr.use_frame = 0;
3202 am.addr.frame_entity = NULL;
3203 am.addr.symconst_sign = 0;
3204 am.mem_proj = am.addr.mem;
3205 am.op_type = ia32_AddrModeS;
3208 am.pinned = op_pin_state_floats;
3210 am.ins_permuted = 0;
3212 if (ia32_cg_config.use_sse2)
3213 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3215 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3216 set_am_attributes(load, &am);
3218 return new_rd_Proj(NULL, block, load, mode_vfp, pn_ia32_res);
3220 panic("cannot transform floating point Mux");
3223 assert(ia32_mode_needs_gp_reg(mode));
3225 if (is_Proj(cond)) {
3226 ir_node *cmp = get_Proj_pred(cond);
3228 ir_node *cmp_left = get_Cmp_left(cmp);
3229 ir_node *cmp_right = get_Cmp_right(cmp);
3230 pn_Cmp pnc = get_Proj_proj(cond);
3232 /* check for unsigned Doz first */
3233 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3234 is_Const_0(mux_false) && is_Sub(mux_true) &&
3235 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3236 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3237 return create_Doz(node, cmp_left, cmp_right);
3238 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3239 is_Const_0(mux_true) && is_Sub(mux_false) &&
3240 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3241 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3242 return create_Doz(node, cmp_left, cmp_right);
3247 flags = get_flags_node(cond, &pnc);
3249 if (is_Const(mux_true) && is_Const(mux_false)) {
3250 /* both are const, good */
3251 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3252 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3253 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3254 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3256 /* Not that simple. */
3261 new_node = create_CMov(node, cond, flags, pnc);
3269 * Create a conversion from x87 state register to general purpose.
/* fist stores the x87 value to the frame, then an integer Load reads it
 * back.  fist can only store signed integers, so unsigned 32bit values
 * are stored as signed 64bit (mode_Ls) and only the low word is loaded. */
3271 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3273 ir_node *block = be_transform_node(get_nodes_block(node));
3274 ir_node *op = get_Conv_op(node);
3275 ir_node *new_op = be_transform_node(op);
3276 ir_graph *irg = current_ir_graph;
3277 dbg_info *dbgi = get_irn_dbg_info(node);
3278 ir_mode *mode = get_irn_mode(node);
3279 ir_node *fist, *load, *mem;
3281 mem = gen_vfist(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op, &fist);
3282 set_irn_pinned(fist, op_pin_state_floats);
3283 set_ia32_use_frame(fist);
3284 set_ia32_op_type(fist, ia32_AddrModeD);
3286 assert(get_mode_size_bits(mode) <= 32);
3287 /* exception we can only store signed 32 bit integers, so for unsigned
3288 we store a 64bit (signed) integer and load the lower bits */
3289 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3290 set_ia32_ls_mode(fist, mode_Ls);
3292 set_ia32_ls_mode(fist, mode_Is);
3294 SET_IA32_ORIG_NODE(fist, node);
3297 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg_GP, mem);
3299 set_irn_pinned(load, op_pin_state_floats);
3300 set_ia32_use_frame(load);
3301 set_ia32_op_type(load, ia32_AddrModeS);
3302 set_ia32_ls_mode(load, mode_Is);
/* the spill slot must match the width the fist wrote */
3303 if (get_ia32_ls_mode(fist) == mode_Ls) {
3304 ia32_attr_t *attr = get_ia32_attr(load);
3305 attr->data.need_64bit_stackent = 1;
3307 ia32_attr_t *attr = get_ia32_attr(load);
3308 attr->data.need_32bit_stackent = 1;
3310 SET_IA32_ORIG_NODE(load, node);
3312 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
3316 * Creates a x87 strict Conv by placing a Store and a Load
/* Forces rounding to tgt_mode precision by spilling through the frame:
 * vfst truncates to tgt_mode, vfld reloads it into the x87 stack. */
3318 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3320 ir_node *block = get_nodes_block(node);
3321 ir_graph *irg = get_Block_irg(block);
3322 dbg_info *dbgi = get_irn_dbg_info(node);
3323 ir_node *frame = get_irg_frame(irg);
3324 ir_node *store, *load;
3327 store = new_bd_ia32_vfst(dbgi, block, frame, noreg_GP, nomem, node, tgt_mode);
3328 set_ia32_use_frame(store);
3329 set_ia32_op_type(store, ia32_AddrModeD);
3330 SET_IA32_ORIG_NODE(store, node);
3332 load = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, store, tgt_mode);
3333 set_ia32_use_frame(load);
3334 set_ia32_op_type(load, ia32_AddrModeS);
3335 SET_IA32_ORIG_NODE(load, node);
3337 new_node = new_r_Proj(block, load, mode_E, pn_ia32_vfld_res);
/* Integer-to-integer conversion helper: picks the 8bit variant of the
 * ia32 Conv constructor when the target mode is 8 bits wide. */
3341 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3342 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3344 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3346 func = get_mode_size_bits(mode) == 8 ?
3347 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3348 return func(dbgi, block, base, index, mem, val, mode);
3352 * Create a conversion from general purpose to x87 register
/* fild path: tries source address mode for signed 16/32bit inputs; else
 * the value is stored to the frame (sign-extended to 32 bit first when
 * narrower) and reloaded with fild.  Unsigned 32bit values get a zero
 * high word appended (64bit spill) since fild is signed-only.
 * (Extract missing brace/blank lines.) */
3354 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3356 ir_node *src_block = get_nodes_block(node);
3357 ir_node *block = be_transform_node(src_block);
3358 ir_graph *irg = get_Block_irg(block);
3359 dbg_info *dbgi = get_irn_dbg_info(node);
3360 ir_node *op = get_Conv_op(node);
3361 ir_node *new_op = NULL;
3363 ir_mode *store_mode;
3368 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3369 if (possible_int_mode_for_fp(src_mode)) {
3370 ia32_address_mode_t am;
3372 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3373 if (am.op_type == ia32_AddrModeS) {
3374 ia32_address_t *addr = &am.addr;
3376 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index, addr->mem);
3377 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3379 set_am_attributes(fild, &am);
3380 SET_IA32_ORIG_NODE(fild, node);
3382 fix_mem_proj(fild, &am);
3387 if (new_op == NULL) {
3388 new_op = be_transform_node(op);
3391 mode = get_irn_mode(op);
3393 /* first convert to 32 bit signed if necessary */
3394 if (get_mode_size_bits(src_mode) < 32) {
3395 if (!upper_bits_clean(new_op, src_mode)) {
3396 new_op = create_Conv_I2I(dbgi, block, noreg_GP, noreg_GP, nomem, new_op, src_mode);
3397 SET_IA32_ORIG_NODE(new_op, node);
3402 assert(get_mode_size_bits(mode) == 32);
/* spill the GP value to the frame so fild can read it from memory */
3405 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg_GP, nomem, new_op);
3407 set_ia32_use_frame(store);
3408 set_ia32_op_type(store, ia32_AddrModeD);
3409 set_ia32_ls_mode(store, mode_Iu);
3411 /* exception for 32bit unsigned, do a 64bit spill+load */
3412 if (!mode_is_signed(mode)) {
3415 ir_node *zero_const = ia32_create_Immediate(NULL, 0, 0);
3417 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3418 noreg_GP, nomem, zero_const);
3420 set_ia32_use_frame(zero_store);
3421 set_ia32_op_type(zero_store, ia32_AddrModeD);
/* zero high word at frame offset +4 -> value reads as positive 64bit */
3422 add_ia32_am_offs_int(zero_store, 4);
3423 set_ia32_ls_mode(zero_store, mode_Iu);
3428 store = new_rd_Sync(dbgi, block, 2, in);
3429 store_mode = mode_Ls;
3431 store_mode = mode_Is;
3435 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg_GP, store);
3437 set_ia32_use_frame(fild);
3438 set_ia32_op_type(fild, ia32_AddrModeS);
3439 set_ia32_ls_mode(fild, store_mode);
3441 new_node = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
3447 * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I(8Bit) in the smaller of the two modes, using source
 * address mode where possible.  If the operand's upper bits are already
 * clean the conversion is unnecessary and skipped (asserting that no
 * address mode was actually matched in that case). */
3449 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3450 dbg_info *dbgi, ir_node *block, ir_node *op,
3453 ir_node *new_block = be_transform_node(block);
3455 ir_mode *smaller_mode;
3456 ia32_address_mode_t am;
3457 ia32_address_t *addr = &am.addr;
3460 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3461 smaller_mode = src_mode;
3463 smaller_mode = tgt_mode;
3466 #ifdef DEBUG_libfirm
3468 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3473 match_arguments(&am, block, NULL, op, NULL,
3474 match_am | match_8bit_am | match_16bit_am);
3476 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3477 /* unnecessary conv. in theory it shouldn't have been AM */
3478 assert(is_ia32_NoReg_GP(addr->base));
3479 assert(is_ia32_NoReg_GP(addr->index));
3480 assert(is_NoMem(addr->mem));
3481 assert(am.addr.offset == 0);
3482 assert(am.addr.symconst_ent == NULL);
3486 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3487 addr->mem, am.new_op2, smaller_mode);
3488 set_am_attributes(new_node, &am);
3489 /* match_arguments assume that out-mode = in-mode, this isn't true here
3491 set_ia32_ls_mode(new_node, smaller_mode);
3492 SET_IA32_ORIG_NODE(new_node, node);
3493 new_node = fix_mem_proj(new_node, &am);
3498 * Transforms a Conv node.
3500 * @return The created ia32 Conv node
/* Central Conv dispatcher.  Handles: mode_b sources (no-op, bools are
 * already 0/1), same-mode (possibly strict) conversions, float<->float
 * (SSE Conv_FP2FP or x87 strict spill/reload), float->int (SSE Conv_FP2I
 * or x87 fist path), int->float (SSE Conv_I2FP or x87 fild path, with a
 * strict conv when the int mantissa exceeds the float mantissa), and
 * int->int via create_I2I_Conv.  (Extract missing brace/blank lines.) */
3502 static ir_node *gen_Conv(ir_node *node)
3504 ir_node *block = get_nodes_block(node);
3505 ir_node *new_block = be_transform_node(block);
3506 ir_node *op = get_Conv_op(node);
3507 ir_node *new_op = NULL;
3508 dbg_info *dbgi = get_irn_dbg_info(node);
3509 ir_mode *src_mode = get_irn_mode(op);
3510 ir_mode *tgt_mode = get_irn_mode(node);
3511 int src_bits = get_mode_size_bits(src_mode);
3512 int tgt_bits = get_mode_size_bits(tgt_mode);
3513 ir_node *res = NULL;
3515 assert(!mode_is_int(src_mode) || src_bits <= 32);
3516 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3518 if (src_mode == mode_b) {
3519 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3520 /* nothing to do, we already model bools as 0/1 ints */
3521 return be_transform_node(op);
3524 if (src_mode == tgt_mode) {
3525 if (get_Conv_strict(node)) {
3526 if (ia32_cg_config.use_sse2) {
3527 /* when we are in SSE mode, we can kill all strict no-op conversion */
3528 return be_transform_node(op);
3531 /* this should be optimized already, but who knows... */
3532 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3533 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3534 return be_transform_node(op);
3538 if (mode_is_float(src_mode)) {
3539 new_op = be_transform_node(op);
3540 /* we convert from float ... */
3541 if (mode_is_float(tgt_mode)) {
3543 /* Matze: I'm a bit unsure what the following is for? seems wrong
3545 if (src_mode == mode_E && tgt_mode == mode_D
3546 && !get_Conv_strict(node)) {
3547 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3553 if (ia32_cg_config.use_sse2) {
3554 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3555 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg_GP, noreg_GP,
3557 set_ia32_ls_mode(res, tgt_mode);
3559 if (get_Conv_strict(node)) {
3560 /* if fp_no_float_fold is not set then we assume that we
3561 * don't have any float operations in a non
3562 * mode_float_arithmetic mode and can skip strict upconvs */
3563 if (src_bits < tgt_bits
3564 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3565 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3568 res = gen_x87_strict_conv(tgt_mode, new_op);
3569 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3573 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3578 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3579 if (ia32_cg_config.use_sse2) {
3580 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg_GP, noreg_GP,
3582 set_ia32_ls_mode(res, src_mode);
3584 return gen_x87_fp_to_gp(node);
3588 /* we convert from int ... */
3589 if (mode_is_float(tgt_mode)) {
3591 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3592 if (ia32_cg_config.use_sse2) {
3593 new_op = be_transform_node(op);
3594 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg_GP, noreg_GP,
3596 set_ia32_ls_mode(res, tgt_mode);
/* x87: exactness depends on mantissa widths, not mode sizes */
3598 unsigned int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3599 unsigned float_mantissa = tarval_ieee754_get_mantissa_size(tgt_mode);
3600 res = gen_x87_gp_to_fp(node, src_mode);
3602 /* we need a strict-Conv, if the int mode has more bits than the
3604 if (float_mantissa < int_mantissa) {
3605 res = gen_x87_strict_conv(tgt_mode, res);
3606 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3610 } else if (tgt_mode == mode_b) {
3611 /* mode_b lowering already took care that we only have 0/1 values */
3612 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3613 src_mode, tgt_mode));
3614 return be_transform_node(op);
3617 if (src_bits == tgt_bits) {
3618 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3619 src_mode, tgt_mode));
3620 return be_transform_node(op);
3623 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to fold the node into an ia32 Immediate (subject to the given
 * constraint type); falls back to regular transformation otherwise. */
3631 static ir_node *create_immediate_or_transform(ir_node *node,
3632 char immediate_constraint_type)
3634 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3635 if (new_node == NULL) {
3636 new_node = be_transform_node(node);
3642 * Transforms a FrameAddr into an ia32 Add.
/* Implemented as a Lea on the frame pointer with the frame entity
 * attached; the real offset is resolved after stack layout. */
3644 static ir_node *gen_be_FrameAddr(ir_node *node)
3646 ir_node *block = be_transform_node(get_nodes_block(node));
3647 ir_node *op = be_get_FrameAddr_frame(node);
3648 ir_node *new_op = be_transform_node(op);
3649 dbg_info *dbgi = get_irn_dbg_info(node);
3652 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg_GP);
3653 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3654 set_ia32_use_frame(new_node);
3656 SET_IA32_ORIG_NODE(new_node, node);
3662 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* The ia32 ABI returns floats on the x87 stack even when computing with
 * SSE2.  For a single primitive float result, the value is stored from
 * the XMM register to the frame (xStoreSimple) and reloaded with vfld,
 * and the Return's Barrier is rebuilt with the new value/memory inputs.
 * All other Returns are duplicated unchanged.
 * (Extract missing brace/blank lines.) */
3664 static ir_node *gen_be_Return(ir_node *node)
3666 ir_graph *irg = current_ir_graph;
3667 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3668 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3669 ir_entity *ent = get_irg_entity(irg);
3670 ir_type *tp = get_entity_type(ent);
3675 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3676 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3678 int pn_ret_val, pn_ret_mem, arity, i;
3680 assert(ret_val != NULL);
3681 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3682 return be_duplicate_node(node);
3685 res_type = get_method_res_type(tp, 0);
3687 if (! is_Primitive_type(res_type)) {
3688 return be_duplicate_node(node);
3691 mode = get_type_mode(res_type);
3692 if (! mode_is_float(mode)) {
3693 return be_duplicate_node(node);
3696 assert(get_method_n_ress(tp) == 1);
3698 pn_ret_val = get_Proj_proj(ret_val);
3699 pn_ret_mem = get_Proj_proj(ret_mem);
3701 /* get the Barrier */
3702 barrier = get_Proj_pred(ret_val);
3704 /* get result input of the Barrier */
3705 ret_val = get_irn_n(barrier, pn_ret_val);
3706 new_ret_val = be_transform_node(ret_val);
3708 /* get memory input of the Barrier */
3709 ret_mem = get_irn_n(barrier, pn_ret_mem);
3710 new_ret_mem = be_transform_node(ret_mem);
3712 frame = get_irg_frame(irg);
3714 dbgi = get_irn_dbg_info(barrier);
3715 block = be_transform_node(get_nodes_block(barrier));
3717 /* store xmm0 onto stack */
3718 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg_GP,
3719 new_ret_mem, new_ret_val);
3720 set_ia32_ls_mode(sse_store, mode);
3721 set_ia32_op_type(sse_store, ia32_AddrModeD);
3722 set_ia32_use_frame(sse_store);
3724 /* load into x87 register */
3725 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg_GP, sse_store, mode);
3726 set_ia32_op_type(fld, ia32_AddrModeS);
3727 set_ia32_use_frame(fld);
3729 mproj = new_r_Proj(block, fld, mode_M, pn_ia32_vfld_M);
3730 fld = new_r_Proj(block, fld, mode_vfp, pn_ia32_vfld_res);
3732 /* create a new barrier */
3733 arity = get_irn_arity(barrier);
3734 in = ALLOCAN(ir_node*, arity);
3735 for (i = 0; i < arity; ++i) {
/* value/memory slots take the reloaded fld / its memory Proj */
3738 if (i == pn_ret_val) {
3740 } else if (i == pn_ret_mem) {
3743 ir_node *in = get_irn_n(barrier, i);
3744 new_in = be_transform_node(in);
3749 new_barrier = new_ir_node(dbgi, irg, block,
3750 get_irn_op(barrier), get_irn_mode(barrier),
3752 copy_node_attr(barrier, new_barrier);
3753 be_duplicate_deps(barrier, new_barrier);
3754 be_set_transformed_node(barrier, new_barrier);
3756 /* transform normally */
3757 return be_duplicate_node(node);
3761 * Transform a be_AddSP into an ia32_SubSP.
/* The stack grows downwards on ia32, so allocating stack space
 * (be_AddSP) is implemented by subtracting from ESP — hence SubSP. */
3763 static ir_node *gen_be_AddSP(ir_node *node)
3765 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3766 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3768 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3769 match_am | match_immediate);
3773 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: freeing stack space adds to ESP. */
3775 static ir_node *gen_be_SubSP(ir_node *node)
3777 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3778 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3780 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3781 match_am | match_immediate);
3785 * Change some phi modes
/* Copies a Phi, assigning the proper output register requirement for its
 * mode: gp for integers/pointers, xmm or vfp for floats (depending on
 * SSE2 availability), none otherwise.  Predecessors are kept untouched
 * for now (Phis may close loops) and enqueued for later fixup. */
3787 static ir_node *gen_Phi(ir_node *node)
3789 const arch_register_req_t *req;
3790 ir_node *block = be_transform_node(get_nodes_block(node));
3791 ir_graph *irg = current_ir_graph;
3792 dbg_info *dbgi = get_irn_dbg_info(node);
3793 ir_mode *mode = get_irn_mode(node);
3796 if (ia32_mode_needs_gp_reg(mode)) {
3797 /* we shouldn't have any 64bit stuff around anymore */
3798 assert(get_mode_size_bits(mode) <= 32);
3799 /* all integer operations are on 32bit registers now */
3801 req = ia32_reg_classes[CLASS_ia32_gp].class_req;
3802 } else if (mode_is_float(mode)) {
3803 if (ia32_cg_config.use_sse2) {
3805 req = ia32_reg_classes[CLASS_ia32_xmm].class_req;
3808 req = ia32_reg_classes[CLASS_ia32_vfp].class_req;
3811 req = arch_no_register_req;
3814 /* phi nodes allow loops, so we use the old arguments for now
3815 * and fix this later */
3816 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3817 get_irn_in(node) + 1);
3818 copy_node_attr(node, phi);
3819 be_duplicate_deps(node, phi);
3821 arch_set_out_register_req(phi, 0, req);
/* Predecessors must still be visited even though we kept old inputs. */
3823 be_enqueue_preds(node);
/* Transforms an unconditional Jmp into an ia32 Jmp in the transformed
 * block. */
3828 static ir_node *gen_Jmp(ir_node *node)
3830 ir_node *block = get_nodes_block(node);
3831 ir_node *new_block = be_transform_node(block);
3832 dbg_info *dbgi = get_irn_dbg_info(node);
3835 new_node = new_bd_ia32_Jmp(dbgi, new_block);
3836 SET_IA32_ORIG_NODE(new_node, node);
/* Transforms an indirect jump (IJmp).  The target may be folded into an
 * address-mode operand or an immediate via match_arguments(). */
3844 static ir_node *gen_IJmp(ir_node *node)
3846 ir_node *block = get_nodes_block(node);
3847 ir_node *new_block = be_transform_node(block);
3848 dbg_info *dbgi = get_irn_dbg_info(node);
3849 ir_node *op = get_IJmp_target(node);
3851 ia32_address_mode_t am;
3852 ia32_address_t *addr = &am.addr;
/* Jump targets are pointers. */
3854 assert(get_irn_mode(op) == mode_P);
3856 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3858 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3859 addr->mem, am.new_op2);
3860 set_am_attributes(new_node, &am);
3861 SET_IA32_ORIG_NODE(new_node, node);
/* Reroute a possible memory Proj if a load was folded in. */
3863 new_node = fix_mem_proj(new_node, &am);
3869 * Transform a Bound node.
/* Only the common lower==0 case (typical for Java array checks) is
 * supported: index < upper is tested with an unsigned compare, i.e. a
 * Sub whose flags feed a Jcc with pn_Cmp_Lt|unsigned (one branch covers
 * both bounds).  Any other Bound panics. */
3871 static ir_node *gen_Bound(ir_node *node)
3874 ir_node *lower = get_Bound_lower(node);
3875 dbg_info *dbgi = get_irn_dbg_info(node);
3877 if (is_Const_0(lower)) {
3878 /* typical case for Java */
3879 ir_node *sub, *res, *flags, *block;
3881 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3882 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3884 block = get_nodes_block(res);
/* gen_binop may return the Sub directly or a Proj of it; normalize so
 * we can take a flags Proj off the Sub in either case. */
3885 if (! is_Proj(res)) {
3887 set_irn_mode(sub, mode_T);
3888 res = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_res);
3890 sub = get_Proj_pred(res);
3892 flags = new_rd_Proj(NULL, block, sub, mode_Iu, pn_ia32_Sub_flags);
3893 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3894 SET_IA32_ORIG_NODE(new_node, node);
3896 panic("generic Bound not supported in ia32 Backend");
/* Lower l_ShlDep (shift-left with an extra scheduling dependency) to a
 * real ia32 Shl. */
3902 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3904 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3905 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3907 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3908 match_immediate | match_mode_neutral);
/* Lower l_ShrDep to a real ia32 Shr (see gen_ia32_l_ShlDep). */
3911 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3913 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3914 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3915 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/* Lower l_SarDep to a real ia32 Sar (see gen_ia32_l_ShlDep). */
3919 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3921 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3922 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3923 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/* Lower l_Add (low word of a 64bit add) to an ia32 Add in mode_T so the
 * companion l_Adc can consume its flags output. */
3927 static ir_node *gen_ia32_l_Add(ir_node *node)
3929 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3930 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3931 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3932 match_commutative | match_am | match_immediate |
3933 match_mode_neutral);
/* gen_binop may hand back a Proj; force the underlying Add to mode_T. */
3935 if (is_Proj(lowered)) {
3936 lowered = get_Proj_pred(lowered);
3938 assert(is_ia32_Add(lowered));
3939 set_irn_mode(lowered, mode_T);
/* Lower l_Adc (high word of a 64bit add, consuming carry flags) to an
 * ia32 Adc. */
3945 static ir_node *gen_ia32_l_Adc(ir_node *node)
3947 return gen_binop_flags(node, new_bd_ia32_Adc,
3948 match_commutative | match_am | match_immediate |
3949 match_mode_neutral);
3953 * Transforms a l_MulS into a "real" MulS node.
3955 * @return the created ia32 Mul node
/* Unsigned widening multiply (EDX:EAX result), used by 64bit lowering. */
3957 static ir_node *gen_ia32_l_Mul(ir_node *node)
3959 ir_node *left = get_binop_left(node);
3960 ir_node *right = get_binop_right(node);
3962 return gen_binop(node, left, right, new_bd_ia32_Mul,
3963 match_commutative | match_am | match_mode_neutral);
3967 * Transforms a l_IMulS into a "real" IMul1OPS node.
3969 * @return the created ia32 IMul1OP node
/* Signed one-operand widening multiply (EDX:EAX result). */
3971 static ir_node *gen_ia32_l_IMul(ir_node *node)
3973 ir_node *left = get_binop_left(node);
3974 ir_node *right = get_binop_right(node);
3976 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3977 match_commutative | match_am | match_mode_neutral);
/* Lower l_Sub (low word of a 64bit sub) to an ia32 Sub in mode_T so the
 * companion l_Sbb can consume its borrow flags. */
3980 static ir_node *gen_ia32_l_Sub(ir_node *node)
3982 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3983 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3984 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3985 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; force the underlying Sub to mode_T. */
3987 if (is_Proj(lowered)) {
3988 lowered = get_Proj_pred(lowered);
3990 assert(is_ia32_Sub(lowered));
3991 set_irn_mode(lowered, mode_T);
/* Lower l_Sbb (high word of a 64bit sub, consuming borrow flags) to an
 * ia32 Sbb. */
3997 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3999 return gen_binop_flags(node, new_bd_ia32_Sbb,
4000 match_am | match_immediate | match_mode_neutral);
4004 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4005 * op1 - target to be shifted
4006 * op2 - contains bits to be shifted into target
4008 * Only op3 can be an immediate.
/* Shared lowering for the double-precision shift pair; dispatches to
 * ShlD or ShrD based on the node's opcode. */
4010 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4011 ir_node *low, ir_node *count)
4013 ir_node *block = get_nodes_block(node);
4014 ir_node *new_block = be_transform_node(block);
4015 dbg_info *dbgi = get_irn_dbg_info(node);
4016 ir_node *new_high = be_transform_node(high);
4017 ir_node *new_low = be_transform_node(low);
4021 /* the shift amount can be any mode that is bigger than 5 bits, since all
4022 * other bits are ignored anyway */
/* Strip single-user integer Convs on the count — hardware masks the
 * count to 5 bits, so any conversion wider than that is a no-op. */
4023 while (is_Conv(count) &&
4024 get_irn_n_edges(count) == 1 &&
4025 mode_is_int(get_irn_mode(count))) {
4026 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4027 count = get_Conv_op(count);
4029 new_count = create_immediate_or_transform(count, 0);
4031 if (is_ia32_l_ShlD(node)) {
4032 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4035 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4038 SET_IA32_ORIG_NODE(new_node, node);
/* Lower l_ShlD (64bit shift-left helper) via the shared ShlD/ShrD path. */
4043 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4045 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4046 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4047 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4048 return gen_lowered_64bit_shifts(node, high, low, count);
/* Lower l_ShrD (64bit shift-right helper) via the shared ShlD/ShrD path. */
4051 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4053 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4054 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4055 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4056 return gen_lowered_64bit_shifts(node, high, low, count);
/* Converts a 64bit integer (given as low/high words) to a float via the
 * x87 fild instruction: both words are stored to a frame slot, then
 * loaded as a 64bit integer.  fild treats the value as signed, so for
 * unsigned sources a bias (ULL_BIAS) is conditionally added back based
 * on the original sign bit. */
4059 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4061 ir_node *src_block = get_nodes_block(node);
4062 ir_node *block = be_transform_node(src_block);
4063 ir_graph *irg = current_ir_graph;
4064 dbg_info *dbgi = get_irn_dbg_info(node);
4065 ir_node *frame = get_irg_frame(irg);
4066 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4067 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4068 ir_node *new_val_low = be_transform_node(val_low);
4069 ir_node *new_val_high = be_transform_node(val_high);
4071 ir_node *sync, *fild, *res;
4072 ir_node *store_low, *store_high;
4074 if (ia32_cg_config.use_sse2) {
4075 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* Spill both 32bit halves to a single 64bit frame slot; the high word
 * goes at offset +4 (little endian). */
4079 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4081 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg_GP, nomem,
4083 SET_IA32_ORIG_NODE(store_low, node);
4084 SET_IA32_ORIG_NODE(store_high, node);
4086 set_ia32_use_frame(store_low);
4087 set_ia32_use_frame(store_high);
4088 set_ia32_op_type(store_low, ia32_AddrModeD);
4089 set_ia32_op_type(store_high, ia32_AddrModeD);
4090 set_ia32_ls_mode(store_low, mode_Iu);
4091 set_ia32_ls_mode(store_high, mode_Is);
4092 add_ia32_am_offs_int(store_high, 4);
/* Both stores must complete before the fild reads the slot. */
4096 sync = new_rd_Sync(dbgi, block, 2, in);
4099 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg_GP, sync);
4101 set_ia32_use_frame(fild);
4102 set_ia32_op_type(fild, ia32_AddrModeS);
4103 set_ia32_ls_mode(fild, mode_Ls);
4105 SET_IA32_ORIG_NODE(fild, node);
4107 res = new_r_Proj(block, fild, mode_vfp, pn_ia32_vfild_res);
4109 if (! mode_is_signed(get_irn_mode(val_high))) {
/* Unsigned fixup: fild interpreted the value as signed; add 2^64
 * (ULL_BIAS constant) when the sign bit was set.  The sign bit
 * (high >> 31) is used as the scaled index into a 2-entry constant
 * table, selecting 0.0 or the bias. */
4110 ia32_address_mode_t am;
4112 ir_node *count = ia32_create_Immediate(NULL, 0, 31);
4115 am.addr.base = noreg_GP;
4116 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4117 am.addr.mem = nomem;
4120 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4121 am.addr.use_frame = 0;
4122 am.addr.frame_entity = NULL;
4123 am.addr.symconst_sign = 0;
4124 am.ls_mode = mode_F;
4125 am.mem_proj = nomem;
4126 am.op_type = ia32_AddrModeS;
4128 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4129 am.pinned = op_pin_state_floats;
4131 am.ins_permuted = 0;
4133 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4134 am.new_op1, am.new_op2, get_fpcw());
4135 set_am_attributes(fadd, &am);
4137 set_irn_mode(fadd, mode_T);
4138 res = new_rd_Proj(NULL, block, fadd, mode_vfp, pn_ia32_res);
/* Converts a float to a 64bit integer by storing it to a frame slot with
 * fist/fisttp (via gen_vfist).  The two 32bit halves are read back later
 * by gen_Proj_l_FloattoLL. */
4143 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4145 ir_node *src_block = get_nodes_block(node);
4146 ir_node *block = be_transform_node(src_block);
4147 ir_graph *irg = get_Block_irg(block);
4148 dbg_info *dbgi = get_irn_dbg_info(node);
4149 ir_node *frame = get_irg_frame(irg);
4150 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4151 ir_node *new_val = be_transform_node(val);
4152 ir_node *fist, *mem;
4154 mem = gen_vfist(dbgi, block, frame, noreg_GP, nomem, new_val, &fist);
4155 SET_IA32_ORIG_NODE(fist, node);
4156 set_ia32_use_frame(fist);
4157 set_ia32_op_type(fist, ia32_AddrModeD);
/* 64bit integer store — the Projs load the halves individually. */
4158 set_ia32_ls_mode(fist, mode_Ls);
4164 * the BAD transformer.
/* Registered for opcodes that must not appear at this stage; always
 * panics with the offending node. */
4166 static ir_node *bad_transform(ir_node *node)
4168 panic("No transform function for %+F available.", node);
/* Materializes one 32bit half of an l_FloattoLL result by loading it
 * from the 64bit frame slot the fist wrote: offset +4 for the high
 * word, +0 for the low word. */
4172 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4174 ir_node *block = be_transform_node(get_nodes_block(node));
4175 ir_graph *irg = get_Block_irg(block);
4176 ir_node *pred = get_Proj_pred(node);
4177 ir_node *new_pred = be_transform_node(pred);
4178 ir_node *frame = get_irg_frame(irg);
4179 dbg_info *dbgi = get_irn_dbg_info(node);
4180 long pn = get_Proj_proj(node);
4185 load = new_bd_ia32_Load(dbgi, block, frame, noreg_GP, new_pred);
4186 SET_IA32_ORIG_NODE(load, node);
4187 set_ia32_use_frame(load);
4188 set_ia32_op_type(load, ia32_AddrModeS);
4189 set_ia32_ls_mode(load, mode_Iu);
4190 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4191 * 32 bit from it with this particular load */
4192 attr = get_ia32_attr(load);
4193 attr->data.need_64bit_stackent = 1;
4195 if (pn == pn_ia32_l_FloattoLL_res_high) {
4196 add_ia32_am_offs_int(load, 4);
4198 assert(pn == pn_ia32_l_FloattoLL_res_low);
4201 proj = new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4207 * Transform the Projs of an AddSP.
/* be_AddSP was lowered to ia32 SubSP (stack grows downwards), so the
 * Proj numbers are remapped to the SubSP outputs here. */
4209 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4211 ir_node *block = be_transform_node(get_nodes_block(node));
4212 ir_node *pred = get_Proj_pred(node);
4213 ir_node *new_pred = be_transform_node(pred);
4214 dbg_info *dbgi = get_irn_dbg_info(node);
4215 long proj = get_Proj_proj(node);
4217 if (proj == pn_be_AddSP_sp) {
4218 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4219 pn_ia32_SubSP_stack);
/* The stack pointer output is pinned to ESP. */
4220 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4222 } else if (proj == pn_be_AddSP_res) {
4223 return new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4224 pn_ia32_SubSP_addr);
4225 } else if (proj == pn_be_AddSP_M) {
4226 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_SubSP_M);
4229 panic("No idea how to transform proj->AddSP");
4233 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became ia32 AddSP. */
4235 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4237 ir_node *block = be_transform_node(get_nodes_block(node));
4238 ir_node *pred = get_Proj_pred(node);
4239 ir_node *new_pred = be_transform_node(pred);
4240 dbg_info *dbgi = get_irn_dbg_info(node);
4241 long proj = get_Proj_proj(node);
4243 if (proj == pn_be_SubSP_sp) {
4244 ir_node *res = new_rd_Proj(dbgi, block, new_pred, mode_Iu,
4245 pn_ia32_AddSP_stack);
/* The stack pointer output is pinned to ESP. */
4246 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4248 } else if (proj == pn_be_SubSP_M) {
4249 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_AddSP_M);
4252 panic("No idea how to transform proj->SubSP");
4256 * Transform and renumber the Projs from a Load.
/* Maps firm Load Projs (res/M/X_regular/X_except) onto the project
 * numbers of whichever ia32 load the predecessor became: plain Load,
 * a Conv with folded load, SSE xLoad, or x87 vfld.  Loads that may
 * still be folded into an address mode get their ProjM deferred. */
4258 static ir_node *gen_Proj_Load(ir_node *node)
4261 ir_node *block = be_transform_node(get_nodes_block(node));
4262 ir_node *pred = get_Proj_pred(node);
4263 dbg_info *dbgi = get_irn_dbg_info(node);
4264 long proj = get_Proj_proj(node);
4266 /* loads might be part of source address mode matches, so we don't
4267 * transform the ProjMs yet (with the exception of loads whose result is
/* Multi-user loads can't be AM-folded away entirely; keep their ProjM on
 * the untransformed pred and renumber it to the generic ia32 mem pn. */
4270 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4272 ir_node *old_block = get_nodes_block(node);
4274 /* this is needed, because sometimes we have loops that are only
4275 reachable through the ProjM */
4276 be_enqueue_preds(node);
4277 /* do it in 2 steps, to silence firm verifier */
4278 res = new_rd_Proj(dbgi, old_block, pred, mode_M, pn_Load_M);
4279 set_Proj_proj(res, pn_ia32_mem);
4283 /* renumber the proj */
4284 new_pred = be_transform_node(pred);
4285 if (is_ia32_Load(new_pred)) {
4288 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Load_res);
4290 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Load_M);
4291 case pn_Load_X_regular:
4292 return new_rd_Jmp(dbgi, block);
4293 case pn_Load_X_except:
4294 /* This Load might raise an exception. Mark it. */
4295 set_ia32_exc_label(new_pred, 1);
4296 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* The load was fused into a Conv (load+sign/zero-extend). */
4300 } else if (is_ia32_Conv_I2I(new_pred) ||
4301 is_ia32_Conv_I2I8Bit(new_pred)) {
4302 set_irn_mode(new_pred, mode_T);
4303 if (proj == pn_Load_res) {
4304 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_res);
4305 } else if (proj == pn_Load_M) {
4306 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_mem);
4308 } else if (is_ia32_xLoad(new_pred)) {
4311 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4313 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xLoad_M);
4314 case pn_Load_X_regular:
4315 return new_rd_Jmp(dbgi, block);
4316 case pn_Load_X_except:
4317 /* This Load might raise an exception. Mark it. */
4318 set_ia32_exc_label(new_pred, 1);
4319 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4323 } else if (is_ia32_vfld(new_pred)) {
4326 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4328 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfld_M);
4329 case pn_Load_X_regular:
4330 return new_rd_Jmp(dbgi, block);
4331 case pn_Load_X_except:
4332 /* This Load might raise an exception. Mark it. */
4333 set_ia32_exc_label(new_pred, 1);
4334 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4339 /* can happen for ProJMs when source address mode happened for the
4342 /* however it should not be the result proj, as that would mean the
4343 load had multiple users and should not have been used for
4345 if (proj != pn_Load_M) {
4346 panic("internal error: transformed node not a Load");
/* NOTE(review): magic pn 1 here — presumably the mem output of the AM
 * consumer; confirm against the node spec before touching. */
4348 return new_rd_Proj(dbgi, block, new_pred, mode_M, 1);
4351 panic("No idea how to transform proj");
4355 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all lower to a single ia32 Div/IDiv, which yields
 * both quotient and remainder; this maps each firm Proj onto the
 * matching ia32 Div output. */
4357 static ir_node *gen_Proj_DivMod(ir_node *node)
4359 ir_node *block = be_transform_node(get_nodes_block(node));
4360 ir_node *pred = get_Proj_pred(node);
4361 ir_node *new_pred = be_transform_node(pred);
4362 dbg_info *dbgi = get_irn_dbg_info(node);
4363 long proj = get_Proj_proj(node);
4365 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* Outer switch on the original opcode, inner switches on the proj. */
4367 switch (get_irn_opcode(pred)) {
4371 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4373 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4374 case pn_Div_X_regular:
4375 return new_rd_Jmp(dbgi, block);
4376 case pn_Div_X_except:
4377 set_ia32_exc_label(new_pred, 1);
4378 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4386 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4388 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4389 case pn_Mod_X_except:
4390 set_ia32_exc_label(new_pred, 1);
4391 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4399 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_Div_M);
4400 case pn_DivMod_res_div:
4401 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4402 case pn_DivMod_res_mod:
4403 return new_rd_Proj(dbgi, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4404 case pn_DivMod_X_regular:
4405 return new_rd_Jmp(dbgi, block);
4406 case pn_DivMod_X_except:
4407 set_ia32_exc_label(new_pred, 1);
4408 return new_rd_Proj(dbgi, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4417 panic("No idea how to transform proj->DivMod");
4421 * Transform and renumber the Projs of a CopyB.
/* CopyB lowers to either CopyB_i (immediate/unrolled size) or the
 * generic CopyB; map the memory Proj accordingly. */
4423 static ir_node *gen_Proj_CopyB(ir_node *node)
4425 ir_node *block = be_transform_node(get_nodes_block(node));
4426 ir_node *pred = get_Proj_pred(node);
4427 ir_node *new_pred = be_transform_node(pred);
4428 dbg_info *dbgi = get_irn_dbg_info(node);
4429 long proj = get_Proj_proj(node);
4432 case pn_CopyB_M_regular:
4433 if (is_ia32_CopyB_i(new_pred)) {
4434 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4435 } else if (is_ia32_CopyB(new_pred)) {
4436 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_CopyB_M);
4443 panic("No idea how to transform proj->CopyB");
4447 * Transform and renumber the Projs of a Quot.
/* Float division became either an SSE xDiv or an x87 vfdiv; map memory
 * and result Projs to the respective outputs (xmm vs vfp mode). */
4449 static ir_node *gen_Proj_Quot(ir_node *node)
4451 ir_node *block = be_transform_node(get_nodes_block(node));
4452 ir_node *pred = get_Proj_pred(node);
4453 ir_node *new_pred = be_transform_node(pred);
4454 dbg_info *dbgi = get_irn_dbg_info(node);
4455 long proj = get_Proj_proj(node);
4459 if (is_ia32_xDiv(new_pred)) {
4460 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_xDiv_M);
4461 } else if (is_ia32_vfdiv(new_pred)) {
4462 return new_rd_Proj(dbgi, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4466 if (is_ia32_xDiv(new_pred)) {
4467 return new_rd_Proj(dbgi, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4468 } else if (is_ia32_vfdiv(new_pred)) {
4469 return new_rd_Proj(dbgi, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* Control-flow Projs are unhandled here (fall through to panic). */
4472 case pn_Quot_X_regular:
4473 case pn_Quot_X_except:
4478 panic("No idea how to transform proj->Quot");
/* Transforms an indirect be_Call to an ia32 Call: matches the call
 * target as address-mode/immediate, routes register parameters into the
 * fixed eax/ecx/edx slots, threads stack pointer and fpcw through, and
 * records SSE2 calls for the later xmm->x87 fixup pass. */
4481 static ir_node *gen_be_Call(ir_node *node)
4483 dbg_info *const dbgi = get_irn_dbg_info(node);
4484 ir_node *const src_block = get_nodes_block(node);
4485 ir_node *const block = be_transform_node(src_block);
4486 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4487 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4488 ir_node *const sp = be_transform_node(src_sp);
4489 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4490 ia32_address_mode_t am;
4491 ia32_address_t *const addr = &am.addr;
/* Register arguments default to "none" (noreg) until matched below. */
4496 ir_node * eax = noreg_GP;
4497 ir_node * ecx = noreg_GP;
4498 ir_node * edx = noreg_GP;
4499 unsigned const pop = be_Call_get_pop(node);
4500 ir_type *const call_tp = be_Call_get_type(node);
4501 int old_no_pic_adjust;
4503 /* Run the x87 simulator if the call returns a float value */
4504 if (get_method_n_ress(call_tp) > 0) {
4505 ir_type *const res_type = get_method_res_type(call_tp, 0);
4506 ir_mode *const res_mode = get_type_mode(res_type);
4508 if (res_mode != NULL && mode_is_float(res_mode)) {
4509 env_cg->do_x87_sim = 1;
4513 /* We do not want be_Call direct calls */
4514 assert(be_Call_get_entity(node) == NULL);
4516 /* special case for PIC trampoline calls */
/* Temporarily disable PIC address adjustment while matching the call
 * target; restored right after. */
4517 old_no_pic_adjust = no_pic_adjust;
4518 no_pic_adjust = env_cg->birg->main_env->options->pic;
4520 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4521 match_am | match_immediate);
4523 no_pic_adjust = old_no_pic_adjust;
/* Last input is the fpcw; walk the remaining register params backwards. */
4525 i = get_irn_arity(node) - 1;
4526 fpcw = be_transform_node(get_irn_n(node, i--));
4527 for (; i >= be_pos_Call_first_arg; --i) {
4528 arch_register_req_t const *const req = arch_get_register_req(node, i);
4529 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4531 assert(req->type == arch_register_req_type_limited);
4532 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
/* Each register parameter is constrained to exactly one of eax/ecx/edx
 * and each slot may be used at most once. */
4534 switch (*req->limited) {
4535 case 1 << REG_EAX: assert(eax == noreg_GP); eax = reg_parm; break;
4536 case 1 << REG_ECX: assert(ecx == noreg_GP); ecx = reg_parm; break;
4537 case 1 << REG_EDX: assert(edx == noreg_GP); edx = reg_parm; break;
4538 default: panic("Invalid GP register for register parameter");
4542 mem = transform_AM_mem(block, src_ptr, src_mem, addr->mem);
4543 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4544 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4545 set_am_attributes(call, &am);
4546 call = fix_mem_proj(call, &am);
4548 if (get_irn_pinned(node) == op_pin_state_pinned)
4549 set_irn_pinned(call, op_pin_state_pinned);
4551 SET_IA32_ORIG_NODE(call, node);
4553 if (ia32_cg_config.use_sse2) {
4554 /* remember this call for post-processing */
4555 ARR_APP1(ir_node *, call_list, call);
4556 ARR_APP1(ir_type *, call_types, be_Call_get_type(node));
4563 * Transform Builtin trap
/* __builtin_trap -> ud2 instruction. */
4565 static ir_node *gen_trap(ir_node *node) {
4566 dbg_info *dbgi = get_irn_dbg_info(node);
4567 ir_node *block = be_transform_node(get_nodes_block(node));
4568 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4570 return new_bd_ia32_UD2(dbgi, block, mem);
4574 * Transform Builtin debugbreak
/* __builtin_debugbreak -> int3 breakpoint instruction. */
4576 static ir_node *gen_debugbreak(ir_node *node) {
4577 dbg_info *dbgi = get_irn_dbg_info(node);
4578 ir_node *block = be_transform_node(get_nodes_block(node));
4579 ir_node *mem = be_transform_node(get_Builtin_mem(node));
4581 return new_bd_ia32_Breakpoint(dbgi, block, mem);
4587 * Transform Builtin return_address
/* __builtin_return_address(level): for level > 0 the frame chain is
 * walked with a ClimbFrame pseudo-op, then the return address is loaded
 * at a fixed entity offset in that frame. */
4589 static ir_node *gen_return_address(ir_node *node) {
4588 ir_node *param = get_Builtin_param(node, 0);
4589 ir_node *frame = get_Builtin_param(node, 1);
4590 dbg_info *dbgi = get_irn_dbg_info(node);
/* Level must be a compile-time constant. */
4591 tarval *tv = get_Const_tarval(param);
4592 unsigned long value = get_tarval_long(tv);
4594 ir_node *block = be_transform_node(get_nodes_block(node));
4595 ir_node *ptr = be_transform_node(frame);
/* ProduceVal creates fresh (undefined) values for ClimbFrame's counter
 * and result registers. */
4599 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4600 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4601 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4604 /* load the return address from this frame */
4605 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4607 set_irn_pinned(load, get_irn_pinned(node));
4608 set_ia32_op_type(load, ia32_AddrModeS);
4609 set_ia32_ls_mode(load, mode_Iu);
4611 set_ia32_am_offs_int(load, 0);
4612 set_ia32_use_frame(load);
4613 set_ia32_frame_ent(load, ia32_get_return_address_entity());
4615 if (get_irn_pinned(node) == op_pin_state_floats) {
/* Rematerializable only if all load flavours share the same result pn. */
4616 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4617 && pn_ia32_vfld_res == pn_ia32_Load_res
4618 && pn_ia32_Load_res == pn_ia32_res);
4619 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4622 SET_IA32_ORIG_NODE(load, node);
4623 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4627 * Transform Builtin frame_address
/* __builtin_frame_address(level): same ClimbFrame scheme as
 * gen_return_address, but loads the saved frame pointer entity. */
4629 static ir_node *gen_frame_address(ir_node *node) {
4630 ir_node *param = get_Builtin_param(node, 0);
4631 ir_node *frame = get_Builtin_param(node, 1);
4632 dbg_info *dbgi = get_irn_dbg_info(node);
/* Level must be a compile-time constant. */
4633 tarval *tv = get_Const_tarval(param);
4634 unsigned long value = get_tarval_long(tv);
4636 ir_node *block = be_transform_node(get_nodes_block(node));
4637 ir_node *ptr = be_transform_node(frame);
4642 ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
4643 ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
4644 ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
4647 /* load the frame address from this frame */
4648 load = new_bd_ia32_Load(dbgi, block, ptr, noreg_GP, nomem);
4650 set_irn_pinned(load, get_irn_pinned(node));
4651 set_ia32_op_type(load, ia32_AddrModeS);
4652 set_ia32_ls_mode(load, mode_Iu);
4654 ent = ia32_get_frame_address_entity();
4656 set_ia32_am_offs_int(load, 0);
4657 set_ia32_use_frame(load);
4658 set_ia32_frame_ent(load, ent);
4660 /* will fail anyway, but gcc does this: */
4661 set_ia32_am_offs_int(load, 0);
4664 if (get_irn_pinned(node) == op_pin_state_floats) {
/* Rematerializable only if all load flavours share the same result pn. */
4665 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
4666 && pn_ia32_vfld_res == pn_ia32_Load_res
4667 && pn_ia32_Load_res == pn_ia32_res);
4668 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
4671 SET_IA32_ORIG_NODE(load, node);
4672 return new_r_Proj(block, load, mode_Iu, pn_ia32_Load_res);
4676 * Transform Builtin frame_address
/* NOTE(review): the doc line above is a copy-paste leftover — this
 * transforms the __builtin_prefetch Builtin.  Params: [0] address,
 * [1] rw flag (1 = write), [2] locality (0..3).  Chooses PrefetchW
 * (3DNow!), prefetchNTA/t2/t1/t0 (SSE) or plain prefetch (3DNow!)
 * depending on the CPU configuration; with neither, memory is routed
 * through untouched. */
4678 static ir_node *gen_prefetch(ir_node *node) {
4680 ir_node *ptr, *block, *mem, *base, *index;
4681 ir_node *param, *new_node;
4684 ia32_address_t addr;
4686 if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
4687 /* no prefetch at all, route memory */
4688 return be_transform_node(get_Builtin_mem(node));
/* rw and locality are required to be compile-time constants. */
4691 param = get_Builtin_param(node, 1);
4692 tv = get_Const_tarval(param);
4693 rw = get_tarval_long(tv);
4695 /* construct load address */
4696 memset(&addr, 0, sizeof(addr));
4697 ptr = get_Builtin_param(node, 0);
4698 ia32_create_address_mode(&addr, ptr, 0);
4705 base = be_transform_node(base);
4708 if (index == NULL) {
4711 index = be_transform_node(index);
4714 dbgi = get_irn_dbg_info(node);
4715 block = be_transform_node(get_nodes_block(node));
4716 mem = be_transform_node(get_Builtin_mem(node));
4718 if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
4719 /* we have 3DNow!, this was already checked above */
4720 new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
4721 } else if (ia32_cg_config.use_sse_prefetch) {
4722 /* note: rw == 1 is IGNORED in that case */
4723 param = get_Builtin_param(node, 2);
4724 tv = get_Const_tarval(param);
4725 locality = get_tarval_long(tv);
4727 /* SSE style prefetch */
/* Locality 0 -> NTA, 1 -> t2, 2 -> t1, >=3 -> t0 (elided switch). */
4730 new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
4733 new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
4736 new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
4739 new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
4743 assert(ia32_cg_config.use_3dnow_prefetch);
4744 /* 3DNow! style prefetch */
4745 new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
4748 set_irn_pinned(new_node, get_irn_pinned(node));
4749 set_ia32_op_type(new_node, ia32_AddrModeS);
4750 set_ia32_ls_mode(new_node, mode_Bu);
4751 set_address(new_node, &addr);
4753 SET_IA32_ORIG_NODE(new_node, node);
4755 be_dep_on_frame(new_node);
4756 return new_r_Proj(block, new_node, mode_M, pn_ia32_Prefetch_M);
4760 * Transform bsf like node
/* Shared helper for builtins lowered to a single source-AM unary
 * instruction (bsf, bsr, popcnt): matches the operand as address mode
 * and constructs the node via the given constructor. */
4762 static ir_node *gen_unop_AM(ir_node *node, construct_binop_dest_func *func)
4764 ir_node *param = get_Builtin_param(node, 0);
4765 dbg_info *dbgi = get_irn_dbg_info(node);
4767 ir_node *block = get_nodes_block(node);
4768 ir_node *new_block = be_transform_node(block);
4770 ia32_address_mode_t am;
4771 ia32_address_t *addr = &am.addr;
4774 match_arguments(&am, block, NULL, param, NULL, match_am);
4776 cnt = func(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4777 set_am_attributes(cnt, &am);
4778 set_ia32_ls_mode(cnt, get_irn_mode(param));
4780 SET_IA32_ORIG_NODE(cnt, node);
4781 return fix_mem_proj(cnt, &am);
4785 * Transform builtin ffs.
/* ffs(x) = bsf(x) + 1, with result 0 for x == 0.  bsf leaves ZF set on
 * zero input, so: set = (ZF ? 1 : 0), widened, negated to 0 or -1, then
 * OR-ed over the bsf result (forcing all-ones for x==0) and incremented
 * — all-ones + 1 wraps to 0. */
4787 static ir_node *gen_ffs(ir_node *node)
4789 ir_node *bsf = gen_unop_AM(node, new_bd_ia32_Bsf);
4790 ir_node *real = skip_Proj(bsf);
4791 dbg_info *dbgi = get_irn_dbg_info(real);
4792 ir_node *block = get_nodes_block(real);
4793 ir_node *flag, *set, *conv, *neg, *or;
/* Need both the value and the flags output of the Bsf. */
4796 if (get_irn_mode(real) != mode_T) {
4797 set_irn_mode(real, mode_T);
4798 bsf = new_r_Proj(block, real, mode_Iu, pn_ia32_res);
4801 flag = new_r_Proj(block, real, mode_b, pn_ia32_flags);
4804 set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
4805 SET_IA32_ORIG_NODE(set, node);
/* Set writes only 8 bits; zero-extend to 32. */
4808 conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg_GP, noreg_GP, nomem, set, mode_Bu);
4809 SET_IA32_ORIG_NODE(conv, node);
4812 neg = new_bd_ia32_Neg(dbgi, block, conv);
4815 or = new_bd_ia32_Or(dbgi, block, noreg_GP, noreg_GP, nomem, bsf, neg);
4816 set_ia32_commutative(or);
4819 return new_bd_ia32_Add(dbgi, block, noreg_GP, noreg_GP, nomem, or, ia32_create_Immediate(NULL, 0, 1));
4823 * Transform builtin clz.
/* clz(x) = 31 - bsr(x), computed as bsr(x) ^ 31 since bsr's result is
 * in [0,31] and XOR with 31 flips it within that range.
 * (Result for x == 0 is undefined, matching bsr.) */
4825 static ir_node *gen_clz(ir_node *node)
4827 ir_node *bsr = gen_unop_AM(node, new_bd_ia32_Bsr);
4828 ir_node *real = skip_Proj(bsr);
4829 dbg_info *dbgi = get_irn_dbg_info(real);
4830 ir_node *block = get_nodes_block(real);
4831 ir_node *imm = ia32_create_Immediate(NULL, 0, 31);
4833 return new_bd_ia32_Xor(dbgi, block, noreg_GP, noreg_GP, nomem, bsr, imm);
4837 * Transform builtin ctz.
/* ctz maps directly to bsf (undefined for zero input, like the builtin). */
4839 static ir_node *gen_ctz(ir_node *node)
4841 return gen_unop_AM(node, new_bd_ia32_Bsf);
4845 * Transform builtin parity.
/* Compares the operand against 0 to get the parity flag (PF), then
 * materializes it with Set and zero-extends the byte result.
 * NOTE(review): x86 PF only reflects the low byte of the compare
 * result; presumably the elided code folds the upper bits first —
 * confirm before relying on this for full 32bit parity. */
4847 static ir_node *gen_parity(ir_node *node)
4849 ir_node *param = get_Builtin_param(node, 0);
4850 dbg_info *dbgi = get_irn_dbg_info(node);
4852 ir_node *block = get_nodes_block(node);
4854 ir_node *new_block = be_transform_node(block);
4855 ir_node *imm, *cmp, *new_node;
4857 ia32_address_mode_t am;
4858 ia32_address_t *addr = &am.addr;
4862 match_arguments(&am, block, NULL, param, NULL, match_am);
4863 imm = ia32_create_Immediate(NULL, 0, 0);
4864 cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
4865 addr->mem, imm, am.new_op2, am.ins_permuted, 0);
4866 set_am_attributes(cmp, &am);
4867 set_ia32_ls_mode(cmp, mode_Iu);
4869 SET_IA32_ORIG_NODE(cmp, node);
4871 cmp = fix_mem_proj(cmp, &am);
/* setp: 1 if parity flag set. */
4874 new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
4875 SET_IA32_ORIG_NODE(new_node, node);
/* Set writes only 8 bits; zero-extend to 32. */
4878 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg_GP, noreg_GP,
4879 nomem, new_node, mode_Bu);
4880 SET_IA32_ORIG_NODE(new_node, node);
4885 * Transform builtin popcount
4887 static ir_node *gen_popcount(ir_node *node) {
4888 ir_node *param = get_Builtin_param(node, 0);
4889 dbg_info *dbgi = get_irn_dbg_info(node);
4891 ir_node *block = get_nodes_block(node);
4892 ir_node *new_block = be_transform_node(block);
4895 ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
4897 /* check for SSE4.2 or SSE4a and use the popcnt instruction */
4898 if (ia32_cg_config.use_popcnt) {
4899 ia32_address_mode_t am;
4900 ia32_address_t *addr = &am.addr;
4903 match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
4905 cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
4906 set_am_attributes(cnt, &am);
4907 set_ia32_ls_mode(cnt, get_irn_mode(param));
4909 SET_IA32_ORIG_NODE(cnt, node);
4910 return fix_mem_proj(cnt, &am);
4913 new_param = be_transform_node(param);
4915 /* do the standard popcount algo */
4917 /* m1 = x & 0x55555555 */
4918 imm = ia32_create_Immediate(NULL, 0, 0x55555555);
4919 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, new_param, imm);
4922 simm = ia32_create_Immediate(NULL, 0, 1);
4923 s1 = new_bd_ia32_Shl(dbgi, new_block, new_param, simm);
4925 /* m2 = s1 & 0x55555555 */
4926 m2 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s1, imm);
4929 m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
4931 /* m4 = m3 & 0x33333333 */
4932 imm = ia32_create_Immediate(NULL, 0, 0x33333333);
4933 m4 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m3, imm);
4936 simm = ia32_create_Immediate(NULL, 0, 2);
4937 s2 = new_bd_ia32_Shl(dbgi, new_block, m3, simm);
4939 /* m5 = s2 & 0x33333333 */
4940 m5 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, imm);
4943 m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
4945 /* m7 = m6 & 0x0F0F0F0F */
4946 imm = ia32_create_Immediate(NULL, 0, 0x0F0F0F0F);
4947 m7 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m6, imm);
4950 simm = ia32_create_Immediate(NULL, 0, 4);
4951 s3 = new_bd_ia32_Shl(dbgi, new_block, m6, simm);
4953 /* m8 = s3 & 0x0F0F0F0F */
4954 m8 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, imm);
4957 m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
4959 /* m10 = m9 & 0x00FF00FF */
4960 imm = ia32_create_Immediate(NULL, 0, 0x00FF00FF);
4961 m10 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m9, imm);
4964 simm = ia32_create_Immediate(NULL, 0, 8);
4965 s4 = new_bd_ia32_Shl(dbgi, new_block, m9, simm);
4967 /* m11 = s4 & 0x00FF00FF */
4968 m11 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s4, imm);
4970 /* m12 = m10 + m11 */
4971 m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
4973 /* m13 = m12 & 0x0000FFFF */
4974 imm = ia32_create_Immediate(NULL, 0, 0x0000FFFF);
4975 m13 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, m12, imm);
4977 /* s5 = m12 >> 16 */
4978 simm = ia32_create_Immediate(NULL, 0, 16);
4979 s5 = new_bd_ia32_Shl(dbgi, new_block, m12, simm);
4981 /* res = m13 + s5 */
4982 return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
4986 * Transform builtin byte swap.
4988 static ir_node *gen_bswap(ir_node *node) {
4989 ir_node *param = be_transform_node(get_Builtin_param(node, 0));
4990 dbg_info *dbgi = get_irn_dbg_info(node);
4992 ir_node *block = get_nodes_block(node);
4993 ir_node *new_block = be_transform_node(block);
4994 ir_mode *mode = get_irn_mode(param);
4995 unsigned size = get_mode_size_bits(mode);
4996 ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
5000 if (ia32_cg_config.use_i486) {
5001 /* swap available */
5002 return new_bd_ia32_Bswap(dbgi, new_block, param);
5004 s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5005 s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5007 m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
5008 m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
5010 s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
5012 m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
5013 m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
5015 s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
5016 return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
5019 /* swap16 always available */
5020 return new_bd_ia32_Bswap16(dbgi, new_block, param);
5023 panic("Invalid bswap size (%d)", size);
5028 * Transform builtin outport.
5030 static ir_node *gen_outport(ir_node *node) {
5031 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5032 ir_node *oldv = get_Builtin_param(node, 1);
5033 ir_mode *mode = get_irn_mode(oldv);
5034 ir_node *value = be_transform_node(oldv);
5035 ir_node *block = be_transform_node(get_nodes_block(node));
5036 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5037 dbg_info *dbgi = get_irn_dbg_info(node);
5039 ir_node *res = new_bd_ia32_Outport(dbgi, block, port, value, mem);
5040 set_ia32_ls_mode(res, mode);
5045 * Transform builtin inport.
5047 static ir_node *gen_inport(ir_node *node) {
5048 ir_type *tp = get_Builtin_type(node);
5049 ir_type *rstp = get_method_res_type(tp, 0);
5050 ir_mode *mode = get_type_mode(rstp);
5051 ir_node *port = create_immediate_or_transform(get_Builtin_param(node, 0), 0);
5052 ir_node *block = be_transform_node(get_nodes_block(node));
5053 ir_node *mem = be_transform_node(get_Builtin_mem(node));
5054 dbg_info *dbgi = get_irn_dbg_info(node);
5056 ir_node *res = new_bd_ia32_Inport(dbgi, block, port, mem);
5057 set_ia32_ls_mode(res, mode);
5059 /* check for missing Result Proj */
/**
5064 * Transform a builtin inner trampoline
 *
 * Emits stores that write executable trampoline code to memory:
 *   mov ecx, <env>      (opcode 0xB9 + 32-bit immediate)
 *   jmp rel <callee>    (opcode 0xE9 + 32-bit relative offset)
 * The relative jump offset is callee - (trampoline + 10), since the
 * trampoline body is 10 bytes long (hence the -10 below).
 *
 * NOTE(review): the listing numbers show gaps between the stores — the
 * addr.offset/addr.mem advancement between the byte stores is presumably
 * in those missing lines; verify against the full file.
 */
5066 static ir_node *gen_inner_trampoline(ir_node *node) {
5067 ir_node *ptr = get_Builtin_param(node, 0);
5068 ir_node *callee = get_Builtin_param(node, 1);
5069 ir_node *env = be_transform_node(get_Builtin_param(node, 2));
5070 ir_node *mem = get_Builtin_mem(node);
5071 ir_node *block = get_nodes_block(node);
5072 ir_node *new_block = be_transform_node(block);
5076 ir_node *trampoline;
5078 dbg_info *dbgi = get_irn_dbg_info(node);
5079 ia32_address_t addr;
5081 /* construct store address */
5082 memset(&addr, 0, sizeof(addr));
5083 ia32_create_address_mode(&addr, ptr, 0);
5085 if (addr.base == NULL) {
5086 addr.base = noreg_GP;
5088 addr.base = be_transform_node(addr.base);
5091 if (addr.index == NULL) {
5092 addr.index = noreg_GP;
5094 addr.index = be_transform_node(addr.index);
5096 addr.mem = be_transform_node(mem);
5098 /* mov ecx, <env> */
5099 val = ia32_create_Immediate(NULL, 0, 0xB9);
5100 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5101 addr.index, addr.mem, val);
5102 set_irn_pinned(store, get_irn_pinned(node));
5103 set_ia32_op_type(store, ia32_AddrModeD);
5104 set_ia32_ls_mode(store, mode_Bu);
5105 set_address(store, &addr);
/* store the 32-bit environment pointer as the mov's immediate operand */
5109 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5110 addr.index, addr.mem, env);
5111 set_irn_pinned(store, get_irn_pinned(node));
5112 set_ia32_op_type(store, ia32_AddrModeD);
5113 set_ia32_ls_mode(store, mode_Iu);
5114 set_address(store, &addr);
5118 /* jmp rel <callee> */
5119 val = ia32_create_Immediate(NULL, 0, 0xE9);
5120 store = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
5121 addr.index, addr.mem, val);
5122 set_irn_pinned(store, get_irn_pinned(node));
5123 set_ia32_op_type(store, ia32_AddrModeD);
5124 set_ia32_ls_mode(store, mode_Bu);
5125 set_address(store, &addr);
5129 trampoline = be_transform_node(ptr);
5131 /* the callee is typically an immediate */
5132 if (is_SymConst(callee)) {
/* callee address minus 10 (trampoline length), folded into the Const */
5133 rel = new_bd_ia32_Const(dbgi, new_block, get_SymConst_entity(callee), 0, 0, -10);
/* non-constant callee: compute callee - 10 with a Lea */
5135 rel = new_bd_ia32_Lea(dbgi, new_block, be_transform_node(callee), ia32_create_Immediate(NULL, 0, -10));
/* rel = (callee - 10) - trampoline: pc-relative jump displacement */
5137 rel = new_bd_ia32_Sub(dbgi, new_block, noreg_GP, noreg_GP, nomem, rel, trampoline);
5139 store = new_bd_ia32_Store(dbgi, new_block, addr.base,
5140 addr.index, addr.mem, rel);
5141 set_irn_pinned(store, get_irn_pinned(node));
5142 set_ia32_op_type(store, ia32_AddrModeD);
5143 set_ia32_ls_mode(store, mode_Iu);
5144 set_address(store, &addr);
/* result tuple: memory and the trampoline entry address */
5149 return new_r_Tuple(new_block, 2, in);
/**
5153 * Transform Builtin node.
 *
 * Dispatches on the builtin kind to the specific gen_* transformer.
 * NOTE(review): several `case ir_bk_*:` labels fall into listing gaps
 * between the visible return statements.
 */
5155 static ir_node *gen_Builtin(ir_node *node) {
5156 ir_builtin_kind kind = get_Builtin_kind(node);
5160 return gen_trap(node);
5161 case ir_bk_debugbreak:
5162 return gen_debugbreak(node);
5163 case ir_bk_return_address:
5164 return gen_return_address(node);
5165 case ir_bk_frame_address:
5166 return gen_frame_address(node);
5167 case ir_bk_prefetch:
5168 return gen_prefetch(node);
5170 return gen_ffs(node);
5172 return gen_clz(node);
5174 return gen_ctz(node);
5176 return gen_parity(node);
5177 case ir_bk_popcount:
5178 return gen_popcount(node);
5180 return gen_bswap(node);
5182 return gen_outport(node);
5184 return gen_inport(node);
5185 case ir_bk_inner_trampoline:
5186 return gen_inner_trampoline(node);
/* any kind not handled above is unsupported on this backend */
5188 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
/**
5192 * Transform Proj(Builtin) node.
 *
 * Maps the generic Builtin result/memory Projs onto the Projs (or Tuple
 * entries) of the already-transformed ia32 node.
 * NOTE(review): the switch header and several case labels fall into
 * listing gaps.
 */
5194 static ir_node *gen_Proj_Builtin(ir_node *proj) {
5195 ir_node *node = get_Proj_pred(proj);
5196 ir_node *new_node = be_transform_node(node);
5197 ir_builtin_kind kind = get_Builtin_kind(node);
5200 case ir_bk_return_address:
5201 case ir_bk_frame_address:
5206 case ir_bk_popcount:
/* these builtins produce exactly one data result */
5208 assert(get_Proj_proj(proj) == pn_Builtin_1_result);
5211 case ir_bk_debugbreak:
5212 case ir_bk_prefetch:
/* these builtins only produce a memory result */
5214 assert(get_Proj_proj(proj) == pn_Builtin_M);
5217 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5218 return new_r_Proj(get_nodes_block(new_node),
5219 new_node, get_irn_mode(proj), pn_ia32_Inport_res);
5221 assert(get_Proj_proj(proj) == pn_Builtin_M);
5222 return new_r_Proj(get_nodes_block(new_node),
5223 new_node, mode_M, pn_ia32_Inport_M);
5225 case ir_bk_inner_trampoline:
/* gen_inner_trampoline returns a Tuple: [0] = memory, [1] = address */
5226 if (get_Proj_proj(proj) == pn_Builtin_1_result) {
5227 return get_Tuple_pred(new_node, 1);
5229 assert(get_Proj_proj(proj) == pn_Builtin_M);
5230 return get_Tuple_pred(new_node, 0);
5233 panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
5236 static ir_node *gen_be_IncSP(ir_node *node)
5238 ir_node *res = be_duplicate_node(node);
5239 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/**
5245 * Transform the Projs from a be_Call.
 *
 * Translates be_Call proj numbers to ia32_Call proj numbers; for regular
 * register results the matching output of the new call is searched by
 * comparing the (limited) register requirements.
 * NOTE(review): parts of the search loop body and the Proj-number
 * computation fall into listing gaps.
 */
5247 static ir_node *gen_Proj_be_Call(ir_node *node)
5249 ir_node *block = be_transform_node(get_nodes_block(node));
5250 ir_node *call = get_Proj_pred(node);
5251 ir_node *new_call = be_transform_node(call);
5252 dbg_info *dbgi = get_irn_dbg_info(node);
5253 long proj = get_Proj_proj(node);
5254 ir_mode *mode = get_irn_mode(node);
/* the regular memory result maps directly onto the ia32 Call memory */
5257 if (proj == pn_be_Call_M_regular) {
5258 return new_rd_Proj(dbgi, block, new_call, mode_M, n_ia32_Call_mem);
5260 /* transform call modes */
5261 if (mode_is_data(mode)) {
5262 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
5266 /* Map from be_Call to ia32_Call proj number */
5267 if (proj == pn_be_Call_sp) {
5268 proj = pn_ia32_Call_stack;
5269 } else if (proj == pn_be_Call_M_regular) {
5270 proj = pn_ia32_Call_M;
/* regular result: locate the new call output with the same limited
 * register requirement */
5272 arch_register_req_t const *const req = arch_get_register_req_out(node);
5273 int const n_outs = arch_irn_get_n_outs(new_call);
5276 assert(proj >= pn_be_Call_first_res);
5277 assert(req->type & arch_register_req_type_limited);
5279 for (i = 0; i < n_outs; ++i) {
5280 arch_register_req_t const *const new_req
5281 = arch_get_out_register_req(new_call, i);
/* skip outputs whose class or limited register set does not match */
5283 if (!(new_req->type & arch_register_req_type_limited) ||
5284 new_req->cls != req->cls ||
5285 *new_req->limited != *req->limited)
5294 res = new_rd_Proj(dbgi, block, new_call, mode, proj);
5296 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
5298 case pn_ia32_Call_stack:
5299 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
5302 case pn_ia32_Call_fpcw:
5303 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
/**
5311 * Transform the Projs from a Cmp.
 *
 * A Proj on a Cmp should never survive until this phase; mode_b lowering
 * is expected to have removed them, so reaching this is a hard error.
 */
5313 static ir_node *gen_Proj_Cmp(ir_node *node)
5315 /* this probably means not all mode_b nodes were lowered... */
5316 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
5321 * Transform the Projs from a Bound.
5323 static ir_node *gen_Proj_Bound(ir_node *node)
5325 ir_node *new_node, *block;
5326 ir_node *pred = get_Proj_pred(node);
5328 switch (get_Proj_proj(node)) {
5330 return be_transform_node(get_Bound_mem(pred));
5331 case pn_Bound_X_regular:
5332 new_node = be_transform_node(pred);
5333 block = get_nodes_block(new_node);
5334 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_true);
5335 case pn_Bound_X_except:
5336 new_node = be_transform_node(pred);
5337 block = get_nodes_block(new_node);
5338 return new_r_Proj(block, new_node, mode_X, pn_ia32_Jcc_false);
5340 return be_transform_node(get_Bound_index(pred));
5342 panic("unsupported Proj from Bound");
5346 static ir_node *gen_Proj_ASM(ir_node *node)
5348 ir_mode *mode = get_irn_mode(node);
5349 ir_node *pred = get_Proj_pred(node);
5350 ir_node *new_pred = be_transform_node(pred);
5351 ir_node *block = get_nodes_block(new_pred);
5352 long pos = get_Proj_proj(node);
5354 if (mode == mode_M) {
5355 pos = arch_irn_get_n_outs(new_pred)-1;
5356 } else if (mode_is_int(mode) || mode_is_reference(mode)) {
5358 } else if (mode_is_float(mode)) {
5361 panic("unexpected proj mode at ASM");
5364 return new_r_Proj(block, new_pred, mode, pos);
/**
5368 * Transform and potentially renumber Proj nodes.
 *
 * Central Proj dispatcher: forwards to the per-opcode Proj transformers
 * above, handles Start Projs specially, and falls back to duplication.
 * NOTE(review): the `case iro_*:` labels of the outer switch fall into
 * listing gaps between the visible return statements.
 */
5370 static ir_node *gen_Proj(ir_node *node)
5372 ir_node *pred = get_Proj_pred(node);
5375 switch (get_irn_opcode(pred)) {
5377 proj = get_Proj_proj(node);
/* a Store has only a memory result; the transformed store is used directly */
5378 if (proj == pn_Store_M) {
5379 return be_transform_node(pred);
5381 panic("No idea how to transform proj->Store");
5384 return gen_Proj_Load(node);
5386 return gen_Proj_ASM(node);
5388 return gen_Proj_Builtin(node);
5392 return gen_Proj_DivMod(node);
5394 return gen_Proj_CopyB(node);
5396 return gen_Proj_Quot(node);
5398 return gen_Proj_be_SubSP(node);
5400 return gen_Proj_be_AddSP(node);
5402 return gen_Proj_be_Call(node);
5404 return gen_Proj_Cmp(node);
5406 return gen_Proj_Bound(node);
5408 proj = get_Proj_proj(node);
5410 case pn_Start_X_initial_exec: {
5411 ir_node *block = get_nodes_block(pred);
5412 ir_node *new_block = be_transform_node(block);
5413 dbg_info *dbgi = get_irn_dbg_info(node);
5414 /* we exchange the ProjX with a jump */
5415 ir_node *jump = new_rd_Jmp(dbgi, new_block);
5420 case pn_Start_P_tls:
5421 return gen_Proj_tls(node);
5426 if (is_ia32_l_FloattoLL(pred)) {
5427 return gen_Proj_l_FloattoLL(node);
5429 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5433 ir_mode *mode = get_irn_mode(node);
5434 if (ia32_mode_needs_gp_reg(mode)) {
5435 ir_node *new_pred = be_transform_node(pred);
5436 ir_node *block = be_transform_node(get_nodes_block(node));
5437 ir_node *new_proj = new_r_Proj(block, new_pred,
5438 mode_Iu, get_Proj_proj(node));
/* keep the old node number for debugging purposes */
5439 new_proj->node_nr = node->node_nr;
/* default: just duplicate the Proj */
5444 return be_duplicate_node(node);
/**
5448 * Enters all transform functions into the generic pointer
 * of the corresponding firm ops (GEN) or marks ops that must never
 * appear at this stage (BAD).
 * NOTE(review): the bulk of the GEN(...)/BAD(...) registration lines
 * falls into listing gaps; only two are visible here.
 */
5450 static void register_transformers(void)
5452 /* first clear the generic function pointer for all ops */
5453 clear_irp_opcodes_generic_func();
5455 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5456 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5496 /* transform ops from intrinsic lowering */
5508 GEN(ia32_l_LLtoFloat);
5509 GEN(ia32_l_FloattoLL);
5515 /* we should never see these nodes */
5530 /* handle builtins */
5533 /* handle generic backend nodes */
/**
5547 * Pre-transform all unknown and noreg nodes.
 *
 * Also caches the global no-memory node and the GP no-register node used
 * by the transformers above.
 */
5549 static void ia32_pretransform_node(void)
5551 ia32_code_gen_t *cg = env_cg;
5553 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5554 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5555 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5556 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5557 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5558 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5560 nomem = get_irg_no_mem(current_ir_graph);
5561 noreg_GP = ia32_new_NoReg_gp(cg);
/**
5567 * Walker, checks if all ia32 nodes producing more than one result have their
5568 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
 */
5570 static void add_missing_keep_walker(ir_node *node, void *data)
5573 unsigned found_projs = 0;
5574 const ir_edge_t *edge;
5575 ir_mode *mode = get_irn_mode(node);
/* only multi-result ia32 nodes are of interest */
5580 if (!is_ia32_irn(node))
5583 n_outs = arch_irn_get_n_outs(node);
5586 if (is_ia32_SwitchJmp(node))
/* collect a bitmask of the Proj numbers that already exist */
5589 assert(n_outs < (int) sizeof(unsigned) * 8);
5590 foreach_out_edge(node, edge) {
5591 ir_node *proj = get_edge_src_irn(edge);
5594 /* The node could be kept */
/* memory Projs never need a Keep */
5598 if (get_irn_mode(proj) == mode_M)
5601 pn = get_Proj_proj(proj);
5602 assert(pn < n_outs);
5603 found_projs |= 1 << pn;
5607 /* are keeps missing? */
5609 for (i = 0; i < n_outs; ++i) {
5612 const arch_register_req_t *req;
5613 const arch_register_class_t *cls;
/* output already has a user — nothing to do */
5615 if (found_projs & (1 << i)) {
5619 req = arch_get_out_register_req(node, i);
/* flag outputs need no Keep */
5624 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
/* create the missing Proj and attach it to a (possibly shared) Keep */
5628 block = get_nodes_block(node);
5629 in[0] = new_r_Proj(block, node, arch_register_class_mode(cls), i);
5630 if (last_keep != NULL) {
5631 be_Keep_add_node(last_keep, cls, in[0]);
5633 last_keep = be_new_Keep(block, 1, in);
5634 if (sched_is_scheduled(node)) {
5635 sched_add_after(node, last_keep);
5642 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5645 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5647 ir_graph *irg = be_get_birg_irg(cg->birg);
5648 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/**
5652 * Post-process all calls if we are in SSE mode.
5653 * The ABI requires that the results are in st0, copy them
5654 * to a xmm register.
 *
 * xStore users are patched into a direct vfst; all other users get the
 * value re-loaded into an SSE register via a stack round-trip.
 */
5656 static void postprocess_fp_call_results(void) {
5659 for (i = ARR_LEN(call_list) - 1; i >= 0; --i) {
5660 ir_node *call = call_list[i];
5661 ir_type *mtp = call_types[i];
/* examine every result of the call's method type */
5664 for (j = get_method_n_ress(mtp) - 1; j >= 0; --j) {
5665 ir_type *res_tp = get_method_res_type(mtp, j);
5666 ir_node *res, *new_res;
5667 const ir_edge_t *edge, *next;
5670 if (! is_atomic_type(res_tp)) {
5671 /* no floating point return */
5674 mode = get_type_mode(res_tp);
5675 if (! mode_is_float(mode)) {
5676 /* no floating point return */
/* the vfp result Proj of the call */
5680 res = be_get_Proj_for_pn(call, pn_ia32_Call_vf0 + j);
5683 /* now patch the users */
5684 foreach_out_edge_safe(res, edge, next) {
5685 ir_node *succ = get_edge_src_irn(edge);
/* Keeps may stay attached to the vfp result */
5688 if (be_is_Keep(succ))
5691 if (is_ia32_xStore(succ)) {
5692 /* an xStore can be patched into an vfst */
5693 dbg_info *db = get_irn_dbg_info(succ);
5694 ir_node *block = get_nodes_block(succ);
5695 ir_node *base = get_irn_n(succ, n_ia32_xStore_base);
5696 ir_node *index = get_irn_n(succ, n_ia32_xStore_index);
5697 ir_node *mem = get_irn_n(succ, n_ia32_xStore_mem);
5698 ir_node *value = get_irn_n(succ, n_ia32_xStore_val);
5699 ir_mode *mode = get_ia32_ls_mode(succ);
5701 ir_node *st = new_bd_ia32_vfst(db, block, base, index, mem, value, mode);
5702 set_ia32_am_offs_int(st, get_ia32_am_offs_int(succ));
5703 if (is_ia32_use_frame(succ))
5704 set_ia32_use_frame(st);
5705 set_ia32_frame_ent(st, get_ia32_frame_ent(succ));
5706 set_irn_pinned(st, get_irn_pinned(succ));
5707 set_ia32_op_type(st, ia32_AddrModeD);
/* lazily build the st0 -> xmm copy the first time it is needed */
5711 if (new_res == NULL) {
5712 dbg_info *db = get_irn_dbg_info(call);
5713 ir_node *block = get_nodes_block(call);
5714 ir_node *frame = get_irg_frame(current_ir_graph);
5715 ir_node *old_mem = be_get_Proj_for_pn(call, pn_ia32_Call_M);
5716 ir_node *call_mem = new_r_Proj(block, call, mode_M, pn_ia32_Call_M);
5717 ir_node *vfst, *xld, *new_mem;
5719 /* store st(0) on stack */
5720 vfst = new_bd_ia32_vfst(db, block, frame, noreg_GP, call_mem, res, mode);
5721 set_ia32_op_type(vfst, ia32_AddrModeD);
5722 set_ia32_use_frame(vfst);
5724 /* load into SSE register */
5725 xld = new_bd_ia32_xLoad(db, block, frame, noreg_GP, vfst, mode);
5726 set_ia32_op_type(xld, ia32_AddrModeS);
5727 set_ia32_use_frame(xld);
5729 new_res = new_r_Proj(block, xld, mode, pn_ia32_xLoad_res);
5730 new_mem = new_r_Proj(block, xld, mode_M, pn_ia32_xLoad_M);
/* route existing memory users through the new load's memory */
5732 if (old_mem != NULL) {
5733 edges_reroute(old_mem, new_mem, current_ir_graph);
/* finally let the user consume the SSE copy of the result */
5737 set_irn_n(succ, get_edge_src_pos(edge), new_res);
5744 /* do the transformation */
/**
 * Entry point of the ia32 transform phase: registers the transformers,
 * precomputes helper data (heights, non-address-mode nodes), runs the
 * generic transform driver and post-processes SSE call results.
 */
5745 void ia32_transform_graph(ia32_code_gen_t *cg)
5749 register_transformers();
5751 initial_fpcw = NULL;
/* heights are used by the address-mode matcher */
5754 be_timer_push(T_HEIGHTS);
5755 heights = heights_new(cg->irg);
5756 be_timer_pop(T_HEIGHTS);
5757 ia32_calculate_non_address_mode_nodes(cg->birg);
5759 /* the transform phase is not safe for CSE (yet) because several nodes get
5760 * attributes set after their creation */
5761 cse_last = get_opt_cse();
/* collect float-returning calls for SSE post-processing */
5764 call_list = NEW_ARR_F(ir_node *, 0);
5765 call_types = NEW_ARR_F(ir_type *, 0);
5766 be_transform_graph(cg->birg, ia32_pretransform_node);
5768 if (ia32_cg_config.use_sse2)
5769 postprocess_fp_call_results();
5770 DEL_ARR_F(call_types);
5771 DEL_ARR_F(call_list);
/* restore the CSE setting saved above */
5773 set_opt_cse(cse_last);
5775 ia32_free_non_address_mode_nodes();
5776 heights_free(heights);
5780 void ia32_init_transform(void)
5782 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");